Merge branch 'master' into upstream
diff --git a/CREDITS b/CREDITS
index 0fe904e..cc3453a 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2384,6 +2384,13 @@
 E: tmolina@cablespeed.com
 D: bug fixes, documentation, minor hackery
 
+N: Paul Moore
+E: paul.moore@hp.com
+D: NetLabel author
+S: Hewlett-Packard
+S: 110 Spit Brook Road
+S: Nashua, NH 03062
+
 N: James Morris
 E: jmorris@namei.org
 W: http://namei.org/
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 5f7f7d7..02457ec 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -184,6 +184,8 @@
 	- how to use PPro Memory Type Range Registers to increase performance.
 nbd.txt
 	- info on a TCP implementation of a network block device.
+netlabel/
+	- directory with information on the NetLabel subsystem.
 networking/
 	- directory with info on various aspects of networking with Linux.
 nfsroot.txt
diff --git a/Documentation/crypto/api-intro.txt b/Documentation/crypto/api-intro.txt
index 74dffc6..5a03a28 100644
--- a/Documentation/crypto/api-intro.txt
+++ b/Documentation/crypto/api-intro.txt
@@ -19,15 +19,14 @@
 API.
 
 'Transforms' are user-instantiated objects, which maintain state, handle all
-of the implementation logic (e.g. manipulating page vectors), provide an 
-abstraction to the underlying algorithms, and handle common logical 
-operations (e.g. cipher modes, HMAC for digests).  However, at the user 
+of the implementation logic (e.g. manipulating page vectors) and provide an 
+abstraction to the underlying algorithms.  However, at the user 
 level they are very simple.
 
 Conceptually, the API layering looks like this:
 
   [transform api]  (user interface)
-  [transform ops]  (per-type logic glue e.g. cipher.c, digest.c)
+  [transform ops]  (per-type logic glue e.g. cipher.c, compress.c)
   [algorithm api]  (for registering algorithms)
   
 The idea is to make the user interface and algorithm registration API
@@ -44,22 +43,27 @@
 Here's an example of how to use the API:
 
 	#include <linux/crypto.h>
+	#include <linux/err.h>
+	#include <linux/scatterlist.h>
 	
 	struct scatterlist sg[2];
 	char result[128];
-	struct crypto_tfm *tfm;
+	struct crypto_hash *tfm;
+	struct hash_desc desc;
 	
-	tfm = crypto_alloc_tfm("md5", 0);
-	if (tfm == NULL)
+	tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
 		fail();
 		
 	/* ... set up the scatterlists ... */
+
+	desc.tfm = tfm;
+	desc.flags = 0;
 	
-	crypto_digest_init(tfm);
-	crypto_digest_update(tfm, &sg, 2);
-	crypto_digest_final(tfm, result);
+	if (crypto_hash_digest(&desc, &sg, 2, result))
+		fail();
 	
-	crypto_free_tfm(tfm);
+	crypto_free_hash(tfm);
 
     
 Many real examples are available in the regression test module (tcrypt.c).
@@ -126,7 +130,7 @@
 BUGS
 
 Send bug reports to:
-James Morris <jmorris@redhat.com>
+Herbert Xu <herbert@gondor.apana.org.au>
 Cc: David S. Miller <davem@redhat.com>
 
 
@@ -134,13 +138,14 @@
 
 For further patches and various updates, including the current TODO
 list, see:
-http://samba.org/~jamesm/crypto/
+http://gondor.apana.org.au/~herbert/crypto/
 
 
 AUTHORS
 
 James Morris
 David S. Miller
+Herbert Xu
 
 
 CREDITS
@@ -238,8 +243,11 @@
 Tiger algorithm contributors:
   Aaron Grothe
 
+VIA PadLock contributors:
+  Michal Ludvig
+
 Generic scatterwalk code by Adam J. Richter <adam@yggdrasil.com>
 
 Please send any credits updates or corrections to:
-James Morris <jmorris@redhat.com>
+Herbert Xu <herbert@gondor.apana.org.au>
 
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 87a1733..71d05f4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1189,8 +1189,6 @@
 				Mechanism 2.
 		nommconf	[IA-32,X86_64] Disable use of MMCONFIG for PCI
 				Configuration
-		mmconf		[IA-32,X86_64] Force MMCONFIG. This is useful
-				to override the builtin blacklist.
 		nomsi		[MSI] If the PCI_MSI kernel config parameter is
 				enabled, this kernel boot option can be used to
 				disable the use of MSI interrupts system-wide.
diff --git a/Documentation/netlabel/00-INDEX b/Documentation/netlabel/00-INDEX
new file mode 100644
index 0000000..837bf35
--- /dev/null
+++ b/Documentation/netlabel/00-INDEX
@@ -0,0 +1,10 @@
+00-INDEX
+	- this file.
+cipso_ipv4.txt
+	- documentation on the IPv4 CIPSO protocol engine.
+draft-ietf-cipso-ipsecurity-01.txt
+	- IETF draft of the CIPSO protocol, dated 16 July 1992.
+introduction.txt
+	- NetLabel introduction, READ THIS FIRST.
+lsm_interface.txt
+	- documentation on the NetLabel kernel security module API.
diff --git a/Documentation/netlabel/cipso_ipv4.txt b/Documentation/netlabel/cipso_ipv4.txt
new file mode 100644
index 0000000..93dacb1
--- /dev/null
+++ b/Documentation/netlabel/cipso_ipv4.txt
@@ -0,0 +1,48 @@
+NetLabel CIPSO/IPv4 Protocol Engine
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+ * Overview
+
+The NetLabel CIPSO/IPv4 protocol engine is based on the IETF Commercial IP
+Security Option (CIPSO) draft from July 16, 1992.  A copy of this draft can be
+found in this directory; consult '00-INDEX' for the filename.  While the IETF
+draft never made it to an RFC standard, it has become a de facto standard for
+labeled networking and is used in many trusted operating systems.
+
+ * Outbound Packet Processing
+
+The CIPSO/IPv4 protocol engine applies the CIPSO IP option to packets by
+adding the CIPSO label to the socket.  This causes all packets leaving the
+system through the socket to have the CIPSO IP option applied.  The socket's
+CIPSO label can be changed at any point in time; however, it is recommended
+that it is set upon the socket's creation.  The LSM can set the socket's CIPSO
+label by using the NetLabel security module API; if the NetLabel "domain" is
+configured to use CIPSO for packet labeling then a CIPSO IP option will be
+generated and attached to the socket.
+
+ * Inbound Packet Processing
+
+The CIPSO/IPv4 protocol engine validates every CIPSO IP option it finds at the
+IP layer without any special handling required by the LSM.  However, in order
+to decode and translate the CIPSO label on the packet the LSM must use the
+NetLabel security module API to extract the security attributes of the packet.
+This is typically done at the socket layer using the 'socket_sock_rcv_skb()'
+LSM hook.
+
+ * Label Translation
+
+The CIPSO/IPv4 protocol engine contains a mechanism to translate CIPSO security
+attributes such as sensitivity level and category to values which are
+appropriate for the host.  These mappings are defined as part of a CIPSO
+Domain Of Interpretation (DOI) definition and are configured through the
+NetLabel user space communication layer.  Each DOI definition can have a
+different security attribute mapping table.
+
+ * Label Translation Cache
+
+The NetLabel system provides a framework for caching security attribute
+mappings from the network labels to the corresponding LSM identifiers.  The
+CIPSO/IPv4 protocol engine supports this caching mechanism.
diff --git a/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt b/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt
new file mode 100644
index 0000000..256c2c9
--- /dev/null
+++ b/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt
@@ -0,0 +1,791 @@
+IETF CIPSO Working Group
+16 July, 1992
+
+
+
+                 COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
+
+
+
+1.    Status
+
+This Internet Draft provides the high level specification for a Commercial
+IP Security Option (CIPSO).  This draft reflects the version as approved by
+the CIPSO IETF Working Group.  Distribution of this memo is unlimited.
+
+This document is an Internet Draft.  Internet Drafts are working documents
+of the Internet Engineering Task Force (IETF), its Areas, and its Working
+Groups. Note that other groups may also distribute working documents as
+Internet Drafts.
+
+Internet Drafts are draft documents valid for a maximum of six months.
+Internet Drafts may be updated, replaced, or obsoleted by other documents
+at any time.  It is not appropriate to use Internet Drafts as reference
+material or to cite them other than as a "working draft" or "work in
+progress."
+
+Please check the I-D abstract listing contained in each Internet Draft
+directory to learn the current status of this or any other Internet Draft.
+
+
+
+
+2.    Background
+
+Currently the Internet Protocol includes two security options.  One of
+these options is the DoD Basic Security Option (BSO) (Type 130) which allows
+IP datagrams to be labeled with security classifications.  This option
+provides sixteen security classifications and a variable number of handling
+restrictions.  To handle additional security information, such as security
+categories or compartments, another security option (Type 133) exists and
+is referred to as the DoD Extended Security Option (ESO).  The values for
+the fixed fields within these two options are administered by the Defense
+Information Systems Agency (DISA).
+
+Computer vendors are now building commercial operating systems with
+mandatory access controls and multi-level security.  These systems are
+no longer built specifically for a particular group in the defense or
+intelligence communities.  They are generally available commercial systems
+for use in a variety of government and civil sector environments.
+
+The small number of ESO format codes can not support all the possible
+applications of a commercial security option.  The BSO and ESO were
+designed to only support the United States DoD.  CIPSO has been designed
+to support multiple security policies.  This Internet Draft provides the
+format and procedures required to support a Mandatory Access Control
+security policy.  Support for additional security policies shall be
+defined in future RFCs.
+
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 1]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+
+3.    CIPSO Format
+
+Option type: 134 (Class 0, Number 6, Copy on Fragmentation)
+Option length: Variable
+
+This option permits security related information to be passed between
+systems within a single Domain of Interpretation (DOI).  A DOI is a
+collection of systems which agree on the meaning of particular values
+in the security option.  An authority that has been assigned a DOI
+identifier will define a mapping between appropriate CIPSO field values
+and their human readable equivalent.  This authority will distribute that
+mapping to hosts within the authority's domain.  These mappings may be
+sensitive, therefore a DOI authority is not required to make these
+mappings available to anyone other than the systems that are included in
+the DOI.
+
+This option MUST be copied on fragmentation.  This option appears at most
+once in a datagram.  All multi-octet fields in the option are defined to be
+transmitted in network byte order.  The format of this option is as follows:
+
++----------+----------+------//------+-----------//---------+
+| 10000110 | LLLLLLLL | DDDDDDDDDDDD | TTTTTTTTTTTTTTTTTTTT |
++----------+----------+------//------+-----------//---------+
+
+  TYPE=134    OPTION    DOMAIN OF               TAGS
+              LENGTH    INTERPRETATION
+
+
+                Figure 1. CIPSO Format
+
+
+3.1    Type
+
+This field is 1 octet in length.  Its value is 134.
+
+
+3.2    Length
+
+This field is 1 octet in length.  It is the total length of the option
+including the type and length fields.  With the current IP header length
+restriction of 40 octets the value of this field MUST not exceed 40.
+
+
+3.3    Domain of Interpretation Identifier
+
+This field is an unsigned 32 bit integer.  The value 0 is reserved and MUST
+not appear as the DOI identifier in any CIPSO option.  Implementations
+should assume that the DOI identifier field is not aligned on any particular
+byte boundary.
+
+To conserve space in the protocol, security levels and categories are
+represented by numbers rather than their ASCII equivalent.  This requires
+a mapping table within CIPSO hosts to map these numbers to their
+corresponding ASCII representations.  Non-related groups of systems may
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 2]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+have their own unique mappings.  For example, one group of systems may
+use the number 5 to represent Unclassified while another group may use the
+number 1 to represent that same security level.  The DOI identifier is used
+to identify which mapping was used for the values within the option.
+
+
+3.4    Tag Types
+
+A common format for passing security related information is necessary
+for interoperability.  CIPSO uses sets of "tags" to contain the security
+information relevant to the data in the IP packet.  Each tag begins with
+a tag type identifier followed by the length of the tag and ends with the
+actual security information to be passed.  All multi-octet fields in a tag
+are defined to be transmitted in network byte order.  Like the DOI
+identifier field in the CIPSO header, implementations should assume that
+all tags, as well as fields within a tag, are not aligned on any particular
+octet boundary.   The tag types defined in this document contain alignment
+bytes to assist alignment of some information, however alignment can not
+be guaranteed if CIPSO is not the first IP option.
+
+CIPSO tag types 0 through 127 are reserved for defining standard tag
+formats.  Their definitions will be published in RFCs.  Tag types whose
+identifiers are greater than 127 are defined by the DOI authority and may
+only be meaningful in certain Domains of Interpretation.  For these tag
+types, implementations will require the DOI identifier as well as the tag
+number to determine the security policy and the format associated with the
+tag.  Use of tag types above 127 is restricted to closed networks where
+interoperability with other networks will not be an issue.  Implementations
+that support a tag type greater than 127 MUST support at least one DOI that
+requires only tag types 1 to 127.
+
+Tag type 0 is reserved. Tag types 1, 2, and 5 are defined in this
+Internet Draft.  Types 3 and 4 are reserved for work in progress.
+The standard format for all current and future CIPSO tags is shown below:
+
++----------+----------+--------//--------+
+| TTTTTTTT | LLLLLLLL | IIIIIIIIIIIIIIII |
++----------+----------+--------//--------+
+    TAG       TAG         TAG
+    TYPE      LENGTH      INFORMATION
+
+    Figure 2:  Standard Tag Format
+
+In the three tag types described in this document, the length and count
+restrictions are based on the current IP limitation of 40 octets for all
+IP options.  If the IP header is later expanded, then the length and count
+restrictions specified in this document may increase to use the full area
+provided for IP options.
+
+
+3.4.1    Tag Type Classes
+
+Tag classes consist of tag types that have common processing requirements
+and support the same security policy.  The three tags defined in this
+Internet Draft belong to the Mandatory Access Control (MAC) Sensitivity
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 3]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+class and support the MAC Sensitivity security policy.
+
+
+3.4.2    Tag Type 1
+
+This is referred to as the "bit-mapped" tag type.  Tag type 1 is included
+in the MAC Sensitivity tag type class.  The format of this tag type is as
+follows:
+
++----------+----------+----------+----------+--------//---------+
+| 00000001 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCC |
++----------+----------+----------+----------+--------//---------+
+
+    TAG       TAG      ALIGNMENT  SENSITIVITY    BIT MAP OF
+    TYPE      LENGTH   OCTET      LEVEL          CATEGORIES
+
+            Figure 3. Tag Type 1 Format
+
+
+3.4.2.1    Tag Type
+
+This field is 1 octet in length and has a value of 1.
+
+
+3.4.2.2    Tag Length
+
+This field is 1 octet in length.  It is the total length of the tag type
+including the type and length fields.  With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.2.3    Alignment Octet
+
+This field is 1 octet in length and always has the value of 0.  Its purpose
+is to align the category bitmap field on an even octet boundary.  This will
+speed many implementations including router implementations.
+
+
+3.4.2.4    Sensitivity Level
+
+This field is 1 octet in length.  Its value is from 0 to 255.  The values
+are ordered with 0 being the minimum value and 255 representing the maximum
+value.
+
+
+3.4.2.5    Bit Map of Categories
+
+The length of this field is variable and ranges from 0 to 30 octets.  This
+provides representation of categories 0 to 239.  The ordering of the bits
+is left to right or MSB to LSB.  For example category 0 is represented by
+the most significant bit of the first byte and category 15 is represented
+by the least significant bit of the second byte.  Figure 4 graphically
+shows this ordering.  Bit N is binary 1 if category N is part of the label
+for the datagram, and bit N is binary 0 if category N is not part of the
+label.  Except for the optimized tag 1 format described in the next section,
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 4]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+minimal encoding SHOULD be used resulting in no trailing zero octets in the
+category bitmap.
+
+        octet 0  octet 1  octet 2  octet 3  octet 4  octet 5
+        XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX . . .
+bit     01234567 89111111 11112222 22222233 33333333 44444444
+number             012345 67890123 45678901 23456789 01234567
+
+            Figure 4. Ordering of Bits in Tag 1 Bit Map
+
+
+3.4.2.6    Optimized Tag 1 Format
+
+Routers work most efficiently when processing fixed length fields.  To
+support these routers there is an optimized form of tag type 1.  The format
+does not change.  The only change is to the category bitmap which is set to
+a constant length of 10 octets.  Trailing octets required to fill out the 10
+octets are zero filled.  Ten octets, allowing for 80 categories, was chosen
+because it makes the total length of the CIPSO option 20 octets.  If CIPSO
+is the only option then the option will be full word aligned and additional
+filler octets will not be required.
+
+
+3.4.3    Tag Type 2
+
+This is referred to as the "enumerated" tag type.  It is used to describe
+large but sparsely populated sets of categories.  Tag type 2 is in the MAC
+Sensitivity tag type class.  The format of this tag type is as follows:
+
++----------+----------+----------+----------+-------------//-------------+
+| 00000010 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCCCCCCCCCCC |
++----------+----------+----------+----------+-------------//-------------+
+
+    TAG       TAG      ALIGNMENT  SENSITIVITY         ENUMERATED
+    TYPE      LENGTH   OCTET      LEVEL               CATEGORIES
+
+                Figure 5. Tag Type 2 Format
+
+
+3.4.3.1     Tag Type
+
+This field is one octet in length and has a value of 2.
+
+
+3.4.3.2    Tag Length
+
+This field is 1 octet in length. It is the total length of the tag type
+including the type and length fields.  With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.3.3    Alignment Octet
+
+This field is 1 octet in length and always has the value of 0.  Its purpose
+is to align the category field on an even octet boundary.  This will
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 5]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+speed many implementations including router implementations.
+
+
+3.4.3.4    Sensitivity Level
+
+This field is 1 octet in length. Its value is from 0 to 255.  The values
+are ordered with 0 being the minimum value and 255 representing the
+maximum value.
+
+
+3.4.3.5    Enumerated Categories
+
+In this tag, categories are represented by their actual value rather than
+by their position within a bit field.  The length of each category is 2
+octets.  Up to 15 categories may be represented by this tag.  Valid values
+for categories are 0 to 65534.  Category 65535 is not a valid category
+value.  The categories MUST be listed in ascending order within the tag.
+
+
+3.4.4    Tag Type 5
+
+This is referred to as the "range" tag type.  It is used to represent
+labels where all categories in a range, or set of ranges, are included
+in the sensitivity label.  Tag type 5 is in the MAC Sensitivity tag type
+class.  The format of this tag type is as follows:
+
++----------+----------+----------+----------+------------//-------------+
+| 00000101 | LLLLLLLL | 00000000 | LLLLLLLL |  Top/Bottom | Top/Bottom  |
++----------+----------+----------+----------+------------//-------------+
+
+    TAG       TAG      ALIGNMENT  SENSITIVITY        CATEGORY RANGES
+    TYPE      LENGTH   OCTET      LEVEL
+
+                     Figure 6. Tag Type 5 Format
+
+
+3.4.4.1     Tag Type
+
+This field is one octet in length and has a value of 5.
+
+
+3.4.4.2    Tag Length
+
+This field is 1 octet in length. It is the total length of the tag type
+including the type and length fields.  With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.4.3    Alignment Octet
+
+This field is 1 octet in length and always has the value of 0.  Its purpose
+is to align the category range field on an even octet boundary.  This will
+speed many implementations including router implementations.
+
+
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 6]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+3.4.4.4    Sensitivity Level
+
+This field is 1 octet in length. Its value is from 0 to 255.  The values
+are ordered with 0 being the minimum value and 255 representing the maximum
+value.
+
+
+3.4.4.5    Category Ranges
+
+A category range is a 4 octet field comprised of the 2 octet index of the
+highest numbered category followed by the 2 octet index of the lowest
+numbered category.  These range endpoints are inclusive within the range of
+categories.  All categories within a range are included in the sensitivity
+label.  This tag may contain a maximum of 7 category pairs.  The bottom
+category endpoint for the last pair in the tag MAY be omitted and SHOULD be
+assumed to be 0.  The ranges MUST be non-overlapping and be listed in
+descending order.  Valid values for categories are 0 to 65534.  Category
+65535 is not a valid category value.
+
+
+3.4.5     Minimum Requirements
+
+A CIPSO implementation MUST be capable of generating at least tag type 1 in
+the non-optimized form.  In addition, a CIPSO implementation MUST be able
+to receive any valid tag type 1 even those using the optimized tag type 1
+format.
+
+
+4.    Configuration Parameters
+
+The configuration parameters defined below are required for all CIPSO hosts,
+gateways, and routers that support multiple sensitivity labels.  A CIPSO
+host is defined to be the origination or destination system for an IP
+datagram.  A CIPSO gateway provides IP routing services between two or more
+IP networks and may be required to perform label translations between
+networks.  A CIPSO gateway may be an enhanced CIPSO host or it may just
+provide gateway services with no end system CIPSO capabilities.  A CIPSO
+router is a dedicated IP router that routes IP datagrams between two or more
+IP networks.
+
+An implementation of CIPSO on a host MUST have the capability to reject a
+datagram for reasons that the information contained can not be adequately
+protected by the receiving host or if acceptance may result in violation of
+the host or network security policy.  In addition, a CIPSO gateway or router
+MUST be able to reject datagrams going to networks that can not provide
+adequate protection or may violate the network's security policy.  To
+provide this capability the following minimal set of configuration
+parameters is required for CIPSO implementations:
+
+HOST_LABEL_MAX - This parameter contains the maximum sensitivity label that
+a CIPSO host is authorized to handle.  All datagrams that have a label
+greater than this maximum MUST be rejected by the CIPSO host.  This
+parameter does not apply to CIPSO gateways or routers.  This parameter need
+not be defined explicitly as it can be implicitly derived from the
+PORT_LABEL_MAX parameters for the associated interfaces.
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 7]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+
+HOST_LABEL_MIN - This parameter contains the minimum sensitivity label that
+a CIPSO host is authorized to handle.  All datagrams that have a label less
+than this minimum MUST be rejected by the CIPSO host.  This parameter does
+not apply to CIPSO gateways or routers.  This parameter need not be defined
+explicitly as it can be implicitly derived from the PORT_LABEL_MIN
+parameters for the associated interfaces.
+
+PORT_LABEL_MAX - This parameter contains the maximum sensitivity label for
+all datagrams that may exit a particular network interface port.  All
+outgoing datagrams that have a label greater than this maximum MUST be
+rejected by the CIPSO system.  The label within this parameter MUST be
+less than or equal to the label within the HOST_LABEL_MAX parameter.  This
+parameter does not apply to CIPSO hosts that support only one network port.
+
+PORT_LABEL_MIN - This parameter contains the minimum sensitivity label for
+all datagrams that may exit a particular network interface port.  All
+outgoing datagrams that have a label less than this minimum MUST be
+rejected by the CIPSO system.  The label within this parameter MUST be
+greater than or equal to the label within the HOST_LABEL_MIN parameter.
+This parameter does not apply to CIPSO hosts that support only one network
+port.
+
+PORT_DOI - This parameter is used to assign a DOI identifier value to a
+particular network interface port.  All CIPSO labels within datagrams
+going out this port MUST use the specified DOI identifier.  All CIPSO
+hosts and gateways MUST support either this parameter, the NET_DOI
+parameter, or the HOST_DOI parameter.
+
+NET_DOI - This parameter is used to assign a DOI identifier value to a
+particular IP network address.  All CIPSO labels within datagrams destined
+for the particular IP network MUST use the specified DOI identifier.  All
+CIPSO hosts and gateways MUST support either this parameter, the PORT_DOI
+parameter, or the HOST_DOI parameter.
+
+HOST_DOI - This parameter is used to assign a DOI identifier value to a
+particular IP host address.  All CIPSO labels within datagrams destined for
+the particular IP host will use the specified DOI identifier.  All CIPSO
+hosts and gateways MUST support either this parameter, the PORT_DOI
+parameter, or the NET_DOI parameter.
+
+This list represents the minimal set of configuration parameters required
+to be compliant.  Implementors are encouraged to add to this list to
+provide enhanced functionality and control.  For example, many security
+policies may require both incoming and outgoing datagrams be checked against
+the port and host label ranges.
+
+
+4.1    Port Range Parameters
+
+The labels represented by the PORT_LABEL_MAX and PORT_LABEL_MIN parameters
+MAY be in CIPSO or local format.  Some CIPSO systems, such as routers, may
+want to have the range parameters expressed in CIPSO format so that incoming
+labels do not have to be converted to a local format before being compared
+against the range.  If multiple DOIs are supported by one of these CIPSO
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 8]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+systems then multiple port range parameters would be needed, one set for
+each DOI supported on a particular port.
+
+The port range will usually represent the total set of labels that may
+exist on the logical network accessed through the corresponding network
+interface.  It may, however, represent a subset of these labels that are
+allowed to enter the CIPSO system.
+
+
+4.2    Single Label CIPSO Hosts
+
+CIPSO implementations that support only one label are not required to
+support the parameters described above.  These limited implementations are
+only required to support a NET_LABEL parameter.  This parameter contains
+the CIPSO label that may be inserted in datagrams that exit the host.  In
+addition, the host MUST reject any incoming datagram that has a label which
+is not equivalent to the NET_LABEL parameter.
+
+
+5.    Handling Procedures
+
+This section describes the processing requirements for incoming and
+outgoing IP datagrams.  Just providing the correct CIPSO label format
+is not enough.  Assumptions will be made by one system on how a
+receiving system will handle the CIPSO label.  Wrong assumptions may
+lead to non-interoperability or even a security incident.  The
+requirements described below represent the minimal set needed for
+interoperability and that provide users some level of confidence.
+Many other requirements could be added to increase user confidence,
+however at the risk of restricting creativity and limiting vendor
+participation.
+
+
+5.1    Input Procedures
+
+All datagrams received through a network port MUST have a security label
+associated with them, either contained in the datagram or assigned to the
+receiving port.  Without this label the host, gateway, or router will not
+have the information it needs to make security decisions.  This security
+label will be obtained from the CIPSO if the option is present in the
+datagram.  See section 5.1.2 for handling procedures for unlabeled
+datagrams.  This label will be compared against the PORT (if appropriate)
+and HOST configuration parameters defined in section 4.
+
+If any field within the CIPSO option, such as the DOI identifier, is not
+recognized the IP datagram is discarded and an ICMP "parameter problem"
+(type 12) is generated and returned.  The ICMP code field is set to "bad
+parameter" (code 0) and the pointer is set to the start of the CIPSO field
+that is unrecognized.
+
+If the contents of the CIPSO are valid but the security label is
+outside of the configured host or port label range, the datagram is
+discarded and an ICMP "destination unreachable" (type 3) is generated
+and returned.  The code field of the ICMP is set to "communication with
+destination network administratively prohibited" (code 9) or to
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 9]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+"communication with destination host administratively prohibited"
+(code 10).  The value of the code field used is dependent upon whether
+the originator of the ICMP message is acting as a CIPSO host or a CIPSO
+gateway.  The recipient of the ICMP message MUST be able to handle either
+value.  The same procedure is performed if a CIPSO can not be added to an
+IP packet because it is too large to fit in the IP options area.
+
+If the error is triggered by receipt of an ICMP message, the message
+is discarded and no response is permitted (consistent with general ICMP
+processing rules).
+
+
+5.1.1    Unrecognized tag types
+
+The default condition for any CIPSO implementation is that an
+unrecognized tag type MUST be treated as a "parameter problem" and
+handled as described in section 5.1.  A CIPSO implementation MAY allow
+the system administrator to identify tag types that may safely be
+ignored.  This capability is an allowable enhancement, not a
+requirement.
+
+
+5.1.2    Unlabeled Packets
+
+A network port may be configured to not require a CIPSO label for all
+incoming datagrams.  For this configuration a CIPSO label must be
+assigned to that network port and associated with all unlabeled IP
+datagrams.  This capability might be used for single level networks or
+networks that have CIPSO and non-CIPSO hosts and the non-CIPSO hosts
+all operate at the same label.
+
+If a CIPSO option is required and none is found, the datagram is
+discarded and an ICMP "parameter problem" (type 12) is generated and
+returned to the originator of the datagram.  The code field of the ICMP
+is set to "option missing" (code 1) and the ICMP pointer is set to 134
+(the value of the option type for the missing CIPSO option).
+
+
+5.2    Output Procedures
+
+A CIPSO option MUST appear only once in a datagram.  Only one tag type
+from the MAC Sensitivity class MAY be included in a CIPSO option.  Given
+the current set of defined tag types, this means that CIPSO labels at
+first will contain only one tag.
+
+All datagrams leaving a CIPSO system MUST meet the following condition:
+
+        PORT_LABEL_MIN <= CIPSO label <= PORT_LABEL_MAX
+
+If this condition is not satisfied the datagram MUST be discarded.
+If the CIPSO system only supports one port, the HOST_LABEL_MIN and the
+HOST_LABEL_MAX parameters MAY be substituted for the PORT parameters in
+the above condition.
+
+The DOI identifier to be used for all outgoing datagrams is configured by
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 10]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+the administrator.  If port level DOI identifier assignment is used, then
+the PORT_DOI configuration parameter MUST contain the DOI identifier to
+use.  If network level DOI assignment is used, then the NET_DOI parameter
+MUST contain the DOI identifier to use.  And if host level DOI assignment
+is employed, then the HOST_DOI parameter MUST contain the DOI identifier
+to use.  A CIPSO implementation need only support one level of DOI
+assignment.
+
+
+5.3    DOI Processing Requirements
+
+A CIPSO implementation MUST support at least one DOI and SHOULD support
+multiple DOIs.  System and network administrators are cautioned to
+ensure that at least one DOI is common within an IP network to allow for
+broadcasting of IP datagrams.
+
+CIPSO gateways MUST be capable of translating a CIPSO option from one
+DOI to another when forwarding datagrams between networks.  For
+efficiency purposes this capability is only a desired feature for CIPSO
+routers.
+
+
+5.4    Label of ICMP Messages
+
+The CIPSO label to be used on all outgoing ICMP messages MUST be equivalent
+to the label of the datagram that caused the ICMP message.  If the ICMP was
+generated due to a problem associated with the original CIPSO label then the
+following responses are allowed:
+
+  a.  Use the CIPSO label of the original IP datagram
+  b.  Drop the original datagram with no return message generated
+
+In most cases these options will have the same effect.  If you can not
+interpret the label or if it is outside the label range of your host or
+interface then an ICMP message with the same label will probably not be
+able to exit the system.
+
+
+6.    Assignment of DOI Identifier Numbers
+
+Requests for assignment of a DOI identifier number should be addressed to
+the Internet Assigned Numbers Authority (IANA).
+
+
+7.    Acknowledgements
+
+Much of the material in this RFC is based on (and copied from) work
+done by Gary Winiger of Sun Microsystems and published as Commercial
+IP Security Option at the INTEROP 89, Commercial IPSO Workshop.
+
+
+8.    Author's Address
+
+To submit mail for distribution to members of the IETF CIPSO Working
+Group, send mail to: cipso@wdl1.wdl.loral.com.
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 11]
+
+
+
+CIPSO INTERNET DRAFT                                         16 July, 1992
+
+
+
+
+To be added to or deleted from this distribution, send mail to:
+cipso-request@wdl1.wdl.loral.com.
+
+
+9.    References
+
+RFC 1038, "Draft Revised IP Security Option", M. St. Johns, IETF, January
+1988.
+
+RFC 1108, "U.S. Department of Defense Security Options
+for the Internet Protocol", Stephen Kent, IAB, 1 March, 1991.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Draft, Expires 15 Jan 93                                 [PAGE 12]
+
+
+
diff --git a/Documentation/netlabel/introduction.txt b/Documentation/netlabel/introduction.txt
new file mode 100644
index 0000000..a4ffba1
--- /dev/null
+++ b/Documentation/netlabel/introduction.txt
@@ -0,0 +1,46 @@
+NetLabel Introduction
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+August 2, 2006
+
+ * Overview
+
+NetLabel is a mechanism which can be used by kernel security modules to attach
+security attributes to outgoing network packets generated from user space
+applications and read security attributes from incoming network packets.  It
+is composed of three main components, the protocol engines, the communication
+layer, and the kernel security module API.
+
+ * Protocol Engines
+
+The protocol engines are responsible for both applying and retrieving the
+network packet's security attributes.  If any translation between the network
+security attributes and those on the host is required then the protocol
+engine will handle those tasks as well.  Other kernel subsystems should
+refrain from calling the protocol engines directly, instead they should use
+the NetLabel kernel security module API described below.
+
+Detailed information about each NetLabel protocol engine can be found in this
+directory, consult '00-INDEX' for filenames.
+
+ * Communication Layer
+
+The communication layer exists to allow NetLabel configuration and monitoring
+from user space.  The NetLabel communication layer uses a message based
+protocol built on top of the Generic NETLINK transport mechanism.  The exact
+formatting of these NetLabel messages as well as the Generic NETLINK family
+names can be found in the 'net/netlabel/' directory as comments in the
+header files as well as in 'include/net/netlabel.h'.
+
+ * Security Module API
+
+The purpose of the NetLabel security module API is to provide a protocol
+independent interface to the underlying NetLabel protocol engines.  In addition
+to protocol independence, the security module API is designed to be completely
+LSM independent which should allow multiple LSMs to leverage the same code
+base.
+
+Detailed information about the NetLabel security module API can be found in the
+'include/net/netlabel.h' header file as well as the 'lsm_interface.txt' file
+found in this directory.
diff --git a/Documentation/netlabel/lsm_interface.txt b/Documentation/netlabel/lsm_interface.txt
new file mode 100644
index 0000000..98dd9f7
--- /dev/null
+++ b/Documentation/netlabel/lsm_interface.txt
@@ -0,0 +1,47 @@
+NetLabel Linux Security Module Interface
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+ * Overview
+
+NetLabel is a mechanism which can set and retrieve security attributes from
+network packets.  It is intended to be used by LSM developers who want to make
+use of a common code base for several different packet labeling protocols.
+The NetLabel security module API is defined in 'include/net/netlabel.h' but a
+brief overview is given below.
+
+ * NetLabel Security Attributes
+
+Since NetLabel supports multiple different packet labeling protocols and LSMs
+it uses the concept of security attributes to refer to the packet's security
+labels.  The NetLabel security attributes are defined by the
+'netlbl_lsm_secattr' structure in the NetLabel header file.  Internally the
+NetLabel subsystem converts the security attributes to and from the correct
+low-level packet label depending on the NetLabel build time and run time
+configuration.  It is up to the LSM developer to translate the NetLabel
+security attributes into whatever security identifiers are in use for their
+particular LSM.
+
+ * NetLabel LSM Protocol Operations
+
+These are the functions which allow the LSM developer to manipulate the labels
+on outgoing packets as well as read the labels on incoming packets.  Functions
+exist to operate both on sockets as well as the sk_buffs directly.  These high
+level functions are translated into low level protocol operations based on how
+the administrator has configured the NetLabel subsystem.
+
+ * NetLabel Label Mapping Cache Operations
+
+Depending on the exact configuration, translation between the network packet
+label and the internal LSM security identifier can be time consuming.  The
+NetLabel label mapping cache is a caching mechanism which can be used to
+sidestep much of this overhead once a mapping has been established.  Once the
+LSM has received a packet, used NetLabel to decode its security attributes,
+and translated the security attributes into a LSM internal identifier the LSM
+can use the NetLabel caching functions to associate the LSM internal
+identifier with the network packet's label.  This means that in the future
+when an incoming packet matches a cached value not only are the internal
+NetLabel translation mechanisms bypassed but the LSM translation mechanisms are
+bypassed as well which should result in a significant reduction in overhead.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 90ed781..935e298 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -375,6 +375,41 @@
 	be timed out after an idle period.
 	Default: 1
 
+CIPSOv4 Variables:
+
+cipso_cache_enable - BOOLEAN
+	If set, enable additions to and lookups from the CIPSO label mapping
+	cache.  If unset, additions are ignored and lookups always result in a
+	miss.  However, regardless of the setting the cache is still
+	invalidated when required which means you can safely toggle this on and
+	off and the cache will always be "safe".
+	Default: 1
+
+cipso_cache_bucket_size - INTEGER
+	The CIPSO label cache consists of a fixed size hash table with each
+	hash bucket containing a number of cache entries.  This variable limits
+	the number of entries in each hash bucket; the larger the value the
+	more CIPSO label mappings that can be cached.  When the number of
+	entries in a given hash bucket reaches this limit adding new entries
+	causes the oldest entry in the bucket to be removed to make room.
+	Default: 10
+
+cipso_rbm_optfmt - BOOLEAN
+	Enable the "Optimized Tag 1 Format" as defined in section 3.4.2.6 of
+	the CIPSO draft specification (see Documentation/netlabel for details).
+	This means that when set the CIPSO tag will be padded with empty
+	categories in order to make the packet data 32-bit aligned.
+	Default: 0
+
+cipso_rbm_structvalid - BOOLEAN
+	If set, do a very strict check of the CIPSO option when
+	ip_options_compile() is called.  If unset, relax the checks done during
+	ip_options_compile().  Either way is "safe" as errors are caught
+	elsewhere in the CIPSO processing code but setting this to 0 (False)
+	should result in less work (i.e. it should be faster) but could cause
+	problems with other implementations that require strict checking.
+	Default: 0
+
 IP Variables:
 
 ip_local_port_range - 2 INTEGERS
@@ -730,6 +765,9 @@
 
 	This referred to as global forwarding.
 
+proxy_ndp - BOOLEAN
+	Do proxy ndp.
+
 conf/interface/*:
 	Change special settings per interface.
 
diff --git a/Documentation/networking/secid.txt b/Documentation/networking/secid.txt
new file mode 100644
index 0000000..95ea067
--- /dev/null
+++ b/Documentation/networking/secid.txt
@@ -0,0 +1,14 @@
+flowi structure:
+
+The secid member in the flow structure is used in LSMs (e.g. SELinux) to indicate
+the label of the flow. This label of the flow is currently used in selecting
+matching labeled xfrm(s).
+
+If this is an outbound flow, the label is derived from the socket, if any, or
+the incoming packet this flow is being generated as a response to (e.g. tcp
+resets, timewait ack, etc.). It is also conceivable that the label could be
+derived from other sources such as process context, device, etc., in special
+cases, as may be appropriate.
+
+If this is an inbound flow, the label is derived from the IPSec security
+associations, if any, used by the packet.
diff --git a/Documentation/scsi/ChangeLog.arcmsr b/Documentation/scsi/ChangeLog.arcmsr
new file mode 100644
index 0000000..162c47f
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.arcmsr
@@ -0,0 +1,56 @@
+**************************************************************************
+** History
+**
+**   REV#         DATE             NAME         DESCRIPTION
+** 1.00.00.00    3/31/2004       Erich Chen     First release
+** 1.10.00.04    7/28/2004       Erich Chen     modify for ioctl
+** 1.10.00.06    8/28/2004       Erich Chen     modify for 2.6.x
+** 1.10.00.08    9/28/2004       Erich Chen     modify for x86_64
+** 1.10.00.10   10/10/2004       Erich Chen     bug fix for SMP & ioctl
+** 1.20.00.00   11/29/2004       Erich Chen     bug fix with arcmsr_bus_reset when PHY error
+** 1.20.00.02   12/09/2004       Erich Chen     bug fix with over 2T bytes RAID Volume
+** 1.20.00.04    1/09/2005       Erich Chen     fits for Debian linux kernel version 2.2.xx
+** 1.20.00.05    2/20/2005       Erich Chen     cleanly as look like a Linux driver at 2.6.x
+**                                              thanks for peoples kindness comment
+**						Kornel Wieliczek
+**						Christoph Hellwig
+**						Adrian Bunk
+**						Andrew Morton
+**						Christoph Hellwig
+**						James Bottomley
+**						Arjan van de Ven
+** 1.20.00.06    3/12/2005       Erich Chen     fix with arcmsr_pci_unmap_dma "unsigned long" cast,
+**						modify PCCB POOL allocated by "dma_alloc_coherent"
+**						(Kornel Wieliczek's comment)
+** 1.20.00.07    3/23/2005       Erich Chen     bug fix with arcmsr_scsi_host_template_init
+**						occur segmentation fault,
+**						if RAID adapter does not on PCI slot
+**						and modprobe/rmmod this driver twice.
+**						bug fix enormous stack usage (Adrian Bunk's comment)
+** 1.20.00.08    6/23/2005       Erich Chen     bug fix with abort command,
+**						in case of heavy loading when sata cable
+**						working on low quality connection
+** 1.20.00.09    9/12/2005       Erich Chen     bug fix with abort command handling, firmware version check
+**						and firmware update notify for hardware bug fix
+** 1.20.00.10    9/23/2005       Erich Chen     enhance sysfs function for change driver's max tag Q number.
+**						add DMA_64BIT_MASK for backward compatible with all 2.6.x
+**						add some useful message for abort command
+**						add ioctl code 'ARCMSR_IOCTL_FLUSH_ADAPTER_CACHE'
+**						customer can send this command for sync raid volume data
+** 1.20.00.11    9/29/2005       Erich Chen     by comment of Arjan van de Ven fix incorrect msleep redefine
+**						cast off sizeof(dma_addr_t) condition for 64bit pci_set_dma_mask
+** 1.20.00.12    9/30/2005       Erich Chen     bug fix with 64bit platform's ccbs using if over 4G system memory
+**						change 64bit pci_set_consistent_dma_mask into 32bit
+**						incorrect adapter count if adapter initialize fail.
+**						miss edit at arcmsr_build_ccb....
+**						psge += sizeof(struct _SG64ENTRY *) =>
+**						psge += sizeof(struct _SG64ENTRY)
+**						64 bits sg entry would be incorrectly calculated
+**						thanks Kornel Wieliczek give me kindly notify
+**						and detail description
+** 1.20.00.13   11/15/2005       Erich Chen     scheduling pending ccb with FIFO
+**						change the architecture of arcmsr command queue list
+**						for linux standard list
+**						enable usage of pci message signal interrupt
+**						follow Randy Dunlap's kind suggestion to clean up this code
+**************************************************************************
\ No newline at end of file
diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt
index be55670..ee03678 100644
--- a/Documentation/scsi/aacraid.txt
+++ b/Documentation/scsi/aacraid.txt
@@ -11,38 +11,43 @@
 Supported Cards/Chipsets
 -------------------------
 	PCI ID (pci.ids)	OEM	Product
-	9005:0285:9005:028a	Adaptec	2020ZCR (Skyhawk)
-	9005:0285:9005:028e	Adaptec	2020SA (Skyhawk)
-	9005:0285:9005:028b	Adaptec 2025ZCR (Terminator)
-	9005:0285:9005:028f	Adaptec 2025SA (Terminator)
-	9005:0285:9005:0286	Adaptec	2120S (Crusader)
-	9005:0286:9005:028d	Adaptec	2130S (Lancer)
-	9005:0285:9005:0285	Adaptec	2200S (Vulcan)
-	9005:0285:9005:0287	Adaptec	2200S (Vulcan-2m)
-	9005:0286:9005:028c	Adaptec	2230S (Lancer)
-	9005:0286:9005:028c	Adaptec	2230SLP (Lancer)
-	9005:0285:9005:0296	Adaptec	2240S (SabreExpress)
-	9005:0285:9005:0290	Adaptec	2410SA (Jaguar)
-	9005:0285:9005:0293	Adaptec 21610SA (Corsair-16)
-	9005:0285:103c:3227	Adaptec 2610SA (Bearcat HP release)
-	9005:0285:9005:0292	Adaptec	2810SA (Corsair-8)
-	9005:0285:9005:0294	Adaptec	Prowler
-	9005:0286:9005:029d	Adaptec	2420SA (Intruder HP release)
-	9005:0286:9005:029c	Adaptec	2620SA (Intruder)
-	9005:0286:9005:029b	Adaptec	2820SA (Intruder)
-	9005:0286:9005:02a7	Adaptec	2830SA (Skyray)
-	9005:0286:9005:02a8	Adaptec	2430SA (Skyray)
-	9005:0285:9005:0288	Adaptec	3230S (Harrier)
-	9005:0285:9005:0289	Adaptec	3240S (Tornado)
-	9005:0285:9005:0298	Adaptec	4000SAS (BlackBird)
-	9005:0285:9005:0297	Adaptec	4005SAS (AvonPark)
-	9005:0285:9005:0299	Adaptec	4800SAS (Marauder-X)
-	9005:0285:9005:029a	Adaptec	4805SAS (Marauder-E)
-	9005:0286:9005:02a2	Adaptec	3800SAS (Hurricane44)
-	1011:0046:9005:0364	Adaptec	5400S (Mustang)
-	1011:0046:9005:0365	Adaptec	5400S (Mustang)
 	9005:0283:9005:0283	Adaptec	Catapult (3210S with arc firmware)
 	9005:0284:9005:0284	Adaptec	Tomcat (3410S with arc firmware)
+	9005:0285:9005:0285	Adaptec	2200S (Vulcan)
+	9005:0285:9005:0286	Adaptec	2120S (Crusader)
+	9005:0285:9005:0287	Adaptec	2200S (Vulcan-2m)
+	9005:0285:9005:0288	Adaptec	3230S (Harrier)
+	9005:0285:9005:0289	Adaptec	3240S (Tornado)
+	9005:0285:9005:028a	Adaptec	2020ZCR (Skyhawk)
+	9005:0285:9005:028b	Adaptec 2025ZCR (Terminator)
+	9005:0286:9005:028c	Adaptec	2230S (Lancer)
+	9005:0286:9005:028c	Adaptec	2230SLP (Lancer)
+	9005:0286:9005:028d	Adaptec	2130S (Lancer)
+	9005:0285:9005:028e	Adaptec	2020SA (Skyhawk)
+	9005:0285:9005:028f	Adaptec 2025SA (Terminator)
+	9005:0285:9005:0290	Adaptec	2410SA (Jaguar)
+	9005:0285:103c:3227	Adaptec 2610SA (Bearcat HP release)
+	9005:0285:9005:0293	Adaptec 21610SA (Corsair-16)
+	9005:0285:9005:0296	Adaptec	2240S (SabreExpress)
+	9005:0285:9005:0292	Adaptec	2810SA (Corsair-8)
+	9005:0285:9005:0294	Adaptec	Prowler
+	9005:0285:9005:0297	Adaptec	4005SAS (AvonPark)
+	9005:0285:9005:0298	Adaptec	4000SAS (BlackBird)
+	9005:0285:9005:0299	Adaptec	4800SAS (Marauder-X)
+	9005:0285:9005:029a	Adaptec	4805SAS (Marauder-E)
+	9005:0286:9005:029b	Adaptec	2820SA (Intruder)
+	9005:0286:9005:029c	Adaptec	2620SA (Intruder)
+	9005:0286:9005:029d	Adaptec	2420SA (Intruder HP release)
+	9005:0286:9005:02a2	Adaptec	3800SAS (Hurricane44)
+	9005:0286:9005:02a7	Adaptec	3805SAS (Hurricane80)
+	9005:0286:9005:02a8	Adaptec	3400SAS (Hurricane40)
+	9005:0286:9005:02ac	Adaptec	1800SAS (Typhoon44)
+	9005:0286:9005:02b3	Adaptec	2400SAS (Hurricane40lm)
+	9005:0285:9005:02b5	Adaptec ASR5800 (Voodoo44)
+	9005:0285:9005:02b6	Adaptec ASR5805 (Voodoo80)
+	9005:0285:9005:02b7	Adaptec ASR5808 (Voodoo08)
+	1011:0046:9005:0364	Adaptec	5400S (Mustang)
+	1011:0046:9005:0365	Adaptec	5400S (Mustang)
 	9005:0287:9005:0800	Adaptec	Themisto (Jupiter)
 	9005:0200:9005:0200	Adaptec	Themisto (Jupiter)
 	9005:0286:9005:0800	Adaptec	Callisto (Jupiter)
@@ -64,18 +69,20 @@
 	9005:0285:9005:0290	IBM	ServeRAID 7t (Jaguar)
 	9005:0285:1014:02F2	IBM	ServeRAID 8i (AvonPark)
 	9005:0285:1014:0312	IBM	ServeRAID 8i (AvonParkLite)
-	9005:0286:1014:9580	IBM	ServeRAID 8k/8k-l8 (Aurora)
 	9005:0286:1014:9540	IBM	ServeRAID 8k/8k-l4 (AuroraLite)
-	9005:0286:9005:029f	ICP	ICP9014R0 (Lancer)
+	9005:0286:1014:9580	IBM	ServeRAID 8k/8k-l8 (Aurora)
+	9005:0286:1014:034d	IBM	ServeRAID 8s (Hurricane)
 	9005:0286:9005:029e	ICP	ICP9024R0 (Lancer)
+	9005:0286:9005:029f	ICP	ICP9014R0 (Lancer)
 	9005:0286:9005:02a0	ICP	ICP9047MA (Lancer)
 	9005:0286:9005:02a1	ICP	ICP9087MA (Lancer)
+	9005:0286:9005:02a3	ICP	ICP5445AU (Hurricane44)
 	9005:0286:9005:02a4	ICP	ICP9085LI (Marauder-X)
 	9005:0286:9005:02a5	ICP	ICP5085BR (Marauder-E)
-	9005:0286:9005:02a3	ICP	ICP5445AU (Hurricane44)
 	9005:0286:9005:02a6	ICP	ICP9067MA (Intruder-6)
-	9005:0286:9005:02a9	ICP	ICP5087AU (Skyray)
-	9005:0286:9005:02aa	ICP	ICP5047AU (Skyray)
+	9005:0286:9005:02a9	ICP	ICP5085AU (Hurricane80)
+	9005:0286:9005:02aa	ICP	ICP5045AU (Hurricane40)
+	9005:0286:9005:02b4	ICP	ICP5045AL (Hurricane40lm)
 
 People
 -------------------------
diff --git a/Documentation/scsi/arcmsr_spec.txt b/Documentation/scsi/arcmsr_spec.txt
new file mode 100644
index 0000000..5e00423
--- /dev/null
+++ b/Documentation/scsi/arcmsr_spec.txt
@@ -0,0 +1,574 @@
+*******************************************************************************
+**                            ARECA FIRMWARE SPEC
+*******************************************************************************
+**	Usage of IOP331 adapter
+**	(All In/Out is in IOP331's view)
+**	1. Message 0 --> InitThread message and return code
+**	2. Doorbell is used for RS-232 emulation
+**		inDoorBell :    bit0 -- data in ready
+**			(DRIVER DATA WRITE OK)
+**				bit1 -- data out has been read
+**			(DRIVER DATA READ OK)
+**		outDoorBell:    bit0 -- data out ready
+**			(IOP331 DATA WRITE OK)
+**				bit1 -- data in has been read
+**			(IOP331 DATA READ OK)
+**	3. Index Memory Usage
+**	offset 0xf00 : for RS232 out (request buffer)
+**	offset 0xe00 : for RS232 in  (scratch buffer)
+**	offset 0xa00 : for inbound message code message_rwbuffer
+**			(driver send to IOP331)
+**	offset 0xa00 : for outbound message code message_rwbuffer
+**			(IOP331 send to driver)
+**	4. RS-232 emulation
+**		Currently 128 byte buffer is used
+**			1st uint32_t : Data length (1--124)
+**			Byte 4--127  : Max 124 bytes of data
+**	5. PostQ
+**	All SCSI Command must be sent through postQ:
+**	(inbound queue port)	Request frame must be 32 bytes aligned
+**	#bit27--bit31 => flag for post ccb
+**	#bit0--bit26  => real address (bit27--bit31) of post arcmsr_cdb
+**		bit31 :
+**			0 : 256 bytes frame
+**			1 : 512 bytes frame
+**		bit30 :
+**			0 : normal request
+**			1 : BIOS request
+**		bit29 : reserved
+**		bit28 : reserved
+**		bit27 : reserved
+**  ---------------------------------------------------------------------------
+**	(outbound queue port)	Request reply
+**	#bit27--bit31
+**		=> flag for reply
+**	#bit0--bit26
+**		=> real address (bit27--bit31) of reply arcmsr_cdb
+**			bit31 : must be 0 (for this type of reply)
+**			bit30 : reserved for BIOS handshake
+**			bit29 : reserved
+**			bit28 :
+**			0 : no error, ignore AdapStatus/DevStatus/SenseData
+**			1 : Error, error code in AdapStatus/DevStatus/SenseData
+**			bit27 : reserved
+**	6. BIOS request
+**		All BIOS request is the same with request from PostQ
+**		Except :
+**			Request frame is sent from configuration space
+**		offset: 0x78 : Request Frame (bit30 == 1)
+**		offset: 0x18 : writeonly to generate
+**					IRQ to IOP331
+**		Completion of request:
+**			(bit30 == 0, bit28==err flag)
+**	7. Definition of SGL entry (structure)
+**	8. Message1 Out - Diag Status Code (????)
+**	9. Message0 message code :
+**		0x00 : NOP
+**		0x01 : Get Config
+**		->offset 0xa00 :for outbound message code message_rwbuffer
+**		(IOP331 send to driver)
+**		Signature             0x87974060(4)
+**		Request len           0x00000200(4)
+**		numbers of queue      0x00000100(4)
+**		SDRAM Size            0x00000100(4)-->256 MB
+**		IDE Channels          0x00000008(4)
+**		vendor                40 bytes char
+**		model                  8 bytes char
+**		FirmVer               16 bytes char
+**		Device Map            16 bytes char
+**		FirmwareVersion DWORD <== Added for checking of
+**						new firmware capability
+**		0x02 : Set Config
+**		->offset 0xa00 :for inbound message code message_rwbuffer
+**		(driver send to IOP331)
+**		Signature             0x87974063(4)
+**		UPPER32 of Request Frame  (4)-->Driver Only
+**		0x03 : Reset (Abort all queued Command)
+**		0x04 : Stop Background Activity
+**		0x05 : Flush Cache
+**		0x06 : Start Background Activity
+**			(re-start if background is halted)
+**		0x07 : Check If Host Command Pending
+**			(Novell May Need This Function)
+**		0x08 : Set controller time
+**		->offset 0xa00 : for inbound message code message_rwbuffer
+**		(driver to IOP331)
+**		byte 0 : 0xaa <-- signature
+**		byte 1 : 0x55 <-- signature
+**		byte 2 : year (04)
+**		byte 3 : month (1..12)
+**		byte 4 : date (1..31)
+**		byte 5 : hour (0..23)
+**		byte 6 : minute (0..59)
+**		byte 7 : second (0..59)
+*******************************************************************************
+*******************************************************************************
+**      	RS-232 Interface for Areca Raid Controller
+**      The low level command interface is exclusive with VT100 terminal
+**  --------------------------------------------------------------------
+**      1. Sequence of command execution
+**  --------------------------------------------------------------------
+**    	(A) Header : 3 bytes sequence (0x5E, 0x01, 0x61)
+**    	(B) Command block : variable length of data including length,
+**    		command code, data and checksum byte
+**    	(C) Return data : variable length of data
+**  --------------------------------------------------------------------
+**    2. Command block
+**  --------------------------------------------------------------------
+**    	(A) 1st byte : command block length (low byte)
+**    	(B) 2nd byte : command block length (high byte)
+**                note: command block length must not exceed 2040 bytes,
+**    		length excludes these two bytes
+**    	(C) 3rd byte : command code
+**    	(D) 4th and following bytes : variable length data bytes
+**    		depends on command code
+**    	(E) last byte : checksum byte (sum of 1st byte until last data byte)
+**  --------------------------------------------------------------------
+**    3. Command code and associated data
+**  --------------------------------------------------------------------
+**    	The following are command codes defined in raid controller.  Command
+**    	code 0x10--0x1? are used for system level management,
+**    	no password checking is needed and should be implemented in separate
+**    	well controlled utility and not for end user access.
+**    	Command code 0x20--0x?? always check the password,
+**    	password must be entered to enable these command.
+**    	enum
+**    	{
+**    		GUI_SET_SERIAL=0x10,
+**    		GUI_SET_VENDOR,
+**    		GUI_SET_MODEL,
+**    		GUI_IDENTIFY,
+**    		GUI_CHECK_PASSWORD,
+**    		GUI_LOGOUT,
+**    		GUI_HTTP,
+**    		GUI_SET_ETHERNET_ADDR,
+**    		GUI_SET_LOGO,
+**    		GUI_POLL_EVENT,
+**    		GUI_GET_EVENT,
+**    		GUI_GET_HW_MONITOR,
+**    		//    GUI_QUICK_CREATE=0x20, (function removed)
+**    		GUI_GET_INFO_R=0x20,
+**    		GUI_GET_INFO_V,
+**    		GUI_GET_INFO_P,
+**    		GUI_GET_INFO_S,
+**    		GUI_CLEAR_EVENT,
+**    		GUI_MUTE_BEEPER=0x30,
+**    		GUI_BEEPER_SETTING,
+**    		GUI_SET_PASSWORD,
+**    		GUI_HOST_INTERFACE_MODE,
+**    		GUI_REBUILD_PRIORITY,
+**    		GUI_MAX_ATA_MODE,
+**    		GUI_RESET_CONTROLLER,
+**    		GUI_COM_PORT_SETTING,
+**    		GUI_NO_OPERATION,
+**    		GUI_DHCP_IP,
+**    		GUI_CREATE_PASS_THROUGH=0x40,
+**    		GUI_MODIFY_PASS_THROUGH,
+**    		GUI_DELETE_PASS_THROUGH,
+**    		GUI_IDENTIFY_DEVICE,
+**    		GUI_CREATE_RAIDSET=0x50,
+**    		GUI_DELETE_RAIDSET,
+**    		GUI_EXPAND_RAIDSET,
+**    		GUI_ACTIVATE_RAIDSET,
+**    		GUI_CREATE_HOT_SPARE,
+**    		GUI_DELETE_HOT_SPARE,
+**    		GUI_CREATE_VOLUME=0x60,
+**    		GUI_MODIFY_VOLUME,
+**    		GUI_DELETE_VOLUME,
+**    		GUI_START_CHECK_VOLUME,
+**    		GUI_STOP_CHECK_VOLUME
+**    	};
+**    Command description :
+**    	GUI_SET_SERIAL : Set the controller serial#
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x10
+**    		byte 3          : password length (should be 0x0f)
+**    		byte 4-0x13     : should be "ArEcATecHnoLogY"
+**    		byte 0x14--0x23 : Serial number string (must be 16 bytes)
+**      GUI_SET_VENDOR : Set vendor string for the controller
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x11
+**    		byte 3          : password length (should be 0x08)
+**    		byte 4-0x13     : should be "ArEcAvAr"
+**    		byte 0x14--0x3B : vendor string (must be 40 bytes)
+**      GUI_SET_MODEL : Set the model name of the controller
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x12
+**    		byte 3          : password length (should be 0x08)
+**    		byte 4-0x13     : should be "ArEcAvAr"
+**    		byte 0x14--0x1B : model string (must be 8 bytes)
+**      GUI_IDENTIFY : Identify device
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x13
+**    		                  return "Areca RAID Subsystem "
+**      GUI_CHECK_PASSWORD : Verify password
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x14
+**    		byte 3          : password length
+**    		byte 4-0x??     : user password to be checked
+**      GUI_LOGOUT : Logout GUI (force password checking on next command)
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x15
+**      GUI_HTTP : HTTP interface (reserved for Http proxy service)(0x16)
+**
+**      GUI_SET_ETHERNET_ADDR : Set the ethernet MAC address
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x17
+**    		byte 3          : password length (should be 0x08)
+**    		byte 4-0x13     : should be "ArEcAvAr"
+**    		byte 0x14--0x19 : Ethernet MAC address (must be 6 bytes)
+**      GUI_SET_LOGO : Set logo in HTTP
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x18
+**    		byte 3          : Page# (0/1/2/3) (0xff --> clear OEM logo)
+**    		byte 4/5/6/7    : 0x55/0xaa/0xa5/0x5a
+**    		byte 8          : TITLE.JPG data (each page must be 2000 bytes)
+**    		                  note page0 1st 2 byte must be
+**    					actual length of the JPG file
+**      GUI_POLL_EVENT : Poll If Event Log Changed
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x19
+**      GUI_GET_EVENT : Read Event
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x1a
+**    		byte 3          : Event Page (0:1st page/1/2/3:last page)
+**      GUI_GET_HW_MONITOR : Get HW monitor data
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x1b
+**    		byte 3 			: # of FANs(example 2)
+**    		byte 4 			: # of Voltage sensor(example 3)
+**    		byte 5 			: # of temperature sensor(example 2)
+**    		byte 6 			: # of power
+**    		byte 7/8        : Fan#0 (RPM)
+**    		byte 9/10       : Fan#1
+**    		byte 11/12 		: Voltage#0 original value in *1000
+**    		byte 13/14 		: Voltage#0 value
+**    		byte 15/16 		: Voltage#1 org
+**    		byte 17/18 		: Voltage#1
+**    		byte 19/20 		: Voltage#2 org
+**    		byte 21/22 		: Voltage#2
+**    		byte 23 		: Temp#0
+**    		byte 24 		: Temp#1
+**    		byte 25 		: Power indicator (bit0 : power#0,
+**    						 bit1 : power#1)
+**    		byte 26 		: UPS indicator
+**      GUI_QUICK_CREATE : Quick create raid/volume set
+**    	    byte 0,1        : length
+**    	    byte 2          : command code 0x20
+**    	    byte 3/4/5/6    : raw capacity
+**    	    byte 7 			: raid level
+**    	    byte 8 			: stripe size
+**    	    byte 9 			: spare
+**    	    byte 10/11/12/13: device mask (the devices to create raid/volume)
+**    		This function is removed; applications that want
+**    		to implement a quick create function
+**    	need to use the GUI_CREATE_RAIDSET and GUI_CREATE_VOLUMESET functions.
+**      GUI_GET_INFO_R : Get Raid Set Information
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x20
+**    		byte 3          : raidset#
+**    	typedef struct sGUI_RAIDSET
+**    	{
+**    		BYTE grsRaidSetName[16];
+**    		DWORD grsCapacity;
+**    		DWORD grsCapacityX;
+**    		DWORD grsFailMask;
+**    		BYTE grsDevArray[32];
+**    		BYTE grsMemberDevices;
+**    		BYTE grsNewMemberDevices;
+**    		BYTE grsRaidState;
+**    		BYTE grsVolumes;
+**    		BYTE grsVolumeList[16];
+**    		BYTE grsRes1;
+**    		BYTE grsRes2;
+**    		BYTE grsRes3;
+**    		BYTE grsFreeSegments;
+**    		DWORD grsRawStripes[8];
+**    		DWORD grsRes4;
+**    		DWORD grsRes5; //     Total to 128 bytes
+**    		DWORD grsRes6; //     Total to 128 bytes
+**    	} sGUI_RAIDSET, *pGUI_RAIDSET;
+**      GUI_GET_INFO_V : Get Volume Set Information
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x21
+**    		byte 3          : volumeset#
+**    	typedef struct sGUI_VOLUMESET
+**    	{
+**    		BYTE gvsVolumeName[16]; //     16
+**    		DWORD gvsCapacity;
+**    		DWORD gvsCapacityX;
+**    		DWORD gvsFailMask;
+**    		DWORD gvsStripeSize;
+**    		DWORD gvsNewFailMask;
+**    		DWORD gvsNewStripeSize;
+**    		DWORD gvsVolumeStatus;
+**    		DWORD gvsProgress; //     32
+**    		sSCSI_ATTR gvsScsi;
+**    		BYTE gvsMemberDisks;
+**    		BYTE gvsRaidLevel; //     8
+**    		BYTE gvsNewMemberDisks;
+**    		BYTE gvsNewRaidLevel;
+**    		BYTE gvsRaidSetNumber;
+**    		BYTE gvsRes0; //     4
+**    		BYTE gvsRes1[4]; //     64 bytes
+**    	} sGUI_VOLUMESET, *pGUI_VOLUMESET;
+**      GUI_GET_INFO_P : Get Physical Drive Information
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x22
+**    		byte 3          : drive # (from 0 to max-channels - 1)
+**    	typedef struct sGUI_PHY_DRV
+**    	{
+**    		BYTE gpdModelName[40];
+**    		BYTE gpdSerialNumber[20];
+**    		BYTE gpdFirmRev[8];
+**    		DWORD gpdCapacity;
+**    		DWORD gpdCapacityX; //     Reserved for expansion
+**    		BYTE gpdDeviceState;
+**    		BYTE gpdPioMode;
+**    		BYTE gpdCurrentUdmaMode;
+**    		BYTE gpdUdmaMode;
+**    		BYTE gpdDriveSelect;
+**    		BYTE gpdRaidNumber; //     0xff if not belongs to a raid set
+**    		sSCSI_ATTR gpdScsi;
+**    		BYTE gpdReserved[40]; //     Total to 128 bytes
+**    	} sGUI_PHY_DRV, *pGUI_PHY_DRV;
+**    	GUI_GET_INFO_S : Get System Information
+**      	byte 0,1        : length
+**      	byte 2          : command code 0x23
+**    	typedef struct sCOM_ATTR
+**    	{
+**    		BYTE comBaudRate;
+**    		BYTE comDataBits;
+**    		BYTE comStopBits;
+**    		BYTE comParity;
+**    		BYTE comFlowControl;
+**    	} sCOM_ATTR, *pCOM_ATTR;
+**    	typedef struct sSYSTEM_INFO
+**    	{
+**    		BYTE gsiVendorName[40];
+**    		BYTE gsiSerialNumber[16];
+**    		BYTE gsiFirmVersion[16];
+**    		BYTE gsiBootVersion[16];
+**    		BYTE gsiMbVersion[16];
+**    		BYTE gsiModelName[8];
+**    		BYTE gsiLocalIp[4];
+**    		BYTE gsiCurrentIp[4];
+**    		DWORD gsiTimeTick;
+**    		DWORD gsiCpuSpeed;
+**    		DWORD gsiICache;
+**    		DWORD gsiDCache;
+**    		DWORD gsiScache;
+**    		DWORD gsiMemorySize;
+**    		DWORD gsiMemorySpeed;
+**    		DWORD gsiEvents;
+**    		BYTE gsiMacAddress[6];
+**    		BYTE gsiDhcp;
+**    		BYTE gsiBeeper;
+**    		BYTE gsiChannelUsage;
+**    		BYTE gsiMaxAtaMode;
+**    		BYTE gsiSdramEcc; //     1:if ECC enabled
+**    		BYTE gsiRebuildPriority;
+**    		sCOM_ATTR gsiComA; //     5 bytes
+**    		sCOM_ATTR gsiComB; //     5 bytes
+**    		BYTE gsiIdeChannels;
+**    		BYTE gsiScsiHostChannels;
+**    		BYTE gsiIdeHostChannels;
+**    		BYTE gsiMaxVolumeSet;
+**    		BYTE gsiMaxRaidSet;
+**    		BYTE gsiEtherPort; //     1:if ether net port supported
+**    		BYTE gsiRaid6Engine; //     1:Raid6 engine supported
+**    		BYTE gsiRes[75];
+**    	} sSYSTEM_INFO, *pSYSTEM_INFO;
+**    	GUI_CLEAR_EVENT : Clear System Event
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x24
+**      GUI_MUTE_BEEPER : Mute current beeper
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x30
+**      GUI_BEEPER_SETTING : Enable/disable beeper
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x31
+**    		byte 3          : 0->disable, 1->enable
+**      GUI_SET_PASSWORD : Change password
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x32
+**    		byte 3 			: password length (must be <= 15)
+**    		byte 4 			: password (must be alpha-numerical)
+**    	GUI_HOST_INTERFACE_MODE : Set host interface mode
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x33
+**    		byte 3 			: 0->Independent, 1->cluster
+**      GUI_REBUILD_PRIORITY : Set rebuild priority
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x34
+**    		byte 3 			: 0/1/2/3 (low->high)
+**      GUI_MAX_ATA_MODE : Set maximum ATA mode to be used
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x35
+**    		byte 3 			: 0/1/2/3 (133/100/66/33)
+**      GUI_RESET_CONTROLLER : Reset Controller
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x36
+**                            *Response with VT100 screen (discard it)
+**      GUI_COM_PORT_SETTING : COM port setting
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x37
+**    		byte 3 			: 0->COMA (term port),
+**    					  1->COMB (debug port)
+**    		byte 4 			: 0/1/2/3/4/5/6/7
+**    			(1200/2400/4800/9600/19200/38400/57600/115200)
+**    		byte 5 			: data bit
+**    					(0:7 bit, 1:8 bit : must be 8 bit)
+**    		byte 6 			: stop bit (0:1, 1:2 stop bits)
+**    		byte 7 			: parity (0:none, 1:odd, 2:even)
+**    		byte 8 			: flow control
+**    			(0:none, 1:xon/xoff, 2:hardware => must use none)
+**      GUI_NO_OPERATION : No operation
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x38
+**      GUI_DHCP_IP : Set DHCP option and local IP address
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x39
+**    		byte 3          : 0:dhcp disabled, 1:dhcp enabled
+**    		byte 4/5/6/7    : IP address
+**      GUI_CREATE_PASS_THROUGH : Create pass through disk
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x40
+**    		byte 3 			: device #
+**    		byte 4 			: scsi channel (0/1)
+**    		byte 5 			: scsi id (0-->15)
+**    		byte 6 			: scsi lun (0-->7)
+**    		byte 7 			: tagged queue (1 : enabled)
+**    		byte 8 			: cache mode (1 : enabled)
+**    		byte 9 			: max speed (0/1/2/3/4,
+**    			async/20/40/80/160 for scsi)
+**    			(0/1/2/3/4, 33/66/100/133/150 for ide  )
+**      GUI_MODIFY_PASS_THROUGH : Modify pass through disk
+**    		byte 0,1        : length
+**    		byte 2 			: command code 0x41
+**    		byte 3 			: device #
+**    		byte 4 			: scsi channel (0/1)
+**    		byte 5 			: scsi id (0-->15)
+**    		byte 6 			: scsi lun (0-->7)
+**    		byte 7 			: tagged queue (1 : enabled)
+**    		byte 8 			: cache mode (1 : enabled)
+**    		byte 9 			: max speed (0/1/2/3/4,
+**    					async/20/40/80/160 for scsi)
+**    			(0/1/2/3/4, 33/66/100/133/150 for ide  )
+**      GUI_DELETE_PASS_THROUGH : Delete pass through disk
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x42
+**    		byte 3          : device# to be deleted
+**      GUI_IDENTIFY_DEVICE : Identify Device
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x43
+**    		byte 3          : Flash Method
+**    				(0:flash selected, 1:flash not selected)
+**    		byte 4/5/6/7    : IDE device mask to be flashed
+**                           note .... no response data available
+**    	GUI_CREATE_RAIDSET : Create Raid Set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x50
+**    		byte 3/4/5/6    : device mask
+**    		byte 7-22       : raidset name (if byte 7 == 0:use default)
+**      GUI_DELETE_RAIDSET : Delete Raid Set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x51
+**    		byte 3          : raidset#
+**    	GUI_EXPAND_RAIDSET : Expand Raid Set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x52
+**    		byte 3          : raidset#
+**    		byte 4/5/6/7    : device mask for expansion
+**    		byte 8/9/10     : (8:0 no change, 1 change, 0xff:terminate,
+**    				9:new raid level,
+**    				10:new stripe size
+**    				0/1/2/3/4/5->4/8/16/32/64/128K )
+**    		byte 11/12/13   : repeat for each volume in the raidset
+**      GUI_ACTIVATE_RAIDSET : Activate incomplete raid set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x53
+**    		byte 3          : raidset#
+**      GUI_CREATE_HOT_SPARE : Create hot spare disk
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x54
+**    		byte 3/4/5/6    : device mask for hot spare creation
+**    	GUI_DELETE_HOT_SPARE : Delete hot spare disk
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x55
+**    		byte 3/4/5/6    : device mask for hot spare deletion
+**    	GUI_CREATE_VOLUME : Create volume set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x60
+**    		byte 3          : raidset#
+**    		byte 4-19       : volume set name
+**    				(if byte4 == 0, use default)
+**    		byte 20-27      : volume capacity (blocks)
+**    		byte 28 		: raid level
+**    		byte 29 		: stripe size
+**    				(0/1/2/3/4/5->4/8/16/32/64/128K)
+**    		byte 30 		: channel
+**    		byte 31 		: ID
+**    		byte 32 		: LUN
+**    		byte 33 		: 1 enable tag
+**    		byte 34 		: 1 enable cache
+**    		byte 35 		: speed
+**    		(0/1/2/3/4->async/20/40/80/160 for scsi)
+**    		(0/1/2/3/4->33/66/100/133/150 for IDE  )
+**    		byte 36 		: 1 to select quick init
+**
+**    	GUI_MODIFY_VOLUME : Modify volume Set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x61
+**    		byte 3          : volumeset#
+**    		byte 4-19       : new volume set name
+**    		(if byte4 == 0, not change)
+**    		byte 20-27      : new volume capacity (reserved)
+**    		byte 28 		: new raid level
+**    		byte 29 		: new stripe size
+**    		(0/1/2/3/4/5->4/8/16/32/64/128K)
+**    		byte 30 		: new channel
+**    		byte 31 		: new ID
+**    		byte 32 		: new LUN
+**    		byte 33 		: 1 enable tag
+**    		byte 34 		: 1 enable cache
+**    		byte 35 		: speed
+**    		(0/1/2/3/4->async/20/40/80/160 for scsi)
+**    		(0/1/2/3/4->33/66/100/133/150 for IDE  )
+**    	GUI_DELETE_VOLUME : Delete volume set
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x62
+**    		byte 3          : volumeset#
+**    	GUI_START_CHECK_VOLUME : Start volume consistency check
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x63
+**    		byte 3          : volumeset#
+**    	GUI_STOP_CHECK_VOLUME : Stop volume consistency check
+**    		byte 0,1        : length
+**    		byte 2          : command code 0x64
+** ---------------------------------------------------------------------
+**    4. Returned data
+** ---------------------------------------------------------------------
+**    	(A) Header          : 3 bytes sequence (0x5E, 0x01, 0x61)
+**    	(B) Length          : 2 bytes
+**    			(low byte 1st, excludes length and checksum byte)
+**    	(C) status or data  :
+**           <1> If length == 1 ==> 1 byte status code
+**    		#define GUI_OK                    0x41
+**    		#define GUI_RAIDSET_NOT_NORMAL    0x42
+**    		#define GUI_VOLUMESET_NOT_NORMAL  0x43
+**    		#define GUI_NO_RAIDSET            0x44
+**    		#define GUI_NO_VOLUMESET          0x45
+**    		#define GUI_NO_PHYSICAL_DRIVE     0x46
+**    		#define GUI_PARAMETER_ERROR       0x47
+**    		#define GUI_UNSUPPORTED_COMMAND   0x48
+**    		#define GUI_DISK_CONFIG_CHANGED   0x49
+**    		#define GUI_INVALID_PASSWORD      0x4a
+**    		#define GUI_NO_DISK_SPACE         0x4b
+**    		#define GUI_CHECKSUM_ERROR        0x4c
+**    		#define GUI_PASSWORD_REQUIRED     0x4d
+**           <2> If length > 1 ==>
+**    		data block returned from controller
+**    		and the contents depends on the command code
+**    	(E) Checksum        : checksum of length and status or data byte
+**************************************************************************
diff --git a/Documentation/scsi/libsas.txt b/Documentation/scsi/libsas.txt
new file mode 100644
index 0000000..9e2078b
--- /dev/null
+++ b/Documentation/scsi/libsas.txt
@@ -0,0 +1,484 @@
+SAS Layer
+---------
+
+The SAS Layer is a management infrastructure which manages
+SAS LLDDs.  It sits between SCSI Core and SAS LLDDs.  The
+layout is as follows: while SCSI Core is concerned with
+SAM/SPC issues, and a SAS LLDD+sequencer is concerned with
+phy/OOB/link management, the SAS layer is concerned with:
+
+      * SAS Phy/Port/HA event management (LLDD generates,
+        SAS Layer processes),
+      * SAS Port management (creation/destruction),
+      * SAS Domain discovery and revalidation,
+      * SAS Domain device management,
+      * SCSI Host registration/unregistration,
+      * Device registration with SCSI Core (SAS) or libata
+        (SATA), and
+      * Expander management and exporting expander control
+        to user space.
+
+A SAS LLDD is a PCI device driver.  It is concerned with
+phy/OOB management, and vendor specific tasks and generates
+events to the SAS layer.
+
+The SAS Layer does most SAS tasks as outlined in the SAS 1.1
+spec.
+
+The sas_ha_struct describes the SAS LLDD to the SAS layer.
+Most of it is used by the SAS Layer but a few fields need to
+be initialized by the LLDDs.
+
+After initializing your hardware, from the probe() function
+you call sas_register_ha(). It will register your LLDD with
+the SCSI subsystem, creating a SCSI host and it will
+register your SAS driver with the sysfs SAS tree it creates.
+It will then return.  Then you enable your phys to actually
+start OOB (at which point your driver will start calling the
+notify_* event callbacks).
+
+Structure descriptions:
+
+struct sas_phy --------------------
+Normally this is statically embedded to your driver's
+phy structure:
+	struct my_phy {
+	       blah;
+	       struct sas_phy sas_phy;
+	       bleh;
+	};
+And then all the phys are an array of my_phy in your HA
+struct (shown below).
+
+Then as you go along and initialize your phys you also
+initialize the sas_phy struct, along with your own
+phy structure.
+
+In general, the phys are managed by the LLDD and the ports
+are managed by the SAS layer.  So the phys are initialized
+and updated by the LLDD and the ports are initialized and
+updated by the SAS layer.
+
+There is a scheme where the LLDD can RW certain fields,
+and the SAS layer can only read such ones, and vice versa.
+The idea is to avoid unnecessary locking.
+
+enabled -- must be set (0/1)
+id -- must be set [0,MAX_PHYS)
+class, proto, type, role, oob_mode, linkrate -- must be set
+oob_mode --  you set this when OOB has finished and then notify
+the SAS Layer.
+
+sas_addr -- this normally points to an array holding the sas
+address of the phy, possibly somewhere in your my_phy
+struct.
+
+attached_sas_addr -- set this when you (LLDD) receive an
+IDENTIFY frame or a FIS frame, _before_ notifying the SAS
+layer.  The idea is that sometimes the LLDD may want to fake
+or provide a different SAS address on that phy/port and this
+allows it to do this.  At best you should copy the sas
+address from the IDENTIFY frame or maybe generate a SAS
+address for SATA directly attached devices.  The Discover
+process may later change this.
+
+frame_rcvd -- this is where you copy the IDENTIFY/FIS frame
+when you get it; you lock, copy, set frame_rcvd_size and
+unlock the lock, and then call the event.  It is a pointer
+since there's no way to know your hw frame size _exactly_,
+so you define the actual array in your phy struct and let
+this pointer point to it.  You copy the frame from your
+DMAable memory to that area holding the lock.
+
+sas_prim -- this is where primitives go when they're
+received.  See sas.h. Grab the lock, set the primitive,
+release the lock, notify.
+
+port -- this points to the sas_port if the phy belongs
+to a port -- the LLDD only reads this. It points to the
+sas_port this phy is part of.  Set by the SAS Layer.
+
+ha -- may be set; the SAS layer sets it anyway.
+
+lldd_phy -- you should set this to point to your phy so you
+can find your way around faster when the SAS layer calls one
+of your callbacks and passes you a phy.  If the sas_phy is
+embedded you can also use container_of -- whatever you
+prefer.
+
+
+struct sas_port --------------------
+The LLDD doesn't set any fields of this struct -- it only
+reads them.  They should be self explanatory.
+
+phy_mask is 32 bit, this should be enough for now, as I
+haven't heard of a HA having more than 8 phys.
+
+lldd_port -- I haven't found use for that -- maybe other
+LLDD who wish to have internal port representation can make
+use of this.
+
+
+struct sas_ha_struct --------------------
+It normally is statically declared in your own LLDD
+structure describing your adapter:
+struct my_sas_ha {
+       blah;
+       struct sas_ha_struct sas_ha;
+       struct my_phy phys[MAX_PHYS];
+       struct sas_port sas_ports[MAX_PHYS]; /* (1) */
+       bleh;
+};
+
+(1) If your LLDD doesn't have its own port representation.
+
+What needs to be initialized (sample function given below).
+
+pcidev
+sas_addr -- since the SAS layer doesn't want to mess with
+	 memory allocation, etc, this points to statically
+	 allocated array somewhere (say in your host adapter
+	 structure) and holds the SAS address of the host
+	 adapter as given by you or the manufacturer, etc.
+sas_port
+sas_phy -- an array of pointers to structures. (see
+	note above on sas_addr).
+	These must be set.  See more notes below.
+num_phys -- the number of phys present in the sas_phy array,
+	 and the number of ports present in the sas_port
+	 array.  There can be a maximum of num_phys ports (one
+	 per phy) so we drop the num_ports, and only use
+	 num_phys.
+
+The event interface:
+
+	/* LLDD calls these to notify the class of an event. */
+	void (*notify_ha_event)(struct sas_ha_struct *, enum ha_event);
+	void (*notify_port_event)(struct sas_phy *, enum port_event);
+	void (*notify_phy_event)(struct sas_phy *, enum phy_event);
+
+When sas_register_ha() returns, those are set and can be
+called by the LLDD to notify the SAS layer of such
+events.
+
+The port notification:
+
+	/* The class calls these to notify the LLDD of an event. */
+	void (*lldd_port_formed)(struct sas_phy *);
+	void (*lldd_port_deformed)(struct sas_phy *);
+
+If the LLDD wants notification when a port has been formed
+or deformed it sets those to a function satisfying the type.
+
+A SAS LLDD should also implement at least one of the Task
+Management Functions (TMFs) described in SAM:
+
+	/* Task Management Functions. Must be called from process context. */
+	int (*lldd_abort_task)(struct sas_task *);
+	int (*lldd_abort_task_set)(struct domain_device *, u8 *lun);
+	int (*lldd_clear_aca)(struct domain_device *, u8 *lun);
+	int (*lldd_clear_task_set)(struct domain_device *, u8 *lun);
+	int (*lldd_I_T_nexus_reset)(struct domain_device *);
+	int (*lldd_lu_reset)(struct domain_device *, u8 *lun);
+	int (*lldd_query_task)(struct sas_task *);
+
+For more information please read SAM from T10.org.
+
+Port and Adapter management:
+
+	/* Port and Adapter management */
+	int (*lldd_clear_nexus_port)(struct sas_port *);
+	int (*lldd_clear_nexus_ha)(struct sas_ha_struct *);
+
+A SAS LLDD should implement at least one of those.
+
+Phy management:
+
+	/* Phy management */
+	int (*lldd_control_phy)(struct sas_phy *, enum phy_func);
+
+lldd_ha -- set this to point to your HA struct. You can also
+use container_of if you embedded it as shown above.
+
+A sample initialization and registration function
+can look like this (called last thing from probe())
+*but* before you enable the phys to do OOB:
+
+static int register_sas_ha(struct my_sas_ha *my_ha)
+{
+	int i;
+	static struct sas_phy   *sas_phys[MAX_PHYS];
+	static struct sas_port  *sas_ports[MAX_PHYS];
+
+	my_ha->sas_ha.sas_addr = &my_ha->sas_addr[0];
+
+	for (i = 0; i < MAX_PHYS; i++) {
+		sas_phys[i] = &my_ha->phys[i].sas_phy;
+		sas_ports[i] = &my_ha->sas_ports[i];
+	}
+
+	my_ha->sas_ha.sas_phy  = sas_phys;
+	my_ha->sas_ha.sas_port = sas_ports;
+	my_ha->sas_ha.num_phys = MAX_PHYS;
+
+	my_ha->sas_ha.lldd_port_formed = my_port_formed;
+
+	my_ha->sas_ha.lldd_dev_found = my_dev_found;
+	my_ha->sas_ha.lldd_dev_gone = my_dev_gone;
+
+	my_ha->sas_ha.lldd_max_execute_num = lldd_max_execute_num; (1)
+
+	my_ha->sas_ha.lldd_queue_size = ha_can_queue;
+	my_ha->sas_ha.lldd_execute_task = my_execute_task;
+
+	my_ha->sas_ha.lldd_abort_task     = my_abort_task;
+	my_ha->sas_ha.lldd_abort_task_set = my_abort_task_set;
+	my_ha->sas_ha.lldd_clear_aca      = my_clear_aca;
+	my_ha->sas_ha.lldd_clear_task_set = my_clear_task_set;
+	my_ha->sas_ha.lldd_I_T_nexus_reset= NULL; (2)
+	my_ha->sas_ha.lldd_lu_reset       = my_lu_reset;
+	my_ha->sas_ha.lldd_query_task     = my_query_task;
+
+	my_ha->sas_ha.lldd_clear_nexus_port = my_clear_nexus_port;
+	my_ha->sas_ha.lldd_clear_nexus_ha = my_clear_nexus_ha;
+
+	my_ha->sas_ha.lldd_control_phy = my_control_phy;
+
+	return sas_register_ha(&my_ha->sas_ha);
+}
+
+(1) This is normally a LLDD parameter, something along the
+lines of a task collector.  What it tells the SAS Layer is
+whether the SAS layer should run in Direct Mode (default:
+value 0 or 1) or Task Collector Mode (value greater than 1).
+
+In Direct Mode, the SAS Layer calls Execute Task as soon as
+it has a command to send to the SDS, _and_ this is a single
+command, i.e. not linked.
+
+Some hardware (e.g. aic94xx) has the capability to DMA more
+than one task at a time (interrupt) from host memory.  Task
+Collector Mode is an optional feature for HAs which support
+this in their hardware.  (Again, it is completely optional
+even if your hardware supports it.)
+
+In Task Collector Mode, the SAS Layer would do _natural_
+coalescing of tasks and at the appropriate moment it would
+call your driver to DMA more than one task in a single HA
+interrupt. DBMS may want to use this by insmod/modprobe
+setting the lldd_max_execute_num to something greater than
+1.
+
+(2) SAS 1.1 does not define I_T Nexus Reset TMF.
+
+Events
+------
+
+Events are _the only way_ a SAS LLDD notifies the SAS layer
+of anything.  There is no other method or way for a LLDD to tell
+the SAS layer of anything happening internally or in the SAS
+domain.
+
+Phy events:
+	PHYE_LOSS_OF_SIGNAL, (C)
+	PHYE_OOB_DONE,
+	PHYE_OOB_ERROR,      (C)
+	PHYE_SPINUP_HOLD.
+
+Port events, passed on a _phy_:
+	PORTE_BYTES_DMAED,      (M)
+	PORTE_BROADCAST_RCVD,   (E)
+	PORTE_LINK_RESET_ERR,   (C)
+	PORTE_TIMER_EVENT,      (C)
+	PORTE_HARD_RESET.
+
+Host Adapter event:
+	HAE_RESET
+
+A SAS LLDD should be able to generate
+	- at least one event from group C (choice),
+	- events marked M (mandatory) are mandatory (only one),
+	- events marked E (expander) if it wants the SAS layer
+	  to handle domain revalidation (only one such).
+	- Unmarked events are optional.
+
+Meaning:
+
+HAE_RESET -- when your HA got internal error and was reset.
+
+PORTE_BYTES_DMAED -- on receiving an IDENTIFY/FIS frame
+PORTE_BROADCAST_RCVD -- on receiving a primitive
+PORTE_LINK_RESET_ERR -- timer expired, loss of signal, loss
+of DWS, etc. (*)
+PORTE_TIMER_EVENT -- DWS reset timeout timer expired (*)
+PORTE_HARD_RESET -- Hard Reset primitive received.
+
+PHYE_LOSS_OF_SIGNAL -- the device is gone (*)
+PHYE_OOB_DONE -- OOB went fine and oob_mode is valid
+PHYE_OOB_ERROR -- Error while doing OOB, the device probably
+got disconnected. (*)
+PHYE_SPINUP_HOLD -- SATA is present, COMWAKE not sent.
+
+(*) should set/clear the appropriate fields in the phy,
+    or alternatively call the inlined sas_phy_disconnected()
+    which is just a helper, from their tasklet.
+
+The Execute Command SCSI RPC:
+
+	int (*lldd_execute_task)(struct sas_task *, int num,
+				 unsigned long gfp_flags);
+
+Used to queue a task to the SAS LLDD.  @task is the task(s) to
+be executed.  @num should be the number of tasks being
+queued at this function call (they are linked via
+task::list), @gfp_flags should be the gfp mask defining the
+context of the caller.
+
+This function should implement the Execute Command SCSI RPC,
+or if you're sending a SCSI Task as linked commands, you
+should also use this function.
+
+That is, when lldd_execute_task() is called, the command(s)
+go out on the transport *immediately*.  There is *no*
+queuing of any sort and at any level in a SAS LLDD.
+
+The use of task::list is two-fold, one for linked commands,
+the other discussed below.
+
+It is possible to queue up more than one task at a time, by
+initializing the list element of struct sas_task, and
+passing the number of tasks enlisted in this manner in num.
+
+Returns: -SAS_QUEUE_FULL, -ENOMEM, nothing was queued;
+	 0, the task(s) were queued.
+
+If you want to pass num > 1, then either
+A) you're the only caller of this function and keep track
+   of what you've queued to the LLDD, or
+B) you know what you're doing and have a strategy of
+   retrying.
+
+As opposed to queuing one task at a time (function call),
+batch queuing of tasks, by having num > 1, greatly
+simplifies LLDD code, sequencer code, and _hardware design_,
+and has some performance advantages in certain situations
+(DBMS).
+
+The LLDD advertises if it can take more than one command at
+a time at lldd_execute_task(), by setting the
+lldd_max_execute_num parameter (controlled by "collector"
+module parameter in aic94xx SAS LLDD).
+
+You should leave this to the default 1, unless you know what
+you're doing.
+
+This is a function of the LLDD, which the SAS layer can
+cater to.
+
+int lldd_queue_size
+	The host adapter's queue size.  This is the maximum
+number of commands the lldd can have pending to domain
+devices on behalf of all upper layers submitting through
+lldd_execute_task().
+
+You really want to set this to something (much) larger than
+1.
+
+This _really_ has absolutely nothing to do with queuing.
+There is no queuing in SAS LLDDs.
+
+struct sas_task {
+	dev -- the device this task is destined to
+	list -- must be initialized (INIT_LIST_HEAD)
+	task_proto -- _one_ of enum sas_proto
+	scatter -- pointer to scatter gather list array
+	num_scatter -- number of elements in scatter
+	total_xfer_len -- total number of bytes expected to be transferred
+	data_dir -- PCI_DMA_...
+	task_done -- callback when the task has finished execution
+};
+
+When an external entity, i.e. an entity other than the LLDD or the
+SAS Layer, wants to work with a struct domain_device, it
+_must_ call kobject_get() when getting a handle on the
+device and kobject_put() when it is done with the device.
+
+This does two things:
+     A) implements proper kfree() for the device;
+     B) increments/decrements the kref for all players:
+     domain_device
+	all domain_device's ... (if past an expander)
+	    port
+		host adapter
+		     pci device
+			 and up the ladder, etc.
+
+DISCOVERY
+---------
+
+The sysfs tree has the following purposes:
+    a) It shows you the physical layout of the SAS domain at
+       the current time, i.e. how the domain looks in the
+       physical world right now.
+    b) Shows some device parameters _at_discovery_time_.
+
+This is a link to the tree(1) program, very useful in
+viewing the SAS domain:
+ftp://mama.indstate.edu/linux/tree/
+I expect user space applications to actually create a
+graphical interface of this.
+
+That is, the sysfs domain tree doesn't show or keep state if
+you e.g., change the meaning of the READY LED MEANING
+setting, but it does show you the current connection status
+of the domain device.
+
+Keeping internal device state changes is the responsibility of
+upper layers (Command set drivers) and user space.
+
+When a device or devices are unplugged from the domain, this
+is reflected in the sysfs tree immediately, and the device(s)
+removed from the system.
+
+The structure domain_device describes any device in the SAS
+domain.  It is completely managed by the SAS layer.  A task
+points to a domain device, this is how the SAS LLDD knows
+where to send the task(s) to.  A SAS LLDD only reads the
+contents of the domain_device structure, but it never creates
+or destroys one.
+
+Expander management from User Space
+-----------------------------------
+
+In each expander directory in sysfs, there is a file called
+"smp_portal".  It is a binary sysfs attribute file, which
+implements an SMP portal (Note: this is *NOT* an SMP port),
+to which user space applications can send SMP requests and
+receive SMP responses.
+
+Functionality is deceptively simple:
+
+1. Build the SMP frame you want to send. The format and layout
+   is described in the SAS spec.  Leave the CRC field equal to 0.
+open(2)
+2. Open the expander's SMP portal sysfs file in RW mode.
+write(2)
+3. Write the frame you built in 1.
+read(2)
+4. Read the amount of data you expect to receive for the frame you built.
+   If you receive a different amount of data than you expected to receive,
+   then there was some kind of error.
+close(2)
+All this process is shown in detail in the function do_smp_func()
+and its callers, in the file "expander_conf.c".
+
+The kernel functionality is implemented in the file
+"sas_expander.c".
+
+The program "expander_conf.c" implements this. It takes one
+argument, the sysfs file name of the SMP portal to the
+expander, and gives expander information, including routing
+tables.
+
+The SMP portal gives you complete control of the expander,
+so please be careful.
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index f61af23..e6b57dd 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -758,6 +758,7 @@
     position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
     single_cmd  - Use single immediate commands to communicate with
 		codecs (for debugging only)
+    disable_msi - Disable Message Signaled Interrupt (MSI)
 
     This module supports one card and autoprobe.
 
@@ -778,11 +779,16 @@
 	  6stack-digout	6-jack with a SPDIF out
 	  w810		3-jack
 	  z71v		3-jack (HP shared SPDIF)
-	  asus		3-jack
+	  asus		3-jack (ASUS Mobo)
+	  asus-w1v	ASUS W1V
+	  asus-dig	ASUS with SPDIF out
+	  asus-dig2	ASUS with SPDIF out (using GPIO2)
 	  uniwill	3-jack
 	  F1734		2-jack
 	  lg		LG laptop (m1 express dual)
-	  lg-lw		LG LW20 laptop
+	  lg-lw		LG LW20/LW25 laptop
+	  tcl		TCL S700
+	  clevo		Clevo laptops (m520G, m665n)
 	  test		for testing/debugging purpose, almost all controls can be
 			adjusted.  Appearing only when compiled with
 			$CONFIG_SND_DEBUG=y
@@ -790,6 +796,7 @@
 
 	ALC260
 	  hp		HP machines
+	  hp-3013	HP machines (3013-variant)
 	  fujitsu	Fujitsu S7020
 	  acer		Acer TravelMate
 	  basic		fixed pin assignment (old default model)
@@ -797,24 +804,32 @@
 
 	ALC262
 	  fujitsu	Fujitsu Laptop
+	  hp-bpc	HP xw4400/6400/8400/9400 laptops
+	  benq		Benq ED8
 	  basic		fixed pin assignment w/o SPDIF
 	  auto		auto-config reading BIOS (default)
 
 	ALC882/885
 	  3stack-dig	3-jack with SPDIF I/O
 	  6stck-dig	6-jack digital with SPDIF I/O
+	  arima		Arima W820Di1
 	  auto		auto-config reading BIOS (default)
 
 	ALC883/888
 	  3stack-dig	3-jack with SPDIF I/O
 	  6stack-dig	6-jack digital with SPDIF I/O
-	  6stack-dig-demo  6-stack digital for Intel demo board
+	  3stack-6ch    3-jack 6-channel
+	  3stack-6ch-dig 3-jack 6-channel with SPDIF I/O
+	  6stack-dig-demo  6-jack digital for Intel demo board
+	  acer		Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc)
 	  auto		auto-config reading BIOS (default)
 
 	ALC861/660
 	  3stack	3-jack
 	  3stack-dig	3-jack with SPDIF I/O
 	  6stack-dig	6-jack with SPDIF I/O
+	  3stack-660	3-jack (for ALC660)
+	  uniwill-m31	Uniwill M31 laptop
 	  auto		auto-config reading BIOS (default)
 
 	CMI9880
@@ -843,10 +858,21 @@
 	  3stack-dig	ditto with SPDIF
 	  laptop	3-jack with hp-jack automute
 	  laptop-dig	ditto with SPDIF
-	  auto		auto-confgi reading BIOS (default)
+	  auto		auto-config reading BIOS (default)
 
-	STAC7661(?)
+	STAC9200/9205/9220/9221/9254
+	  ref		Reference board
+	  3stack	D945 3stack
+	  5stack	D945 5stack + SPDIF
+
+	STAC9227/9228/9229/927x
+	  ref		Reference board
+	  3stack	D965 3stack
+	  5stack	D965 5stack + SPDIF
+
+	STAC9872
 	  vaio		Setup for VAIO FE550G/SZ110
+	  vaio-ar	Setup for VAIO AR
 
     If the default configuration doesn't work and one of the above
     matches with your device, report it together with the PCI
@@ -1213,6 +1239,14 @@
     
     Module supports only 1 card.  This module has no enable option.
 
+  Module snd-mts64
+  ----------------
+
+    Module for Ego Systems (ESI) Miditerminal 4140
+
+    This module supports multiple devices.
+    Requires parport (CONFIG_PARPORT).
+
   Module snd-nm256
   ----------------
 
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
index b8dc51c..4807ef7 100644
--- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
@@ -1054,9 +1054,8 @@
 
       <para>
       For a device which allows hotplugging, you can use
-      <function>snd_card_free_in_thread</function>.  This one will
-      postpone the destruction and wait in a kernel-thread until all
-      devices are closed.
+      <function>snd_card_free_when_closed</function>.  This one will
+      postpone the destruction until all devices are closed.
       </para>
 
     </section>
diff --git a/MAINTAINERS b/MAINTAINERS
index a34c53c..ed9757e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -298,6 +298,14 @@
 W:	http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 S:	Supported
 
+AMSO1100 RNIC DRIVER
+P:	Tom Tucker
+M:	tom@opengridcomputing.com
+P:	Steve Wise
+M:	swise@opengridcomputing.com
+L:	openib-general@openib.org
+S:	Maintained
+
 AOA (Apple Onboard Audio) ALSA DRIVER
 P:	Johannes Berg
 M:	johannes@sipsolutions.net
@@ -991,6 +999,14 @@
 W:	http://aeschi.ch.eu.org/efs/
 S:	Orphan
 
+EHCA (IBM GX bus InfiniBand adapter) DRIVER:
+P:	Hoang-Nam Nguyen
+M:	hnguyen@de.ibm.com
+P:	Christoph Raisch
+M:	raisch@de.ibm.com
+L:	openib-general@openib.org
+S:	Supported
+
 EMU10K1 SOUND DRIVER
 P:	James Courtier-Dutton
 M:	James@superbug.demon.co.uk
@@ -1783,6 +1799,13 @@
 L:     linuxppc-embedded@ozlabs.org
 S:     Maintained
 
+LINUX FOR POWERPC PA SEMI PWRFICIENT
+P:	Olof Johansson
+M:	olof@lixom.net
+W:	http://www.pasemi.com/
+L:	linuxppc-dev@ozlabs.org
+S:	Supported
+
 LLC (802.2)
 P:	Arnaldo Carvalho de Melo
 M:	acme@conectiva.com.br
@@ -2445,6 +2468,8 @@
 S390
 P:	Martin Schwidefsky
 M:	schwidefsky@de.ibm.com
+P:	Heiko Carstens
+M:	heiko.carstens@de.ibm.com
 M:	linux390@de.ibm.com
 L:	linux-390@vm.marist.edu
 W:	http://www.ibm.com/developerworks/linux/linux390/
@@ -2459,8 +2484,8 @@
 S:	Supported
 
 S390 ZFCP DRIVER
-P:	Andreas Herrmann
-M:	aherrman@de.ibm.com
+P:	Swen Schillig
+M:	swen@vnet.ibm.com
 M:	linux390@de.ibm.com
 L:	linux-390@vm.marist.edu
 W:	http://www.ibm.com/developerworks/linux/linux390/
diff --git a/Makefile b/Makefile
index a086e32..edfc2fd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 18
-EXTRAVERSION = -rc7
-NAME=Crazed Snow-Weasel
+EXTRAVERSION =
+NAME=Avast! A bilge rat!
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
@@ -1082,6 +1082,7 @@
 	@echo  'Static analysers'
 	@echo  '  checkstack      - Generate a list of stack hogs'
 	@echo  '  namespacecheck  - Name space analysis on compiled kernel'
+	@echo  '  headers_check   - Sanity check on exported headers'
 	@echo  ''
 	@echo  'Kernel packaging:'
 	@$(MAKE) $(build)=$(package-dir) help
diff --git a/arch/frv/Makefile b/arch/frv/Makefile
index d163747..038e3a8 100644
--- a/arch/frv/Makefile
+++ b/arch/frv/Makefile
@@ -108,11 +108,8 @@
 bootstrap:
 	$(Q)$(MAKEBOOT) bootstrap
 
-archmrproper:
-	$(Q)$(MAKE) $(build)=arch/frv/boot mrproper
-
 archclean:
-	$(Q)$(MAKE) $(build)=arch/frv/boot clean
+	$(Q)$(MAKE) $(clean)=arch/frv/boot
 
 archdep: scripts/mkdep symlinks
 	$(Q)$(MAKE) $(build)=arch/frv/boot dep
diff --git a/arch/frv/boot/Makefile b/arch/frv/boot/Makefile
index 5dfc93f..dc6f038 100644
--- a/arch/frv/boot/Makefile
+++ b/arch/frv/boot/Makefile
@@ -8,6 +8,8 @@
 # Copyright (C) 1995-2000 Russell King
 #
 
+targets := Image zImage bootpImage
+
 SYSTEM	=$(TOPDIR)/$(LINUX)
 
 ZTEXTADDR	 = 0x02080000
@@ -66,7 +68,6 @@
 # miscellany
 #
 mrproper clean:
-	$(RM) Image zImage bootpImage
 #	@$(MAKE) -C compressed clean
 #	@$(MAKE) -C bootp clean
 
diff --git a/arch/i386/crypto/Makefile b/arch/i386/crypto/Makefile
index 103c353..3fd19af 100644
--- a/arch/i386/crypto/Makefile
+++ b/arch/i386/crypto/Makefile
@@ -5,5 +5,8 @@
 # 
 
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
+obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 
 aes-i586-y := aes-i586-asm.o aes.o
+twofish-i586-y := twofish-i586-asm.o twofish.o
+
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
index d3806da..49aad93 100644
--- a/arch/i386/crypto/aes.c
+++ b/arch/i386/crypto/aes.c
@@ -379,12 +379,13 @@
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	int i;
 	u32 ss[8];
 	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 
 	/* encryption schedule */
 	
diff --git a/arch/i386/crypto/twofish-i586-asm.S b/arch/i386/crypto/twofish-i586-asm.S
new file mode 100644
index 0000000..39b98ed
--- /dev/null
+++ b/arch/i386/crypto/twofish-i586-asm.S
@@ -0,0 +1,335 @@
+/***************************************************************************
+*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+***************************************************************************/
+
+.file "twofish-i586-asm.S"
+.text
+
+#include <asm/asm-offsets.h>
+
+/* return address at 0 */
+
+#define in_blk    12  /* input byte array address parameter*/
+#define out_blk   8  /* output byte array address parameter*/
+#define tfm       4  /* Twofish context structure */
+
+#define a_offset	0
+#define b_offset	4
+#define c_offset	8
+#define d_offset	12
+
+/* Structure of the crypto context struct*/
+
+#define s0	0	/* S0 Array 256 Words each */
+#define s1	1024	/* S1 Array */
+#define s2	2048	/* S2 Array */
+#define s3	3072	/* S3 Array */
+#define w	4096	/* 8 whitening keys (word) */
+#define k	4128	/* key 1-32 ( word ) */
+
+/* define a few register aliases to allow macro substitution */
+
+#define R0D    %eax
+#define R0B    %al
+#define R0H    %ah
+
+#define R1D    %ebx
+#define R1B    %bl
+#define R1H    %bh
+
+#define R2D    %ecx
+#define R2B    %cl
+#define R2H    %ch
+
+#define R3D    %edx
+#define R3B    %dl
+#define R3H    %dh
+
+
+/* performs input whitening */
+#define input_whitening(src,context,offset)\
+	xor	w+offset(context),	src;
+
+/* performs output whitening */
+#define output_whitening(src,context,offset)\
+	xor	w+16+offset(context),	src;
+
+/*
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ */
+#define encrypt_round(a,b,c,d,round)\
+	push	d ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s1(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%ebp,%edi,4),%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%ebp,%edi,4),d ## D;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%ebp,%edi,4),%esi;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$15,		b ## D;\
+	xor	(%ebp,%edi,4),	d ## D;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	d ## D,		%esi;\
+	add	%esi,		d ## D;\
+	add	k+round(%ebp),	%esi;\
+	xor	%esi,		c ## D;\
+	rol	$15,		c ## D;\
+	add	k+4+round(%ebp),d ## D;\
+	xor	%edi,		d ## D;
+
+/*
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1)
+ * operations on a and b are interleaved to increase performance
+ * last round has different rotations for the output preparation
+ */
+#define encrypt_last_round(a,b,c,d,round)\
+	push	d ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s1(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%ebp,%edi,4),%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%ebp,%edi,4),d ## D;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%ebp,%edi,4),%esi;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%ebp,%edi,4),d ## D;\
+	movzx	a ## B,		%edi;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%ebp,%edi,4),	d ## D;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	d ## D,		%esi;\
+	add	%esi,		d ## D;\
+	add	k+round(%ebp),	%esi;\
+	xor	%esi,		c ## D;\
+	ror	$1,		c ## D;\
+	add	k+4+round(%ebp),d ## D;\
+	xor	%edi,		d ## D;
+
+/*
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c (already rol $1)
+ * d input register containing d
+ * operations on a and b are interleaved to increase performance
+ */
+#define decrypt_round(a,b,c,d,round)\
+	push	c ## D;\
+	movzx	a ## B,		%edi;\
+	mov	(%ebp,%edi,4),	c ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s1(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%ebp,%edi,4),c ## D;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$15,		a ## D;\
+	xor	s3(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	%esi,		c ## D;\
+	add	c ## D,		%esi;\
+	add	k+round(%ebp),	c ## D;\
+	xor	%edi,		c ## D;\
+	add	k+4+round(%ebp),%esi;\
+	xor	%esi,		d ## D;\
+	rol	$15,		d ## D;
+
+/*
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c (already rol $1)
+ * d input register containing d
+ * operations on a and b are interleaved to increase performance
+ * last round has different rotations for the output preparation
+ */
+#define decrypt_last_round(a,b,c,d,round)\
+	push	c ## D;\
+	movzx	a ## B,		%edi;\
+	mov	(%ebp,%edi,4),	c ## D;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s1(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%ebp,%edi,4),	%esi;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%ebp,%edi,4),c ## D;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%ebp,%edi,4),%esi;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%ebp,%edi,4),c ## D;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%ebp,%edi,4),%esi;\
+	pop	%edi;\
+	add	%esi,		c ## D;\
+	add	c ## D,		%esi;\
+	add	k+round(%ebp),	c ## D;\
+	xor	%edi,		c ## D;\
+	add	k+4+round(%ebp),%esi;\
+	xor	%esi,		d ## D;\
+	ror	$1,		d ## D;
+
+.align 4
+.global twofish_enc_blk
+.global twofish_dec_blk
+
+twofish_enc_blk:
+	push	%ebp			/* save registers according to calling convention*/
+	push    %ebx
+	push    %esi
+	push    %edi
+
+	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base pointer to the crypto tfm */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ctx address */
+	mov     in_blk+16(%esp),%edi	/* input address in edi */
+
+	mov	(%edi),		%eax
+	mov	b_offset(%edi),	%ebx
+	mov	c_offset(%edi),	%ecx
+	mov	d_offset(%edi),	%edx
+	input_whitening(%eax,%ebp,a_offset)
+	ror	$16,	%eax
+	input_whitening(%ebx,%ebp,b_offset)
+	input_whitening(%ecx,%ebp,c_offset)
+	input_whitening(%edx,%ebp,d_offset)
+	rol	$1,	%edx
+
+	encrypt_round(R0,R1,R2,R3,0);
+	encrypt_round(R2,R3,R0,R1,8);
+	encrypt_round(R0,R1,R2,R3,2*8);
+	encrypt_round(R2,R3,R0,R1,3*8);
+	encrypt_round(R0,R1,R2,R3,4*8);
+	encrypt_round(R2,R3,R0,R1,5*8);
+	encrypt_round(R0,R1,R2,R3,6*8);
+	encrypt_round(R2,R3,R0,R1,7*8);
+	encrypt_round(R0,R1,R2,R3,8*8);
+	encrypt_round(R2,R3,R0,R1,9*8);
+	encrypt_round(R0,R1,R2,R3,10*8);
+	encrypt_round(R2,R3,R0,R1,11*8);
+	encrypt_round(R0,R1,R2,R3,12*8);
+	encrypt_round(R2,R3,R0,R1,13*8);
+	encrypt_round(R0,R1,R2,R3,14*8);
+	encrypt_last_round(R2,R3,R0,R1,15*8);
+
+	output_whitening(%eax,%ebp,c_offset)
+	output_whitening(%ebx,%ebp,d_offset)
+	output_whitening(%ecx,%ebp,a_offset)
+	output_whitening(%edx,%ebp,b_offset)
+	mov	out_blk+16(%esp),%edi;
+	mov	%eax,		c_offset(%edi)
+	mov	%ebx,		d_offset(%edi)
+	mov	%ecx,		(%edi)
+	mov	%edx,		b_offset(%edi)
+
+	pop	%edi
+	pop	%esi
+	pop	%ebx
+	pop	%ebp
+	mov	$1,	%eax
+	ret
+
+twofish_dec_blk:
+	push	%ebp			/* save registers according to calling convention*/
+	push    %ebx
+	push    %esi
+	push    %edi
+
+
+	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base pointer to the crypto tfm */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ctx address */
+	mov     in_blk+16(%esp),%edi	/* input address in edi */
+
+	mov	(%edi),		%eax
+	mov	b_offset(%edi),	%ebx
+	mov	c_offset(%edi),	%ecx
+	mov	d_offset(%edi),	%edx
+	output_whitening(%eax,%ebp,a_offset)
+	output_whitening(%ebx,%ebp,b_offset)
+	ror	$16,	%ebx
+	output_whitening(%ecx,%ebp,c_offset)
+	output_whitening(%edx,%ebp,d_offset)
+	rol	$1,	%ecx
+
+	decrypt_round(R0,R1,R2,R3,15*8);
+	decrypt_round(R2,R3,R0,R1,14*8);
+	decrypt_round(R0,R1,R2,R3,13*8);
+	decrypt_round(R2,R3,R0,R1,12*8);
+	decrypt_round(R0,R1,R2,R3,11*8);
+	decrypt_round(R2,R3,R0,R1,10*8);
+	decrypt_round(R0,R1,R2,R3,9*8);
+	decrypt_round(R2,R3,R0,R1,8*8);
+	decrypt_round(R0,R1,R2,R3,7*8);
+	decrypt_round(R2,R3,R0,R1,6*8);
+	decrypt_round(R0,R1,R2,R3,5*8);
+	decrypt_round(R2,R3,R0,R1,4*8);
+	decrypt_round(R0,R1,R2,R3,3*8);
+	decrypt_round(R2,R3,R0,R1,2*8);
+	decrypt_round(R0,R1,R2,R3,1*8);
+	decrypt_last_round(R2,R3,R0,R1,0);
+
+	input_whitening(%eax,%ebp,c_offset)
+	input_whitening(%ebx,%ebp,d_offset)
+	input_whitening(%ecx,%ebp,a_offset)
+	input_whitening(%edx,%ebp,b_offset)
+	mov	out_blk+16(%esp),%edi;
+	mov	%eax,		c_offset(%edi)
+	mov	%ebx,		d_offset(%edi)
+	mov	%ecx,		(%edi)
+	mov	%edx,		b_offset(%edi)
+
+	pop	%edi
+	pop	%esi
+	pop	%ebx
+	pop	%ebp
+	mov	$1,	%eax
+	ret
diff --git a/arch/i386/crypto/twofish.c b/arch/i386/crypto/twofish.c
new file mode 100644
index 0000000..e3004df
--- /dev/null
+++ b/arch/i386/crypto/twofish.c
@@ -0,0 +1,97 @@
+/*
+ *  Glue Code for optimized 586 assembler version of TWOFISH
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+
+asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+
+static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_enc_blk(tfm, dst, src);
+}
+
+static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_dec_blk(tfm, dst, src);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		=	"twofish",
+	.cra_driver_name	=	"twofish-i586",
+	.cra_priority		=	200,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	TF_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct twofish_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	TF_MIN_KEY_SIZE,
+			.cia_max_keysize	=	TF_MAX_KEY_SIZE,
+			.cia_setkey		=	twofish_setkey,
+			.cia_encrypt		=	twofish_encrypt,
+			.cia_decrypt		=	twofish_decrypt
+		}
+	}
+};
+
+static int __init init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
+MODULE_ALIAS("twofish");
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index e6ea00e..ea19d09 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -32,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/compiler.h>
 #include <linux/sched.h>	/* current */
+#include <linux/dmi.h>
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/uaccess.h>
@@ -387,6 +388,33 @@
 	return acpi_processor_preregister_performance(acpi_perf_data);
 }
 
+/*
+ * Some BIOSes do SW_ANY coordination internally, either setting it up in
+ * hardware or doing it in BIOS firmware, without informing the OS. If not
+ * detected, this has the side effect of making the CPU run at a different
+ * speed than the OS intended. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+	bios_with_sw_any_bug = 1;
+	return 0;
+}
+
+static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = {
+	{
+		.callback = sw_any_bug_found,
+		.ident = "Supermicro Server X6DLP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+		},
+	},
+	{ }
+};
+
 static int
 acpi_cpufreq_cpu_init (
 	struct cpufreq_policy   *policy)
@@ -422,8 +450,17 @@
 	 * coordination is required.
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
 		policy->cpus = perf->shared_cpu_map;
+	}
+
+#ifdef CONFIG_SMP
+	dmi_check_system(sw_any_bug_dmi_table);
+	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+		policy->cpus = cpu_core_map[cpu];
+	}
+#endif
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 4f2c3ae..f5cc9f5 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -27,6 +27,7 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 
@@ -52,18 +53,26 @@
 #define	CPU_NEHEMIAH	5
 
 static int cpu_model;
-static unsigned int numscales=16, numvscales;
+static unsigned int numscales=16;
 static unsigned int fsb;
-static int minvid, maxvid;
+
+static struct mV_pos *vrm_mV_table;
+static unsigned char *mV_vrm_table;
+struct f_msr {
+	unsigned char vrm;
+};
+static struct f_msr f_msr_table[32];
+
+static unsigned int highest_speed, lowest_speed; /* kHz */
 static unsigned int minmult, maxmult;
 static int can_scale_voltage;
-static int vrmrev;
 static struct acpi_processor *pr = NULL;
 static struct acpi_processor_cx *cx = NULL;
+static int port22_en;
 
 /* Module parameters */
-static int dont_scale_voltage;
-
+static int scale_voltage;
+static int ignore_latency;
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
 
@@ -71,7 +80,6 @@
 /* Clock ratios multiplied by 10 */
 static int clock_ratio[32];
 static int eblcr_table[32];
-static int voltage_table[32];
 static unsigned int highest_speed, lowest_speed; /* kHz */
 static int longhaul_version;
 static struct cpufreq_frequency_table *longhaul_table;
@@ -124,10 +132,9 @@
 
 /* For processor with BCR2 MSR */
 
-static void do_longhaul1(int cx_address, unsigned int clock_ratio_index)
+static void do_longhaul1(unsigned int clock_ratio_index)
 {
 	union msr_bcr2 bcr2;
-	u32 t;
 
 	rdmsrl(MSR_VIA_BCR2, bcr2.val);
 	/* Enable software clock multiplier */
@@ -136,13 +143,11 @@
 
 	/* Sync to timer tick */
 	safe_halt();
-	ACPI_FLUSH_CPU_CACHE();
 	/* Change frequency on next halt or sleep */
 	wrmsrl(MSR_VIA_BCR2, bcr2.val);
-	/* Invoke C3 */
-	inb(cx_address);
-	/* Dummy op - must do something useless after P_LVL3 read */
-	t = inl(acpi_fadt.xpm_tmr_blk.address);
+	/* Invoke transition */
+	ACPI_FLUSH_CPU_CACHE();
+	halt();
 
 	/* Disable software clock multiplier */
 	local_irq_disable();
@@ -164,11 +169,16 @@
 	longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
 	longhaul.bits.EnableSoftBusRatio = 1;
 
+	if (can_scale_voltage) {
+		longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm;
+		longhaul.bits.EnableSoftVID = 1;
+	}
+
 	/* Sync to timer tick */
 	safe_halt();
-	ACPI_FLUSH_CPU_CACHE();
 	/* Change frequency on next halt or sleep */
 	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	ACPI_FLUSH_CPU_CACHE();
 	/* Invoke C3 */
 	inb(cx_address);
 	/* Dummy op - must do something useless after P_LVL3 read */
@@ -227,10 +237,13 @@
 	outb(0xFF,0xA1);	/* Overkill */
 	outb(0xFE,0x21);	/* TMR0 only */
 
-	/* Disable bus master arbitration */
-	if (pr->flags.bm_check) {
+	if (pr->flags.bm_control) {
+ 		/* Disable bus master arbitration */
 		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1,
 				  ACPI_MTX_DO_NOT_LOCK);
+	} else if (port22_en) {
+		/* Disable AGP and PCI arbiters */
+		outb(3, 0x22);
 	}
 
 	switch (longhaul_version) {
@@ -244,7 +257,7 @@
 	 */
 	case TYPE_LONGHAUL_V1:
 	case TYPE_LONGHAUL_V2:
-		do_longhaul1(cx->address, clock_ratio_index);
+		do_longhaul1(clock_ratio_index);
 		break;
 
 	/*
@@ -259,14 +272,20 @@
 	 * to work in practice.
 	 */
 	case TYPE_POWERSAVER:
+		/* Don't allow wakeup */
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0,
+				  ACPI_MTX_DO_NOT_LOCK);
 		do_powersaver(cx->address, clock_ratio_index);
 		break;
 	}
 
-	/* Enable bus master arbitration */
-	if (pr->flags.bm_check) {
+	if (pr->flags.bm_control) {
+		/* Enable bus master arbitration */
 		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0,
 				  ACPI_MTX_DO_NOT_LOCK);
+	} else if (port22_en) {
+		/* Enable arbiters */
+		outb(0, 0x22);
 	}
 
 	outb(pic2_mask,0xA1);	/* restore mask */
@@ -446,53 +465,57 @@
 static void __init longhaul_setup_voltagescaling(void)
 {
 	union msr_longhaul longhaul;
+	struct mV_pos minvid, maxvid;
+	unsigned int j, speed, pos, kHz_step, numvscales;
 
-	rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-	if (!(longhaul.bits.RevisionID & 1))
+	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	if (!(longhaul.bits.RevisionID & 1)) {
+		printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
 		return;
+	}
 
-	minvid = longhaul.bits.MinimumVID;
-	maxvid = longhaul.bits.MaximumVID;
-	vrmrev = longhaul.bits.VRMRev;
+	if (!longhaul.bits.VRMRev) {
+		printk (KERN_INFO PFX "VRM 8.5\n");
+		vrm_mV_table = &vrm85_mV[0];
+		mV_vrm_table = &mV_vrm85[0];
+	} else {
+		printk (KERN_INFO PFX "Mobile VRM\n");
+		vrm_mV_table = &mobilevrm_mV[0];
+		mV_vrm_table = &mV_mobilevrm[0];
+	}
 
-	if (minvid == 0 || maxvid == 0) {
+	minvid = vrm_mV_table[longhaul.bits.MinimumVID];
+	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
+	numvscales = maxvid.pos - minvid.pos + 1;
+	kHz_step = (highest_speed - lowest_speed) / numvscales;
+
+	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
 		printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
 					"Voltage scaling disabled.\n",
-					minvid/1000, minvid%1000, maxvid/1000, maxvid%1000);
+					minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
-	if (minvid == maxvid) {
+	if (minvid.mV == maxvid.mV) {
 		printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
 				"both %d.%03d. Voltage scaling disabled\n",
-				maxvid/1000, maxvid%1000);
+				maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
-	if (vrmrev==0) {
-		dprintk ("VRM 8.5\n");
-		memcpy (voltage_table, vrm85scales, sizeof(voltage_table));
-		numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25;
-	} else {
-		dprintk ("Mobile VRM\n");
-		memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table));
-		numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5;
+	printk(KERN_INFO PFX "Max VID=%d.%03d  Min VID=%d.%03d, %d possible voltage scales\n",
+		maxvid.mV/1000, maxvid.mV%1000,
+		minvid.mV/1000, minvid.mV%1000,
+		numvscales);
+	
+	j = 0;
+	while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
+		speed = longhaul_table[j].frequency;
+		pos = (speed - lowest_speed) / kHz_step + minvid.pos;
+		f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos];
+		j++;
 	}
 
-	/* Current voltage isn't readable at first, so we need to
-	   set it to a known value. The spec says to use maxvid */
-	longhaul.bits.RevisionKey = longhaul.bits.RevisionID;	/* FIXME: This is bad. */
-	longhaul.bits.EnableSoftVID = 1;
-	longhaul.bits.SoftVID = maxvid;
-	wrmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-	minvid = voltage_table[minvid];
-	maxvid = voltage_table[maxvid];
-
-	dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n",
-		maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales);
-
 	can_scale_voltage = 1;
 }
 
@@ -540,21 +563,33 @@
 	return 1;
 }
 
+/* VIA don't support PM2 reg, but have something similar */
+static int enable_arbiter_disable(void)
+{
+	struct pci_dev *dev;
+	u8 pci_cmd;
+
+	/* Find PLE133 host bridge */
+	dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, NULL);
+	if (dev != NULL) {
+		/* Enable access to port 0x22 */
+		pci_read_config_byte(dev, 0x78, &pci_cmd);
+		if ( !(pci_cmd & 1<<7) ) {
+			pci_cmd |= 1<<7;
+			pci_write_config_byte(dev, 0x78, pci_cmd);
+		}
+		return 1;
+	}
+	return 0;
+}
+
 static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 {
 	struct cpuinfo_x86 *c = cpu_data;
 	char *cpuname=NULL;
 	int ret;
 
-	/* Check ACPI support for C3 state */
-	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
-			 &longhaul_walk_callback, NULL, (void *)&pr);
-	if (pr == NULL) goto err_acpi;
-
-	cx = &pr->power.states[ACPI_STATE_C3];
-	if (cx->address == 0 || cx->latency > 1000) goto err_acpi;
-
-	/* Now check what we have on this motherboard */
+	/* Check what we have on this motherboard */
 	switch (c->x86_model) {
 	case 6:
 		cpu_model = CPU_SAMUEL;
@@ -636,12 +671,36 @@
 		break;
 	};
 
+	/* Find ACPI data for processor */
+	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+			    &longhaul_walk_callback, NULL, (void *)&pr);
+	if (pr == NULL)
+		goto err_acpi;
+
+	if (longhaul_version == TYPE_POWERSAVER) {
+		/* Check ACPI support for C3 state */
+		cx = &pr->power.states[ACPI_STATE_C3];
+		if (cx->address == 0 ||
+		   (cx->latency > 1000 && ignore_latency == 0) )
+			goto err_acpi;
+
+	} else {
+		/* Check ACPI support for bus master arbiter disable */
+		if (!pr->flags.bm_control) {
+			if (!enable_arbiter_disable()) {
+				printk(KERN_ERR PFX "No ACPI support. No VT8601 host bridge. Aborting.\n");
+				return -ENODEV;
+			} else
+				port22_en = 1;
+		}
+	}
+
 	ret = longhaul_get_ranges();
 	if (ret != 0)
 		return ret;
 
 	if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) &&
-		 (dont_scale_voltage==0))
+		 (scale_voltage != 0))
 		longhaul_setup_voltagescaling();
 
 	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
@@ -729,8 +788,10 @@
 	kfree(longhaul_table);
 }
 
-module_param (dont_scale_voltage, int, 0644);
-MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor");
+module_param (scale_voltage, int, 0644);
+MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
+module_param(ignore_latency, int, 0644);
+MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test");
 
 MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
@@ -738,4 +799,3 @@
 
 late_initcall(longhaul_init);
 module_exit(longhaul_exit);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index d3a95d7..bc4682a 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -450,17 +450,45 @@
  * Voltage scales. Div/Mod by 1000 to get actual voltage.
  * Which scale to use depends on the VRM type in use.
  */
-static int __initdata vrm85scales[32] = {
-	1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700,
-	1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300,
-	1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725,
-	1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325,
+
+struct mV_pos {
+	unsigned short mV;
+	unsigned short pos;
 };
 
-static int __initdata mobilevrmscales[32] = {
-	2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
-	1600, 1550, 1500, 1450, 1500, 1350, 1300, -1,
-	1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
-	1075, 1050, 1025, 1000, 975, 950, 925, -1,
+static struct mV_pos __initdata vrm85_mV[32] = {
+	{1250, 8},	{1200, 6},	{1150, 4},	{1100, 2},
+	{1050, 0},	{1800, 30},	{1750, 28},	{1700, 26},
+	{1650, 24},	{1600, 22},	{1550, 20},	{1500, 18},
+	{1450, 16},	{1400, 14},	{1350, 12},	{1300, 10},
+	{1275, 9},	{1225, 7},	{1175, 5},	{1125, 3},
+	{1075, 1},	{1825, 31},	{1775, 29},	{1725, 27},
+	{1675, 25},	{1625, 23},	{1575, 21},	{1525, 19},
+	{1475, 17},	{1425, 15},	{1375, 13},	{1325, 11}
+};
+
+static unsigned char __initdata mV_vrm85[32] = {
+	0x04,	0x14,	0x03,	0x13,	0x02,	0x12,	0x01,	0x11,
+	0x00,	0x10,	0x0f,	0x1f,	0x0e,	0x1e,	0x0d,	0x1d,
+	0x0c,	0x1c,	0x0b,	0x1b,	0x0a,	0x1a,	0x09,	0x19,
+	0x08,	0x18,	0x07,	0x17,	0x06,	0x16,	0x05,	0x15
+};
+
+static struct mV_pos __initdata mobilevrm_mV[32] = {
+	{1750, 31},	{1700, 30},	{1650, 29},	{1600, 28},
+	{1550, 27},	{1500, 26},	{1450, 25},	{1400, 24},
+	{1350, 23},	{1300, 22},	{1250, 21},	{1200, 20},
+	{1150, 19},	{1100, 18},	{1050, 17},	{1000, 16},
+	{975, 15},	{950, 14},	{925, 13},	{900, 12},
+	{875, 11},	{850, 10},	{825, 9},	{800, 8},
+	{775, 7},	{750, 6},	{725, 5},	{700, 4},
+	{675, 3},	{650, 2},	{625, 1},	{600, 0}
+};
+
+static unsigned char __initdata mV_mobilevrm[32] = {
+	0x1f,	0x1e,	0x1d,	0x1c,	0x1b,	0x1a,	0x19,	0x18,
+	0x17,	0x16,	0x15,	0x14,	0x13,	0x12,	0x11,	0x10,
+	0x0f,	0x0e,	0x0d,	0x0c,	0x0b,	0x0a,	0x09,	0x08,
+	0x07,	0x06,	0x05,	0x04,	0x03,	0x02,	0x01,	0x00
 };
 
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index b77f135..7a93253 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -23,6 +23,7 @@
 
 #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <acpi/processor.h>
 #endif
 
@@ -377,6 +378,35 @@
 	return 0;
 }
 
+
+/*
+ * Some BIOSes do SW_ANY coordination internally, either setting it up in hw
+ * or doing it in BIOS firmware, and do not inform the OS about it. If not
+ * detected, this has a side effect of making the CPU run at a different speed
+ * than the OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+	bios_with_sw_any_bug = 1;
+	return 0;
+}
+
+
+static struct dmi_system_id sw_any_bug_dmi_table[] = {
+	{
+		.callback = sw_any_bug_found,
+		.ident = "Supermicro Server X6DLP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+		},
+	},
+	{ }
+};
+
+
 /*
  * centrino_cpu_init_acpi - register with ACPI P-States library
  *
@@ -398,14 +428,24 @@
 		dprintk(PFX "obtaining ACPI data failed\n");
 		return -EIO;
 	}
+
 	policy->shared_type = p->shared_type;
 	/*
 	 * Will let policy->cpus know about dependency only when software 
 	 * coordination is required.
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
 		policy->cpus = p->shared_cpu_map;
+	}
+
+#ifdef CONFIG_SMP
+	dmi_check_system(sw_any_bug_dmi_table);
+	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+		policy->cpus = cpu_core_map[cpu];
+	}
+#endif
 
 	/* verify the acpi_data */
 	if (p->state_count <= 1) {
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 345ffb7..f168220 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -956,6 +956,38 @@
 	return 0;
 }
 
+ /*
+  * This function checks if the entire range <start,end> is mapped with type.
+  *
+  * Note: this function only works correctly if the e820 table is sorted and
+  * non-overlapping, which is the case
+  */
+int __init
+e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
+{
+	u64 start = s;
+	u64 end = e;
+	int i;
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		if (type && ei->type != type)
+			continue;
+		/* does the region overlap (even partly) the current range? */
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+		/* if the region is at the beginning of <start,end> we move
+		 * start to the end of the region since it's ok until there
+		 */
+		if (ei->addr <= start)
+			start = ei->addr + ei->size;
+		/* if start is now at or beyond end, we're done, full
+		 * coverage */
+		if (start >= end)
+			return 1; /* we're done */
+	}
+	return 0;
+}
+
 /*
  * Find the highest page frame number we have available
  */
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 1220dd8..0a362e3 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -237,11 +237,6 @@
 		pci_probe &= ~PCI_PROBE_MMCONF;
 		return NULL;
 	}
-	/* override DMI blacklist */
-	else if (!strcmp(str, "mmconf")) {
-		pci_probe |= PCI_PROBE_MMCONF_FORCE;
-		return NULL;
-	}
 #endif
 	else if (!strcmp(str, "noacpi")) {
 		acpi_noirq_set();
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index ef5a2fa..972180f 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -12,7 +12,6 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
-#include <linux/dmi.h>
 #include <asm/e820.h>
 #include "pci.h"
 
@@ -188,31 +187,9 @@
 	}
 }
 
-static int disable_mcfg(struct dmi_system_id *d)
-{
-	printk("PCI: %s detected. Disabling MCFG.\n", d->ident);
-	pci_probe &= ~PCI_PROBE_MMCONF;
-	return 0;
-}
-
-static struct dmi_system_id __initdata dmi_bad_mcfg[] = {
-	/* Has broken MCFG table that makes the system hang when used */
-        {
-         .callback = disable_mcfg,
-         .ident = "Intel D3C5105 SDV",
-         .matches = {
-                     DMI_MATCH(DMI_BIOS_VENDOR, "Intel"),
-                     DMI_MATCH(DMI_BOARD_NAME, "D26928"),
-                     },
-         },
-         {}
-};
-
 void __init pci_mmcfg_init(void)
 {
-	dmi_check_system(dmi_bad_mcfg);
-
-	if ((pci_probe & (PCI_PROBE_MMCONF_FORCE|PCI_PROBE_MMCONF)) == 0)
+	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
 		return;
 
 	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
@@ -221,6 +198,15 @@
 	    (pci_mmcfg_config[0].base_address == 0))
 		return;
 
+	if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
+			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
+			E820_RESERVED)) {
+		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
+				pci_mmcfg_config[0].base_address);
+		printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
+		return;
+	}
+
 	printk(KERN_INFO "PCI: Using MMCONFIG\n");
 	raw_pci_ops = &pci_mmcfg;
 	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index 49a849b..bf4e793 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -16,8 +16,7 @@
 #define PCI_PROBE_CONF1		0x0002
 #define PCI_PROBE_CONF2		0x0004
 #define PCI_PROBE_MMCONF	0x0008
-#define PCI_PROBE_MMCONF_FORCE	0x0010
-#define PCI_PROBE_MASK		0x00ff
+#define PCI_PROBE_MASK		0x000f
 
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 694b0c6..de1ef2f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -417,6 +417,17 @@
           This option enables support for the Maple 970FX Evaluation Board.
 	  For more informations, refer to <http://www.970eval.com>
 
+config PPC_PASEMI
+	depends on PPC_MULTIPLATFORM && PPC64
+	bool "PA Semi SoC-based platforms"
+	default n
+	select MPIC
+	select PPC_UDBG_16550
+	select GENERIC_TBSYNC
+	help
+	  This option enables support for PA Semi's PWRficient line
+	  of SoC processors, including PA6T-1682M
+
 config PPC_CELL
 	bool
 	default n
@@ -436,7 +447,8 @@
 	select UDBG_RTAS_CONSOLE
 
 config UDBG_RTAS_CONSOLE
-	bool
+	bool "RTAS based debug console"
+	depends on PPC_RTAS
 	default n
 
 config XICS
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index e29ef77d..5ad149b 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -18,6 +18,20 @@
 
 	  This option will slow down process creation somewhat.
 
+config HCALL_STATS
+	bool "Hypervisor call instrumentation"
+	depends on PPC_PSERIES && DEBUG_FS
+	help
+	  Adds code to keep track of the number of hypervisor calls made and
+	  the amount of time spent in hypervisor calls.  Wall time spent in
+	  each call is always calculated, and if available CPU cycles spent
+	  are also calculated.  A directory named hcall_inst is added at the
+	  root of the debugfs filesystem.  Within the hcall_inst directory
+	  are files that contain CPU specific call statistics.
+
+	  This option will add a small amount of overhead to all hypervisor
+	  calls.
+
 config DEBUGGER
 	bool "Enable debugger hooks"
 	depends on DEBUG_KERNEL
@@ -74,6 +88,8 @@
 	  very early during boot. 'xmon=on' will just enable the xmon
 	  debugger hooks.  'xmon=off' will disable the debugger hooks
 	  if CONFIG_XMON_DEFAULT is set.
+	  xmon will print a backtrace on the very first invocation.
+	  'xmon=nobt' will disable this autobacktrace.
 
 config XMON_DEFAULT
 	bool "Enable xmon by default"
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index d961bfe..e737741 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -36,11 +36,16 @@
 $(addprefix $(obj)/,$(zlib) main.o): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader))
 #$(addprefix $(obj)/,main.o): $(addprefix $(obj)/,zlib.h)
 
-src-boot := crt0.S string.S prom.c stdio.c main.c div64.S
+src-boot-$(CONFIG_PPC_MULTIPLATFORM) := of.c
+src-boot := crt0.S string.S stdio.c main.c div64.S $(src-boot-y)
 src-boot += $(zlib)
 src-boot := $(addprefix $(obj)/, $(src-boot))
 obj-boot := $(addsuffix .o, $(basename $(src-boot)))
 
+ifeq ($(call cc-option-yn, -fstack-protector),y)
+BOOTCFLAGS	+= -fno-stack-protector
+endif
+
 BOOTCFLAGS	+= -I$(obj) -I$(srctree)/$(obj)
 
 quiet_cmd_copy_zlib = COPY    $@
diff --git a/arch/powerpc/boot/dts/mpc8349emds.dts b/arch/powerpc/boot/dts/mpc8349emds.dts
index 12f5dbf..efceb34 100644
--- a/arch/powerpc/boot/dts/mpc8349emds.dts
+++ b/arch/powerpc/boot/dts/mpc8349emds.dts
@@ -214,10 +214,10 @@
 					 b800 0 0 4 700 15 8
 
 					/* IDSEL 0x18 */
-					 b000 0 0 1 700 15 8
-					 b000 0 0 2 700 16 8
-					 b000 0 0 3 700 17 8
-					 b000 0 0 4 700 14 8>;
+					 c000 0 0 1 700 15 8
+					 c000 0 0 2 700 16 8
+					 c000 0 0 3 700 17 8
+					 c000 0 0 4 700 14 8>;
 			interrupt-parent = <700>;
 			interrupts = <42 8>;
 			bus-range = <0 0>;
@@ -274,10 +274,10 @@
 					 b800 0 0 4 700 15 8
 
 					/* IDSEL 0x18 */
-					 b000 0 0 1 700 15 8
-					 b000 0 0 2 700 16 8
-					 b000 0 0 3 700 17 8
-					 b000 0 0 4 700 14 8>;
+					 c000 0 0 1 700 15 8
+					 c000 0 0 2 700 16 8
+					 c000 0 0 3 700 17 8
+					 c000 0 0 4 700 14 8>;
 			interrupt-parent = <700>;
 			interrupts = <42 8>;
 			bus-range = <0 0>;
diff --git a/arch/powerpc/boot/flatdevtree.h b/arch/powerpc/boot/flatdevtree.h
new file mode 100644
index 0000000..761c8dc
--- /dev/null
+++ b/arch/powerpc/boot/flatdevtree.h
@@ -0,0 +1,46 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef FLATDEVTREE_H
+#define FLATDEVTREE_H
+
+#include "types.h"
+
+/* Definitions used by the flattened device tree */
+#define OF_DT_HEADER            0xd00dfeed      /* marker */
+#define OF_DT_BEGIN_NODE        0x1     /* Start of node, full name */
+#define OF_DT_END_NODE          0x2     /* End node */
+#define OF_DT_PROP              0x3     /* Property: name off, size, content */
+#define OF_DT_NOP               0x4     /* nop */
+#define OF_DT_END               0x9
+
+#define OF_DT_VERSION           0x10
+
+struct boot_param_header {
+	u32 magic;              /* magic word OF_DT_HEADER */
+	u32 totalsize;          /* total size of DT block */
+	u32 off_dt_struct;      /* offset to structure */
+	u32 off_dt_strings;     /* offset to strings */
+	u32 off_mem_rsvmap;     /* offset to memory reserve map */
+	u32 version;            /* format version */
+	u32 last_comp_version;  /* last compatible version */
+	/* version 2 fields below */
+	u32 boot_cpuid_phys;    /* Physical CPU id we're booting on */
+	/* version 3 fields below */
+	u32 dt_strings_size;    /* size of the DT strings block */
+};
+
+#endif /* FLATDEVTREE_H */
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index b66634c..d719bb9 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -14,17 +14,12 @@
 #include "page.h"
 #include "string.h"
 #include "stdio.h"
-#include "prom.h"
 #include "zlib.h"
+#include "ops.h"
+#include "flatdevtree.h"
 
 extern void flush_cache(void *, unsigned long);
 
-
-/* Value picked to match that used by yaboot */
-#define PROG_START	0x01400000	/* only used on 64-bit systems */
-#define RAM_END		(512<<20)	/* Fixme: use OF */
-#define	ONE_MB		0x100000
-
 extern char _start[];
 extern char __bss_start[];
 extern char _end[];
@@ -33,14 +28,6 @@
 extern char _initrd_start[];
 extern char _initrd_end[];
 
-/* A buffer that may be edited by tools operating on a zImage binary so as to
- * edit the command line passed to vmlinux (by setting /chosen/bootargs).
- * The buffer is put in it's own section so that tools may locate it easier.
- */
-static char builtin_cmdline[512]
-	__attribute__((section("__builtin_cmdline")));
-
-
 struct addr_range {
 	unsigned long addr;
 	unsigned long size;
@@ -51,21 +38,16 @@
 static struct addr_range initrd;
 
 static unsigned long elfoffset;
+static int is_64bit;
 
-static char scratch[46912];	/* scratch space for gunzip, from zlib_inflate_workspacesize() */
+/* scratch space for gunzip; 46912 is from zlib_inflate_workspacesize() */
+static char scratch[46912];
 static char elfheader[256];
 
-
-typedef void (*kernel_entry_t)( unsigned long,
-                                unsigned long,
-                                void *,
-				void *);
-
+typedef void (*kernel_entry_t)(unsigned long, unsigned long, void *);
 
 #undef DEBUG
 
-static unsigned long claim_base;
-
 #define HEAD_CRC	2
 #define EXTRA_FIELD	4
 #define ORIG_NAME	8
@@ -123,24 +105,6 @@
 	zlib_inflateEnd(&s);
 }
 
-static unsigned long try_claim(unsigned long size)
-{
-	unsigned long addr = 0;
-
-	for(; claim_base < RAM_END; claim_base += ONE_MB) {
-#ifdef DEBUG
-		printf("    trying: 0x%08lx\n\r", claim_base);
-#endif
-		addr = (unsigned long)claim(claim_base, size, 0);
-		if ((void *)addr != (void *)-1)
-			break;
-	}
-	if (addr == 0)
-		return 0;
-	claim_base = PAGE_ALIGN(claim_base + size);
-	return addr;
-}
-
 static int is_elf64(void *hdr)
 {
 	Elf64_Ehdr *elf64 = hdr;
@@ -169,16 +133,7 @@
 	vmlinux.size = (unsigned long)elf64ph->p_filesz + elfoffset;
 	vmlinux.memsize = (unsigned long)elf64ph->p_memsz + elfoffset;
 
-#if defined(PROG_START)
-	/*
-	 * Maintain a "magic" minimum address. This keeps some older
-	 * firmware platforms running.
-	 */
-
-	if (claim_base < PROG_START)
-		claim_base = PROG_START;
-#endif
-
+	is_64bit = 1;
 	return 1;
 }
 
@@ -212,47 +167,9 @@
 	return 1;
 }
 
-void export_cmdline(void* chosen_handle)
-{
-        int len;
-        char cmdline[2] = { 0, 0 };
-
-	if (builtin_cmdline[0] == 0)
-		return;
-
-        len = getprop(chosen_handle, "bootargs", cmdline, sizeof(cmdline));
-        if (len > 0 && cmdline[0] != 0)
-		return;
-
-	setprop(chosen_handle, "bootargs", builtin_cmdline,
-		strlen(builtin_cmdline) + 1);
-}
-
-
-void start(unsigned long a1, unsigned long a2, void *promptr, void *sp)
+static void prep_kernel(unsigned long *a1, unsigned long *a2)
 {
 	int len;
-	kernel_entry_t kernel_entry;
-
-	memset(__bss_start, 0, _end - __bss_start);
-
-	prom = (int (*)(void *)) promptr;
-	chosen_handle = finddevice("/chosen");
-	if (chosen_handle == (void *) -1)
-		exit();
-	if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4)
-		exit();
-
-	printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r", _start, sp);
-
-	/*
-	 * The first available claim_base must be above the end of the
-	 * the loaded kernel wrapper file (_start to _end includes the
-	 * initrd image if it is present) and rounded up to a nice
-	 * 1 MB boundary for good measure.
-	 */
-
-	claim_base = _ALIGN_UP((unsigned long)_end, ONE_MB);
 
 	vmlinuz.addr = (unsigned long)_vmlinux_start;
 	vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start);
@@ -263,43 +180,51 @@
 		gunzip(elfheader, sizeof(elfheader),
 				(unsigned char *)vmlinuz.addr, &len);
 	} else
-		memcpy(elfheader, (const void *)vmlinuz.addr, sizeof(elfheader));
+		memcpy(elfheader, (const void *)vmlinuz.addr,
+		       sizeof(elfheader));
 
 	if (!is_elf64(elfheader) && !is_elf32(elfheader)) {
 		printf("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
 		exit();
 	}
+	if (platform_ops.image_hdr)
+		platform_ops.image_hdr(elfheader);
 
-	/* We need to claim the memsize plus the file offset since gzip
+	/* We need to alloc the memsize plus the file offset since gzip
 	 * will expand the header (file offset), then the kernel, then
 	 * possible rubbish we don't care about. But the kernel bss must
 	 * be claimed (it will be zero'd by the kernel itself)
 	 */
 	printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux.memsize);
-	vmlinux.addr = try_claim(vmlinux.memsize);
+	vmlinux.addr = (unsigned long)malloc(vmlinux.memsize);
 	if (vmlinux.addr == 0) {
 		printf("Can't allocate memory for kernel image !\n\r");
 		exit();
 	}
 
 	/*
-	 * Now we try to claim memory for the initrd (and copy it there)
+	 * Now we try to alloc memory for the initrd (and copy it there)
 	 */
 	initrd.size = (unsigned long)(_initrd_end - _initrd_start);
 	initrd.memsize = initrd.size;
 	if ( initrd.size > 0 ) {
-		printf("Allocating 0x%lx bytes for initrd ...\n\r", initrd.size);
-		initrd.addr = try_claim(initrd.size);
+		printf("Allocating 0x%lx bytes for initrd ...\n\r",
+		       initrd.size);
+		initrd.addr = (unsigned long)malloc((u32)initrd.size);
 		if (initrd.addr == 0) {
-			printf("Can't allocate memory for initial ramdisk !\n\r");
+			printf("Can't allocate memory for initial "
+					"ramdisk !\n\r");
 			exit();
 		}
-		a1 = initrd.addr;
-		a2 = initrd.size;
-		printf("initial ramdisk moving 0x%lx <- 0x%lx (0x%lx bytes)\n\r",
-		       initrd.addr, (unsigned long)_initrd_start, initrd.size);
-		memmove((void *)initrd.addr, (void *)_initrd_start, initrd.size);
-		printf("initrd head: 0x%lx\n\r", *((unsigned long *)initrd.addr));
+		*a1 = initrd.addr;
+		*a2 = initrd.size;
+		printf("initial ramdisk moving 0x%lx <- 0x%lx "
+			"(0x%lx bytes)\n\r", initrd.addr,
+			(unsigned long)_initrd_start, initrd.size);
+		memmove((void *)initrd.addr, (void *)_initrd_start,
+			initrd.size);
+		printf("initrd head: 0x%lx\n\r",
+				*((unsigned long *)initrd.addr));
 	}
 
 	/* Eventually gunzip the kernel */
@@ -311,11 +236,10 @@
 			(unsigned char *)vmlinuz.addr, &len);
 		printf("done 0x%lx bytes\n\r", len);
 	} else {
-		memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,vmlinuz.size);
+		memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,
+			vmlinuz.size);
 	}
 
-	export_cmdline(chosen_handle);
-
 	/* Skip over the ELF header */
 #ifdef DEBUG
 	printf("... skipping 0x%lx bytes of ELF header\n\r",
@@ -324,23 +248,107 @@
 	vmlinux.addr += elfoffset;
 
 	flush_cache((void *)vmlinux.addr, vmlinux.size);
-
-	kernel_entry = (kernel_entry_t)vmlinux.addr;
-#ifdef DEBUG
-	printf( "kernel:\n\r"
-		"        entry addr = 0x%lx\n\r"
-		"        a1         = 0x%lx,\n\r"
-		"        a2         = 0x%lx,\n\r"
-		"        prom       = 0x%lx,\n\r"
-		"        bi_recs    = 0x%lx,\n\r",
-		(unsigned long)kernel_entry, a1, a2,
-		(unsigned long)prom, NULL);
-#endif
-
-	kernel_entry(a1, a2, prom, NULL);
-
-	printf("Error: Linux kernel returned to zImage bootloader!\n\r");
-
-	exit();
 }
 
+void __attribute__ ((weak)) ft_init(void *dt_blob)
+{
+}
+
+/* A buffer that may be edited by tools operating on a zImage binary so as to
+ * edit the command line passed to vmlinux (by setting /chosen/bootargs).
+ * The buffer is put in its own section so that tools may locate it more easily.
+ */
+static char builtin_cmdline[COMMAND_LINE_SIZE]
+	__attribute__((__section__("__builtin_cmdline")));
+
+static void get_cmdline(char *buf, int size)
+{
+	void *devp;
+	int len = strlen(builtin_cmdline);
+
+	buf[0] = '\0';
+
+	if (len > 0) { /* builtin_cmdline overrides dt's /chosen/bootargs */
+		len = min(len, size-1);
+		strncpy(buf, builtin_cmdline, len);
+		buf[len] = '\0';
+	}
+	else if ((devp = finddevice("/chosen")))
+		getprop(devp, "bootargs", buf, size);
+}
+
+static void set_cmdline(char *buf)
+{
+	void *devp;
+
+	if ((devp = finddevice("/chosen")))
+		setprop(devp, "bootargs", buf, strlen(buf) + 1);
+}
+
+/* Section where ft can be tacked on after zImage is built */
+union blobspace {
+	struct boot_param_header hdr;
+	char space[8*1024];
+} dt_blob __attribute__((__section__("__builtin_ft")));
+
+struct platform_ops platform_ops;
+struct dt_ops dt_ops;
+struct console_ops console_ops;
+
+void start(unsigned long a1, unsigned long a2, void *promptr, void *sp)
+{
+	int have_dt = 0;
+	kernel_entry_t kentry;
+	char cmdline[COMMAND_LINE_SIZE];
+
+	memset(__bss_start, 0, _end - __bss_start);
+	memset(&platform_ops, 0, sizeof(platform_ops));
+	memset(&dt_ops, 0, sizeof(dt_ops));
+	memset(&console_ops, 0, sizeof(console_ops));
+
+	/* Override the dt_ops and device tree if there was a flat dev
+	 * tree attached to the zImage.
+	 */
+	if (dt_blob.hdr.magic == OF_DT_HEADER) {
+		have_dt = 1;
+		ft_init(&dt_blob);
+	}
+
+	if (platform_init(promptr))
+		exit();
+	if (console_ops.open && (console_ops.open() < 0))
+		exit();
+	if (platform_ops.fixups)
+		platform_ops.fixups();
+
+	printf("\n\rzImage starting: loaded at 0x%p (sp: 0x%p)\n\r",
+	       _start, sp);
+
+	prep_kernel(&a1, &a2);
+
+	/* If cmdline came from zimage wrapper or if we can edit the one
+	 * in the dt, print it out and edit it, if possible.
+	 */
+	if ((strlen(builtin_cmdline) > 0) || console_ops.edit_cmdline) {
+		get_cmdline(cmdline, COMMAND_LINE_SIZE);
+		printf("\n\rLinux/PowerPC load: %s", cmdline);
+		if (console_ops.edit_cmdline)
+			console_ops.edit_cmdline(cmdline, COMMAND_LINE_SIZE);
+		printf("\n\r");
+		set_cmdline(cmdline);
+	}
+
+	if (console_ops.close)
+		console_ops.close();
+
+	kentry = (kernel_entry_t) vmlinux.addr;
+	if (have_dt)
+		kentry(dt_ops.ft_addr(), 0, NULL);
+	else
+		/* XXX initrd addr/size should be passed in properties */
+		kentry(a1, a2, promptr);
+
+	/* console closed so printf below may not work */
+	printf("Error: Linux kernel returned to zImage boot wrapper!\n\r");
+	exit();
+}
diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c
new file mode 100644
index 0000000..fd99f78
--- /dev/null
+++ b/arch/powerpc/boot/of.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+#include "types.h"
+#include "elf.h"
+#include "string.h"
+#include "stdio.h"
+#include "page.h"
+#include "ops.h"
+
+typedef void *ihandle;
+typedef void *phandle;
+
+extern char _end[];
+
+/* Value picked to match that used by yaboot */
+#define PROG_START	0x01400000	/* only used on 64-bit systems */
+#define RAM_END		(512<<20)	/* Fixme: use OF */
+#define	ONE_MB		0x100000
+
+int (*prom) (void *);
+
+
+static unsigned long claim_base;
+
+static int call_prom(const char *service, int nargs, int nret, ...)
+{
+	int i;
+	struct prom_args {
+		const char *service;
+		int nargs;
+		int nret;
+		unsigned int args[12];
+	} args;
+	va_list list;
+
+	args.service = service;
+	args.nargs = nargs;
+	args.nret = nret;
+
+	va_start(list, nret);
+	for (i = 0; i < nargs; i++)
+		args.args[i] = va_arg(list, unsigned int);
+	va_end(list);
+
+	for (i = 0; i < nret; i++)
+		args.args[nargs+i] = 0;
+
+	if (prom(&args) < 0)
+		return -1;
+
+	return (nret > 0)? args.args[nargs]: 0;
+}
+
+static int call_prom_ret(const char *service, int nargs, int nret,
+		  unsigned int *rets, ...)
+{
+	int i;
+	struct prom_args {
+		const char *service;
+		int nargs;
+		int nret;
+		unsigned int args[12];
+	} args;
+	va_list list;
+
+	args.service = service;
+	args.nargs = nargs;
+	args.nret = nret;
+
+	va_start(list, rets);
+	for (i = 0; i < nargs; i++)
+		args.args[i] = va_arg(list, unsigned int);
+	va_end(list);
+
+	for (i = 0; i < nret; i++)
+		args.args[nargs+i] = 0;
+
+	if (prom(&args) < 0)
+		return -1;
+
+	if (rets != (void *) 0)
+		for (i = 1; i < nret; ++i)
+			rets[i-1] = args.args[nargs+i];
+
+	return (nret > 0)? args.args[nargs]: 0;
+}
+
+/*
+ * Older OF's require that when claiming a specific range of addresses,
+ * we claim the physical space in the /memory node and the virtual
+ * space in the chosen mmu node, and then do a map operation to
+ * map virtual to physical.
+ */
+static int need_map = -1;
+static ihandle chosen_mmu;
+static phandle memory;
+
+/* returns true if s2 is a prefix of s1 */
+static int string_match(const char *s1, const char *s2)
+{
+	for (; *s2; ++s2)
+		if (*s1++ != *s2)
+			return 0;
+	return 1;
+}
+
+static int check_of_version(void)
+{
+	phandle oprom, chosen;
+	char version[64];
+
+	oprom = finddevice("/openprom");
+	if (oprom == (phandle) -1)
+		return 0;
+	if (getprop(oprom, "model", version, sizeof(version)) <= 0)
+		return 0;
+	version[sizeof(version)-1] = 0;
+	printf("OF version = '%s'\r\n", version);
+	if (!string_match(version, "Open Firmware, 1.")
+	    && !string_match(version, "FirmWorks,3."))
+		return 0;
+	chosen = finddevice("/chosen");
+	if (chosen == (phandle) -1) {
+		chosen = finddevice("/chosen@0");
+		if (chosen == (phandle) -1) {
+			printf("no chosen\n");
+			return 0;
+		}
+	}
+	if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) {
+		printf("no mmu\n");
+		return 0;
+	}
+	memory = (ihandle) call_prom("open", 1, 1, "/memory");
+	if (memory == (ihandle) -1) {
+		memory = (ihandle) call_prom("open", 1, 1, "/memory@0");
+		if (memory == (ihandle) -1) {
+			printf("no memory node\n");
+			return 0;
+		}
+	}
+	printf("old OF detected\r\n");
+	return 1;
+}
+
+static void *claim(unsigned long virt, unsigned long size, unsigned long align)
+{
+	int ret;
+	unsigned int result;
+
+	if (need_map < 0)
+		need_map = check_of_version();
+	if (align || !need_map)
+		return (void *) call_prom("claim", 3, 1, virt, size, align);
+
+	ret = call_prom_ret("call-method", 5, 2, &result, "claim", memory,
+			    align, size, virt);
+	if (ret != 0 || result == -1)
+		return (void *) -1;
+	ret = call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
+			    align, size, virt);
+	/* 0x12 == coherent + read/write */
+	ret = call_prom("call-method", 6, 1, "map", chosen_mmu,
+			0x12, size, virt, virt);
+	return (void *) virt;
+}
+
+static void *of_try_claim(u32 size)
+{
+	unsigned long addr = 0;
+	static u8 first_time = 1;
+
+	if (first_time) {
+		claim_base = _ALIGN_UP((unsigned long)_end, ONE_MB);
+		first_time = 0;
+	}
+
+	for(; claim_base < RAM_END; claim_base += ONE_MB) {
+#ifdef DEBUG
+		printf("    trying: 0x%08lx\n\r", claim_base);
+#endif
+		addr = (unsigned long)claim(claim_base, size, 0);
+		if ((void *)addr != (void *)-1)
+			break;
+	}
+	if (addr == 0)
+		return NULL;
+	claim_base = PAGE_ALIGN(claim_base + size);
+	return (void *)addr;
+}
+
+static void of_image_hdr(const void *hdr)
+{
+	const Elf64_Ehdr *elf64 = hdr;
+
+	if (elf64->e_ident[EI_CLASS] == ELFCLASS64) {
+		/*
+		 * Maintain a "magic" minimum address. This keeps some older
+		 * firmware platforms running.
+		 */
+		if (claim_base < PROG_START)
+			claim_base = PROG_START;
+	}
+}
+
+static void of_exit(void)
+{
+	call_prom("exit", 0, 0);
+}
+
+/*
+ * OF device tree routines
+ */
+static void *of_finddevice(const char *name)
+{
+	return (phandle) call_prom("finddevice", 1, 1, name);
+}
+
+static int of_getprop(const void *phandle, const char *name, void *buf,
+		const int buflen)
+{
+	return call_prom("getprop", 4, 1, phandle, name, buf, buflen);
+}
+
+static int of_setprop(const void *phandle, const char *name, const void *buf,
+		const int buflen)
+{
+	return call_prom("setprop", 4, 1, phandle, name, buf, buflen);
+}
+
+/*
+ * OF console routines
+ */
+static void *of_stdout_handle;
+
+static int of_console_open(void)
+{
+	void *devp;
+
+	if (((devp = finddevice("/chosen")) != NULL)
+			&& (getprop(devp, "stdout", &of_stdout_handle,
+				sizeof(of_stdout_handle))
+				== sizeof(of_stdout_handle)))
+		return 0;
+
+	return -1;
+}
+
+static void of_console_write(char *buf, int len)
+{
+	call_prom("write", 3, 1, of_stdout_handle, buf, len);
+}
+
+int platform_init(void *promptr)
+{
+	platform_ops.fixups = NULL;
+	platform_ops.image_hdr = of_image_hdr;
+	platform_ops.malloc = of_try_claim;
+	platform_ops.free = NULL;
+	platform_ops.exit = of_exit;
+
+	dt_ops.finddevice = of_finddevice;
+	dt_ops.getprop = of_getprop;
+	dt_ops.setprop = of_setprop;
+	dt_ops.translate_addr = NULL;
+
+	console_ops.open = of_console_open;
+	console_ops.write = of_console_write;
+	console_ops.edit_cmdline = NULL;
+	console_ops.close = NULL;
+	console_ops.data = NULL;
+
+	prom = (int (*)(void *))promptr;
+	return 0;
+}
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
new file mode 100644
index 0000000..135eb4b
--- /dev/null
+++ b/arch/powerpc/boot/ops.h
@@ -0,0 +1,100 @@
+/*
+ * Global definition of all the bootwrapper operations.
+ *
+ * Author: Mark A. Greer <mgreer@mvista.com>
+ *
+ * 2006 (c) MontaVista Software, Inc.  This file is licensed under
+ * the terms of the GNU General Public License version 2.  This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef _PPC_BOOT_OPS_H_
+#define _PPC_BOOT_OPS_H_
+
+#include "types.h"
+
+#define	COMMAND_LINE_SIZE	512
+#define	MAX_PATH_LEN		256
+#define	MAX_PROP_LEN		256 /* What should this be? */
+
+/* Platform specific operations */
+struct platform_ops {
+	void	(*fixups)(void);
+	void	(*image_hdr)(const void *);
+	void *	(*malloc)(u32 size);	/* malloc() yields NULL without it */
+	void	(*free)(void *ptr, u32 size);	/* free() skips a NULL hook */
+	void	(*exit)(void);	/* exit() spins forever after calling it */
+};
+extern struct platform_ops platform_ops;
+
+/* Device Tree operations */
+struct dt_ops {
+	void *	(*finddevice)(const char *name);	/* wrapper: NULL if unset */
+	int	(*getprop)(const void *node, const char *name, void *buf,
+			const int buflen);	/* wrapper: -1 if unset */
+	int	(*setprop)(const void *node, const char *name,
+			const void *buf, const int buflen);	/* wrapper: -1 if unset */
+	u64	(*translate_addr)(const char *path, const u32 *in_addr,
+			const u32 addr_len);
+	unsigned long (*ft_addr)(void);
+};
+extern struct dt_ops dt_ops;
+
+/* Console operations */
+struct console_ops {
+	int	(*open)(void);
+	void	(*write)(char *buf, int len);	/* stdio.c calls it with no NULL check */
+	void	(*edit_cmdline)(char *buf, int len);
+	void	(*close)(void);
+	void	*data;
+};
+extern struct console_ops console_ops;
+
+/* Serial console operations */
+struct serial_console_data {	/* presumably reached via console_ops.data - TODO confirm */
+	int		(*open)(void);
+	void		(*putc)(unsigned char c);
+	unsigned char	(*getc)(void);
+	u8		(*tstc)(void);
+	void		(*close)(void);
+};
+
+extern int platform_init(void *promptr);
+extern void simple_alloc_init(void);
+extern void ft_init(void *dt_blob);
+extern int serial_console_init(void);
+
+static inline void *finddevice(const char *name)
+{	/* NULL when no finddevice hook is installed */
+	return (dt_ops.finddevice) ? dt_ops.finddevice(name) : NULL;
+}
+
+static inline int getprop(void *devp, const char *name, void *buf, int buflen)
+{	/* -1 when no getprop hook is installed */
+	return (dt_ops.getprop) ? dt_ops.getprop(devp, name, buf, buflen) : -1;
+}
+
+static inline int setprop(void *devp, const char *name, void *buf, int buflen)
+{	/* -1 when no setprop hook is installed */
+	return (dt_ops.setprop) ? dt_ops.setprop(devp, name, buf, buflen) : -1;
+}
+
+static inline void *malloc(u32 size)
+{	/* NULL when no malloc hook is installed */
+	return (platform_ops.malloc) ? platform_ops.malloc(size) : NULL;
+}
+
+static inline void free(void *ptr, u32 size)
+{
+	if (platform_ops.free)	/* silently a no-op without a hook */
+		platform_ops.free(ptr, size);
+}
+
+static inline void exit(void)
+{
+	if (platform_ops.exit)
+		platform_ops.exit();
+	for(;;);	/* never returns, even if the hook is missing or comes back */
+}
+
+#endif /* _PPC_BOOT_OPS_H_ */
diff --git a/arch/powerpc/boot/prom.c b/arch/powerpc/boot/prom.c
deleted file mode 100644
index fa00577..0000000
--- a/arch/powerpc/boot/prom.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (C) Paul Mackerras 1997.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <stdarg.h>
-#include <stddef.h>
-#include "string.h"
-#include "stdio.h"
-#include "prom.h"
-
-int (*prom)(void *);
-phandle chosen_handle;
-ihandle stdout;
-
-int call_prom(const char *service, int nargs, int nret, ...)
-{
-	int i;
-	struct prom_args {
-		const char *service;
-		int nargs;
-		int nret;
-		unsigned int args[12];
-	} args;
-	va_list list;
-
-	args.service = service;
-	args.nargs = nargs;
-	args.nret = nret;
-
-	va_start(list, nret);
-	for (i = 0; i < nargs; i++)
-		args.args[i] = va_arg(list, unsigned int);
-	va_end(list);
-
-	for (i = 0; i < nret; i++)
-		args.args[nargs+i] = 0;
-
-	if (prom(&args) < 0)
-		return -1;
-
-	return (nret > 0)? args.args[nargs]: 0;
-}
-
-int call_prom_ret(const char *service, int nargs, int nret,
-		  unsigned int *rets, ...)
-{
-	int i;
-	struct prom_args {
-		const char *service;
-		int nargs;
-		int nret;
-		unsigned int args[12];
-	} args;
-	va_list list;
-
-	args.service = service;
-	args.nargs = nargs;
-	args.nret = nret;
-
-	va_start(list, rets);
-	for (i = 0; i < nargs; i++)
-		args.args[i] = va_arg(list, unsigned int);
-	va_end(list);
-
-	for (i = 0; i < nret; i++)
-		args.args[nargs+i] = 0;
-
-	if (prom(&args) < 0)
-		return -1;
-
-	if (rets != (void *) 0)
-		for (i = 1; i < nret; ++i)
-			rets[i-1] = args.args[nargs+i];
-
-	return (nret > 0)? args.args[nargs]: 0;
-}
-
-int write(void *handle, void *ptr, int nb)
-{
-	return call_prom("write", 3, 1, handle, ptr, nb);
-}
-
-/*
- * Older OF's require that when claiming a specific range of addresses,
- * we claim the physical space in the /memory node and the virtual
- * space in the chosen mmu node, and then do a map operation to
- * map virtual to physical.
- */
-static int need_map = -1;
-static ihandle chosen_mmu;
-static phandle memory;
-
-/* returns true if s2 is a prefix of s1 */
-static int string_match(const char *s1, const char *s2)
-{
-	for (; *s2; ++s2)
-		if (*s1++ != *s2)
-			return 0;
-	return 1;
-}
-
-static int check_of_version(void)
-{
-	phandle oprom, chosen;
-	char version[64];
-
-	oprom = finddevice("/openprom");
-	if (oprom == (phandle) -1)
-		return 0;
-	if (getprop(oprom, "model", version, sizeof(version)) <= 0)
-		return 0;
-	version[sizeof(version)-1] = 0;
-	printf("OF version = '%s'\r\n", version);
-	if (!string_match(version, "Open Firmware, 1.")
-	    && !string_match(version, "FirmWorks,3."))
-		return 0;
-	chosen = finddevice("/chosen");
-	if (chosen == (phandle) -1) {
-		chosen = finddevice("/chosen@0");
-		if (chosen == (phandle) -1) {
-			printf("no chosen\n");
-			return 0;
-		}
-	}
-	if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) {
-		printf("no mmu\n");
-		return 0;
-	}
-	memory = (ihandle) call_prom("open", 1, 1, "/memory");
-	if (memory == (ihandle) -1) {
-		memory = (ihandle) call_prom("open", 1, 1, "/memory@0");
-		if (memory == (ihandle) -1) {
-			printf("no memory node\n");
-			return 0;
-		}
-	}
-	printf("old OF detected\r\n");
-	return 1;
-}
-
-void *claim(unsigned long virt, unsigned long size, unsigned long align)
-{
-	int ret;
-	unsigned int result;
-
-	if (need_map < 0)
-		need_map = check_of_version();
-	if (align || !need_map)
-		return (void *) call_prom("claim", 3, 1, virt, size, align);
-	
-	ret = call_prom_ret("call-method", 5, 2, &result, "claim", memory,
-			    align, size, virt);
-	if (ret != 0 || result == -1)
-		return (void *) -1;
-	ret = call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
-			    align, size, virt);
-	/* 0x12 == coherent + read/write */
-	ret = call_prom("call-method", 6, 1, "map", chosen_mmu,
-			0x12, size, virt, virt);
-	return (void *) virt;
-}
diff --git a/arch/powerpc/boot/prom.h b/arch/powerpc/boot/prom.h
deleted file mode 100644
index a57b184..0000000
--- a/arch/powerpc/boot/prom.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _PPC_BOOT_PROM_H_
-#define _PPC_BOOT_PROM_H_
-
-typedef void *phandle;
-typedef void *ihandle;
-
-extern int (*prom) (void *);
-extern phandle chosen_handle;
-extern ihandle stdout;
-
-int	call_prom(const char *service, int nargs, int nret, ...);
-int	call_prom_ret(const char *service, int nargs, int nret,
-		      unsigned int *rets, ...);
-
-extern int write(void *handle, void *ptr, int nb);
-extern void *claim(unsigned long virt, unsigned long size, unsigned long aln);
-
-static inline void exit(void)
-{
-	call_prom("exit", 0, 0);
-}
-
-static inline phandle finddevice(const char *name)
-{
-	return (phandle) call_prom("finddevice", 1, 1, name);
-}
-
-static inline int getprop(void *phandle, const char *name,
-			  void *buf, int buflen)
-{
-	return call_prom("getprop", 4, 1, phandle, name, buf, buflen);
-}
-
-
-static inline int setprop(void *phandle, const char *name,
-			  void *buf, int buflen)
-{
-	return call_prom("setprop", 4, 1, phandle, name, buf, buflen);
-}
-
-#endif				/* _PPC_BOOT_PROM_H_ */
diff --git a/arch/powerpc/boot/stdio.c b/arch/powerpc/boot/stdio.c
index b5aa522..6d5f638 100644
--- a/arch/powerpc/boot/stdio.c
+++ b/arch/powerpc/boot/stdio.c
@@ -10,7 +10,7 @@
 #include <stddef.h>
 #include "string.h"
 #include "stdio.h"
-#include "prom.h"
+#include "ops.h"
 
 size_t strnlen(const char * s, size_t count)
 {
@@ -320,6 +320,6 @@
 	va_start(args, fmt);
 	n = vsprintf(sprint_buf, fmt, args);
 	va_end(args);
-	write(stdout, sprint_buf, n);
+	console_ops.write(sprint_buf, n);
 	return n;
 }
diff --git a/arch/powerpc/boot/stdio.h b/arch/powerpc/boot/stdio.h
index eb9e16c..73b8a91 100644
--- a/arch/powerpc/boot/stdio.h
+++ b/arch/powerpc/boot/stdio.h
@@ -1,8 +1,16 @@
 #ifndef _PPC_BOOT_STDIO_H_
 #define _PPC_BOOT_STDIO_H_
 
+#include <stdarg.h>
+
+#define	ENOMEM		12	/* Out of Memory */
+#define	EINVAL		22	/* Invalid argument */
+#define ENOSPC		28	/* No space left on device */
+
 extern int printf(const char *fmt, ...);
 
+#define fprintf(stream, args...)	printf(args)	/* stream is dropped */
+
 extern int sprintf(char *buf, const char *fmt, ...);
 
 extern int vsprintf(char *buf, const char *fmt, va_list args);
diff --git a/arch/powerpc/boot/types.h b/arch/powerpc/boot/types.h
new file mode 100644
index 0000000..79d26e7
--- /dev/null
+++ b/arch/powerpc/boot/types.h
@@ -0,0 +1,23 @@
+#ifndef _TYPES_H_
+#define _TYPES_H_
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef unsigned char		u8;
+typedef unsigned short		u16;
+typedef unsigned int		u32;
+typedef unsigned long long	u64;
+
+#define min(x,y) ({ \
+	typeof(x) _x = (x);	/* copies, so each argument is evaluated once */ \
+	typeof(y) _y = (y);	\
+	(void) (&_x == &_y);	/* lets the compiler flag mismatched types */ \
+	_x < _y ? _x : _y; })
+
+#define max(x,y) ({ \
+	typeof(x) _x = (x);	/* copies, so each argument is evaluated once */ \
+	typeof(y) _y = (y);	\
+	(void) (&_x == &_y);	/* lets the compiler flag mismatched types */ \
+	_x > _y ? _x : _y; })
+
+#endif /* _TYPES_H_ */
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
index 2860be1..62ba660 100644
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -496,7 +496,7 @@
 # CONFIG_SKY2 is not set
 # CONFIG_SK98LIN is not set
 # CONFIG_VIA_VELOCITY is not set
-# CONFIG_TIGON3 is not set
+CONFIG_TIGON3=y
 # CONFIG_BNX2 is not set
 # CONFIG_MV643XX_ETH is not set
 
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 7d32ad0..8b133af 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -16,7 +16,7 @@
 obj-y				+= vdso32/
 obj-$(CONFIG_PPC64)		+= setup_64.o binfmt_elf32.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
-				   paca.o cpu_setup_power4.o \
+				   paca.o cpu_setup_ppc970.o \
 				   firmware.o sysfs.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o vector.o
@@ -51,7 +51,7 @@
 extra-y				+= vmlinux.lds
 
 obj-y				+= time.o prom.o traps.o setup-common.o \
-				   udbg.o misc.o
+				   udbg.o misc.o io.o
 obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o misc_32.o
 obj-$(CONFIG_PPC64)		+= misc_64.o dma_64.o iommu.o
 obj-$(CONFIG_PPC_MULTIPLATFORM)	+= prom_init.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 7ee8496..d06f378 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -40,9 +40,10 @@
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #include <asm/lppaca.h>
-#include <asm/iseries/hv_lp_event.h>
 #include <asm/cache.h>
 #include <asm/compat.h>
+#include <asm/mmu.h>
+#include <asm/hvcall.h>
 #endif
 
 #define DEFINE(sym, val) \
@@ -136,11 +137,18 @@
 	DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
 	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
 	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
+	DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
+	DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
 
+	DEFINE(SLBSHADOW_STACKVSID,
+	       offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
+	DEFINE(SLBSHADOW_STACKESID,
+	       offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
 	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
 	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
 	DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
 	DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
+	DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
 #endif /* CONFIG_PPC64 */
 
 	/* RTAS */
@@ -159,6 +167,12 @@
 	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
 	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
 	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+
+	/* hcall statistics */
+	DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
+	DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
+	DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
+	DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
 #endif /* CONFIG_PPC64 */
 	DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
 	DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
@@ -240,6 +254,7 @@
 	DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value));
 	DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
 	DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));
+	DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore));
 
 #ifndef CONFIG_PPC64
 	DEFINE(pbe_address, offsetof(struct pbe, address));
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index f4e5e14e..995fcef 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -158,35 +158,35 @@
 {
 	unsigned int width, height, depth, pitch;
 	unsigned long address = 0;
-	u32 *prop;
+	const u32 *prop;
 
-	prop = (u32 *)get_property(np, "linux,bootx-width", NULL);
+	prop = get_property(np, "linux,bootx-width", NULL);
 	if (prop == NULL)
-		prop = (u32 *)get_property(np, "width", NULL);
+		prop = get_property(np, "width", NULL);
 	if (prop == NULL)
 		return -EINVAL;
 	width = *prop;
-	prop = (u32 *)get_property(np, "linux,bootx-height", NULL);
+	prop = get_property(np, "linux,bootx-height", NULL);
 	if (prop == NULL)
-		prop = (u32 *)get_property(np, "height", NULL);
+		prop = get_property(np, "height", NULL);
 	if (prop == NULL)
 		return -EINVAL;
 	height = *prop;
-	prop = (u32 *)get_property(np, "linux,bootx-depth", NULL);
+	prop = get_property(np, "linux,bootx-depth", NULL);
 	if (prop == NULL)
-		prop = (u32 *)get_property(np, "depth", NULL);
+		prop = get_property(np, "depth", NULL);
 	if (prop == NULL)
 		return -EINVAL;
 	depth = *prop;
 	pitch = width * ((depth + 7) / 8);
-	prop = (u32 *)get_property(np, "linux,bootx-linebytes", NULL);
+	prop = get_property(np, "linux,bootx-linebytes", NULL);
 	if (prop == NULL)
-		prop = (u32 *)get_property(np, "linebytes", NULL);
+		prop = get_property(np, "linebytes", NULL);
 	if (prop)
 		pitch = *prop;
 	if (pitch == 1)
 		pitch = 0x1000;
-	prop = (u32 *)get_property(np, "address", NULL);
+	prop = get_property(np, "address", NULL);
 	if (prop)
 		address = *prop;
 
@@ -214,11 +214,11 @@
 
 int __init btext_find_display(int allow_nonstdout)
 {
-	char *name;
+	const char *name;
 	struct device_node *np = NULL; 
 	int rc = -ENODEV;
 
-	name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	name = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (name != NULL) {
 		np = of_find_node_by_path(name);
 		if (np != NULL) {
diff --git a/arch/powerpc/kernel/cpu_setup_power4.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
similarity index 73%
rename from arch/powerpc/kernel/cpu_setup_power4.S
rename to arch/powerpc/kernel/cpu_setup_ppc970.S
index 76e97aa..6525948 100644
--- a/arch/powerpc/kernel/cpu_setup_power4.S
+++ b/arch/powerpc/kernel/cpu_setup_ppc970.S
@@ -16,27 +16,12 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 
-_GLOBAL(__970_cpu_preinit)
-	/*
-	 * Do nothing if not running in HV mode
-	 */
+_GLOBAL(__cpu_preinit_ppc970)
+	/* Do nothing if not running in HV mode */
 	mfmsr	r0
 	rldicl.	r0,r0,4,63
 	beqlr
 
-	/*
-	 * Deal only with PPC970 and PPC970FX.
-	 */
-	mfspr	r0,SPRN_PVR
-	srwi	r0,r0,16
-	cmpwi	r0,0x39
-	beq	1f
-	cmpwi	r0,0x3c
-	beq	1f
-	cmpwi	r0,0x44
-	bnelr
-1:
-
 	/* Make sure HID4:rm_ci is off before MMU is turned off, that large
 	 * pages are enabled with HID4:61 and clear HID5:DCBZ_size and
 	 * HID5:DCBZ32_ill
@@ -72,23 +57,6 @@
 	isync
 	blr
 
-_GLOBAL(__setup_cpu_ppc970)
-	mfspr	r0,SPRN_HID0
-	li	r11,5			/* clear DOZE and SLEEP */
-	rldimi	r0,r11,52,8		/* set NAP and DPM */
-	li	r11,0
-	rldimi	r0,r11,32,31		/* clear EN_ATTN */
-	mtspr	SPRN_HID0,r0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	sync
-	isync
-	blr
-
 /* Definitions for the table use to save CPU states */
 #define CS_HID0		0
 #define CS_HID1		8
@@ -103,33 +71,30 @@
 	.balign	L1_CACHE_BYTES,0
 	.text
 
-/* Called in normal context to backup CPU 0 state. This
- * does not include cache settings. This function is also
- * called for machine sleep. This does not include the MMU
- * setup, BATs, etc... but rather the "special" registers
- * like HID0, HID1, HID4, etc...
- */
-_GLOBAL(__save_cpu_setup)
-	/* Some CR fields are volatile, we back it up all */
-	mfcr	r7
 
-	/* Get storage ptr */
-	LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
-
-	/* We only deal with 970 for now */
-	mfspr	r0,SPRN_PVR
-	srwi	r0,r0,16
-	cmpwi	r0,0x39
-	beq	1f
-	cmpwi	r0,0x3c
-	beq	1f
-	cmpwi	r0,0x44
-	bne	2f
-
-1:	/* skip if not running in HV mode */
+_GLOBAL(__setup_cpu_ppc970)
+	/* Do nothing if not running in HV mode */
 	mfmsr	r0
 	rldicl.	r0,r0,4,63
-	beq	2f
+	beqlr
+
+	mfspr	r0,SPRN_HID0
+	li	r11,5			/* clear DOZE and SLEEP */
+	rldimi	r0,r11,52,8		/* set NAP and DPM */
+	li	r11,0
+	rldimi	r0,r11,32,31		/* clear EN_ATTN */
+	mtspr	SPRN_HID0,r0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	mfspr	r0,SPRN_HID0
+	sync
+	isync
+
+	/* Save away cpu state */
+	LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
 
 	/* Save HID0,1,4 and 5 */
 	mfspr	r3,SPRN_HID0
@@ -141,35 +106,19 @@
 	mfspr	r3,SPRN_HID5
 	std	r3,CS_HID5(r5)
 
-2:
-	mtcr	r7
 	blr
 
 /* Called with no MMU context (typically MSR:IR/DR off) to
  * restore CPU state as backed up by the previous
  * function. This does not include cache setting
  */
-_GLOBAL(__restore_cpu_setup)
-	/* Get storage ptr (FIXME when using anton reloc as we
-	 * are running with translation disabled here
-	 */
-	LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
-
-	/* We only deal with 970 for now */
-	mfspr	r0,SPRN_PVR
-	srwi	r0,r0,16
-	cmpwi	r0,0x39
-	beq	1f
-	cmpwi	r0,0x3c
-	beq	1f
-	cmpwi	r0,0x44
-	bnelr
-
-1:	/* skip if not running in HV mode */
+_GLOBAL(__restore_cpu_ppc970)
+	/* Do nothing if not running in HV mode */
 	mfmsr	r0
 	rldicl.	r0,r0,4,63
 	beqlr
 
+	LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
 	/* Before accessing memory, we make sure rm_ci is clear */
 	li	r0,0
 	mfspr	r3,SPRN_HID4
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 272e436..190a57e 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -39,7 +39,10 @@
 extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
 #endif /* CONFIG_PPC32 */
+#ifdef CONFIG_PPC64
 extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_ppc970(void);
+#endif /* CONFIG_PPC64 */
 
 /* This table only contains "desktop" CPUs, it need to be filled with embedded
  * ones as well...
@@ -55,6 +58,9 @@
 #define COMMON_USER_POWER6	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
 				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
 				 PPC_FEATURE_TRUE_LE)
+#define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_HAS_ALTIVEC_COMP)
 #define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
 				 PPC_FEATURE_BOOKE)
 
@@ -184,6 +190,7 @@
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
 		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
 		.oprofile_cpu_type	= "ppc64/970",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.platform		= "ppc970",
@@ -199,6 +206,7 @@
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
 		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
 		.oprofile_cpu_type	= "ppc64/970",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.platform		= "ppc970",
@@ -214,6 +222,7 @@
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
 		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
 		.oprofile_cpu_type	= "ppc64/970",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.platform		= "ppc970",
@@ -280,6 +289,17 @@
 		.dcache_bsize		= 128,
 		.platform		= "ppc-cell-be",
 	},
+	{	/* PA Semi PA6T */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00900000,
+		.cpu_name		= "PA6T",
+		.cpu_features		= CPU_FTRS_PA6T,
+		.cpu_user_features	= COMMON_USER_PA6T,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 6,
+		.platform		= "pa6t",
+	},
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
 		.pvr_value		= 0x00000000,
@@ -929,6 +949,7 @@
 			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
+		.platform		= "ppc405",
 	},
 	{	/* 405EP */
 		.pvr_mask		= 0xffff0000,
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 371973b..2f6f5a7 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -80,7 +80,7 @@
 }
 __setup("savemaxmem=", parse_savemaxmem);
 
-/*
+/**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
  * @buf: target memory address for the copy; this can be in kernel address
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
index 36aaa76..6c168f6 100644
--- a/arch/powerpc/kernel/dma_64.c
+++ b/arch/powerpc/kernel/dma_64.c
@@ -35,10 +35,9 @@
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->dma_supported(dev, mask);
-	BUG();
-	return 0;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->dma_supported(dev, mask);
 }
 EXPORT_SYMBOL(dma_supported);
 
@@ -66,10 +65,9 @@
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
-	BUG();
-	return NULL;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
 }
 EXPORT_SYMBOL(dma_alloc_coherent);
 
@@ -78,10 +76,9 @@
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
-	else
-		BUG();
+	BUG_ON(!dma_ops);
+
+	dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
 }
 EXPORT_SYMBOL(dma_free_coherent);
 
@@ -90,10 +87,9 @@
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->map_single(dev, cpu_addr, size, direction);
-	BUG();
-	return (dma_addr_t)0;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->map_single(dev, cpu_addr, size, direction);
 }
 EXPORT_SYMBOL(dma_map_single);
 
@@ -102,10 +98,9 @@
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		dma_ops->unmap_single(dev, dma_addr, size, direction);
-	else
-		BUG();
+	BUG_ON(!dma_ops);
+
+	dma_ops->unmap_single(dev, dma_addr, size, direction);
 }
 EXPORT_SYMBOL(dma_unmap_single);
 
@@ -115,11 +110,10 @@
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->map_single(dev,
-				(page_address(page) + offset), size, direction);
-	BUG();
-	return (dma_addr_t)0;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->map_single(dev, page_address(page) + offset, size,
+			direction);
 }
 EXPORT_SYMBOL(dma_map_page);
 
@@ -128,10 +122,9 @@
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		dma_ops->unmap_single(dev, dma_address, size, direction);
-	else
-		BUG();
+	BUG_ON(!dma_ops);
+
+	dma_ops->unmap_single(dev, dma_address, size, direction);
 }
 EXPORT_SYMBOL(dma_unmap_page);
 
@@ -140,10 +133,9 @@
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		return dma_ops->map_sg(dev, sg, nents, direction);
-	BUG();
-	return 0;
+	BUG_ON(!dma_ops);
+
+	return dma_ops->map_sg(dev, sg, nents, direction);
 }
 EXPORT_SYMBOL(dma_map_sg);
 
@@ -152,9 +144,8 @@
 {
 	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
 
-	if (dma_ops)
-		dma_ops->unmap_sg(dev, sg, nhwentries, direction);
-	else
-		BUG();
+	BUG_ON(!dma_ops);
+
+	dma_ops->unmap_sg(dev, sg, nhwentries, direction);
 }
 EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 54d9f5c..2cd872b 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -375,6 +375,14 @@
 	ld	r7,KSP_VSID(r4)	/* Get new stack's VSID */
 	oris	r0,r6,(SLB_ESID_V)@h
 	ori	r0,r0,(SLB_NUM_BOLTED-1)@l
+
+	/* Update the last bolted SLB */
+	ld	r9,PACA_SLBSHADOWPTR(r13)
+	li	r12,0
+	std	r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
+	std	r7,SLBSHADOW_STACKVSID(r9)  /* Save VSID */
+	std	r0,SLBSHADOW_STACKESID(r9)  /* Save ESID */
+
 	slbie	r6
 	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
 	slbmte	r7,r0
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 6ff3cf5..3065b47 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -132,7 +132,7 @@
 	bne	100b
 
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
-	LOAD_REG_IMMEDIATE(r4, .pSeries_secondary_smp_init)
+	LOAD_REG_IMMEDIATE(r4, .generic_secondary_smp_init)
 	mtctr	r4
 	mr	r3,r24
 	bctr
@@ -1484,19 +1484,17 @@
         . = 0x8000
 
 /*
- * On pSeries, secondary processors spin in the following code.
+ * On pSeries and most other platforms, secondary processors spin
+ * in the following code.
  * At entry, r3 = this processor's number (physical cpu id)
  */
-_GLOBAL(pSeries_secondary_smp_init)
+_GLOBAL(generic_secondary_smp_init)
 	mr	r24,r3
 	
 	/* turn on 64-bit mode */
 	bl	.enable_64b_mode
 	isync
 
-	/* Copy some CPU settings from CPU 0 */
-	bl	.__restore_cpu_setup
-
 	/* Set up a paca value for this processor. Since we have the
 	 * physical cpu id in r24, we need to search the pacas to find
 	 * which logical id maps to our physical one.
@@ -1522,15 +1520,28 @@
 					/* start.			 */
 	sync
 
-	/* Create a temp kernel stack for use before relocation is on.	*/
+#ifndef CONFIG_SMP
+	b	3b			/* Never go on non-SMP		 */
+#else
+	cmpwi	0,r23,0
+	beq	3b			/* Loop until told to go	 */
+
+	/* See if we need to call a cpu state restore handler */
+	LOAD_REG_IMMEDIATE(r23, cur_cpu_spec)
+	ld	r23,0(r23)
+	ld	r23,CPU_SPEC_RESTORE(r23)
+	cmpdi	0,r23,0
+	beq	4f
+	ld	r23,0(r23)
+	mtctr	r23
+	bctrl
+
+4:	/* Create a temp kernel stack for use before relocation is on.	*/
 	ld	r1,PACAEMERGSP(r13)
 	subi	r1,r1,STACK_FRAME_OVERHEAD
 
-	cmpwi	0,r23,0
-#ifdef CONFIG_SMP
-	bne	.__secondary_start
+	b	.__secondary_start
 #endif
-	b 	3b			/* Loop until told to go	 */
 
 #ifdef CONFIG_PPC_ISERIES
 _STATIC(__start_initialization_iSeries)
@@ -1611,7 +1622,16 @@
 	bl	.enable_64b_mode
 
 	/* Setup some critical 970 SPRs before switching MMU off */
-	bl	.__970_cpu_preinit
+	mfspr	r0,SPRN_PVR
+	srwi	r0,r0,16
+	cmpwi	r0,0x39		/* 970 */
+	beq	1f
+	cmpwi	r0,0x3c		/* 970FX */
+	beq	1f
+	cmpwi	r0,0x44		/* 970MP */
+	bne	2f
+1:	bl	.__cpu_preinit_ppc970
+2:
 
 	/* Switch off MMU if not already */
 	LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE)
@@ -1728,7 +1748,7 @@
 _GLOBAL(copy_and_flush)
 	addi	r5,r5,-8
 	addi	r6,r6,-8
-4:	li	r0,16			/* Use the least common		*/
+4:	li	r0,8			/* Use the smallest common	*/
 					/* denominator cache line	*/
 					/* size.  This results in	*/
 					/* extra cache line flushes	*/
@@ -1782,7 +1802,7 @@
 	isync
 
 	/* Copy some CPU settings from CPU 0 */
-	bl	.__restore_cpu_setup
+	bl	.__restore_cpu_ppc970
 
 	/* pSeries do that early though I don't think we really need it */
 	mfmsr	r3
@@ -1932,12 +1952,6 @@
 	mr	r5,r26
 	bl	.identify_cpu
 
-	/* Save some low level config HIDs of CPU0 to be copied to
-	 * other CPUs later on, or used for suspend/resume
-	 */
-	bl	.__save_cpu_setup
-	sync
-
 	/* Do very early kernel initializations, including initial hash table,
 	 * stab and slb setup before we turn on relocation.	*/
 
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 68e5ab0..124dbcb 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -167,7 +167,7 @@
 		   NULL);
 
 static struct ibmebus_dev* __devinit ibmebus_register_device_common(
-	struct ibmebus_dev *dev, char *name)
+	struct ibmebus_dev *dev, const char *name)
 {
 	int err = 0;
 
@@ -194,10 +194,10 @@
 	struct device_node *dn)
 {
 	struct ibmebus_dev *dev;
-	char *loc_code;
+	const char *loc_code;
 	int length;
 
-	loc_code = (char *)get_property(dn, "ibm,loc-code", NULL);
+	loc_code = get_property(dn, "ibm,loc-code", NULL);
 	if (!loc_code) {
                 printk(KERN_WARNING "%s: node %s missing 'ibm,loc-code'\n",
 		       __FUNCTION__, dn->name ? dn->name : "<unknown>");
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
new file mode 100644
index 0000000..e981806
--- /dev/null
+++ b/arch/powerpc/kernel/io.c
@@ -0,0 +1,131 @@
+/*
+ * I/O string operations
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *    Copyright (C) 2006 IBM Corporation
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ *
+ * Rewritten in C by Stephen Rothwell.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/firmware.h>
+#include <asm/bug.h>
+
+void _insb(volatile u8 __iomem *port, void *buf, long count)
+{	/* Read count u8 values from the single address port into buf. */
+	u8 *tbuf = buf;
+	u8 tmp;
+
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));	/* BUGs on iSeries */
+
+	if (unlikely(count <= 0))	/* nothing to transfer */
+		return;
+	asm volatile("sync");
+	do {
+		tmp = *port;	/* every read uses the same address */
+		asm volatile("eieio");
+		*tbuf++ = tmp;	/* only the buffer pointer advances */
+	} while (--count != 0);
+	asm volatile("twi 0,%0,0; isync" : : "r" (tmp));	/* uses the last value read */
+}
+EXPORT_SYMBOL(_insb);
+
+void _outsb(volatile u8 __iomem *port, const void *buf, long count)
+{	/* Write count u8 values from buf to the single address port. */
+	const u8 *tbuf = buf;
+
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));	/* BUGs on iSeries */
+
+	if (unlikely(count <= 0))	/* nothing to transfer */
+		return;
+	asm volatile("sync");
+	do {
+		*port = *tbuf++;	/* only the buffer pointer advances */
+	} while (--count != 0);
+	asm volatile("sync");	/* the store loop has a sync on both sides */
+}
+EXPORT_SYMBOL(_outsb);
+
+void _insw_ns(volatile u16 __iomem *port, void *buf, long count)
+{	/* Read count u16 values from port into buf, stored exactly as loaded. */
+	u16 *tbuf = buf;
+	u16 tmp;
+
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));	/* BUGs on iSeries */
+
+	if (unlikely(count <= 0))	/* nothing to transfer */
+		return;
+	asm volatile("sync");
+	do {
+		tmp = *port;	/* every read uses the same address */
+		asm volatile("eieio");
+		*tbuf++ = tmp;	/* only the buffer pointer advances */
+	} while (--count != 0);
+	asm volatile("twi 0,%0,0; isync" : : "r" (tmp));	/* uses the last value read */
+}
+EXPORT_SYMBOL(_insw_ns);
+
+void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count)
+{	/* Write count u16 values from buf to port, stored exactly as loaded. */
+	const u16 *tbuf = buf;
+
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));	/* BUGs on iSeries */
+
+	if (unlikely(count <= 0))	/* nothing to transfer */
+		return;
+	asm volatile("sync");
+	do {
+		*port = *tbuf++;	/* only the buffer pointer advances */
+	} while (--count != 0);
+	asm volatile("sync");	/* the store loop has a sync on both sides */
+}
+EXPORT_SYMBOL(_outsw_ns);
+
+void _insl_ns(volatile u32 __iomem *port, void *buf, long count)
+{	/* Read count u32 values from port into buf, stored exactly as loaded. */
+	u32 *tbuf = buf;
+	u32 tmp;
+
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));	/* BUGs on iSeries */
+
+	if (unlikely(count <= 0))	/* nothing to transfer */
+		return;
+	asm volatile("sync");
+	do {
+		tmp = *port;	/* every read uses the same address */
+		asm volatile("eieio");
+		*tbuf++ = tmp;	/* only the buffer pointer advances */
+	} while (--count != 0);
+	asm volatile("twi 0,%0,0; isync" : : "r" (tmp));	/* uses the last value read */
+}
+EXPORT_SYMBOL(_insl_ns);
+
+void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count)
+{	/* Write count u32 values from buf to port, stored exactly as loaded. */
+	const u32 *tbuf = buf;
+
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));	/* BUGs on iSeries */
+
+	if (unlikely(count <= 0))	/* nothing to transfer */
+		return;
+	asm volatile("sync");
+	do {
+		*port = *tbuf++;	/* only the buffer pointer advances */
+	} while (--count != 0);
+	asm volatile("sync");	/* the store loop has a sync on both sides */
+}
+EXPORT_SYMBOL(_outsl_ns);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 12c5971..b443233 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -52,6 +52,7 @@
 #include <linux/radix-tree.h>
 #include <linux/mutex.h>
 #include <linux/bootmem.h>
+#include <linux/pci.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -875,12 +876,14 @@
 	else
 		return -1;
 }
+EXPORT_SYMBOL(pci_enable_msi);
 
 void pci_disable_msi(struct pci_dev * pdev)
 {
 	if (ppc_md.disable_msi)
 		ppc_md.disable_msi(pdev);
 }
+EXPORT_SYMBOL(pci_disable_msi);
 
 void pci_scan_msi_device(struct pci_dev *dev) {}
 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) {return -1;}
@@ -888,6 +891,8 @@
 void msi_remove_pci_irq_vectors(struct pci_dev *dev) {}
 void disable_msi_mode(struct pci_dev *dev, int pos, int type) {}
 void pci_no_msi(void) {}
+EXPORT_SYMBOL(pci_enable_msix);
+EXPORT_SYMBOL(pci_disable_msix);
 
 #endif
 
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 40a3929..5e6ddfa 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -39,16 +39,17 @@
 				  phys_addr_t taddr, unsigned long irq,
 				  upf_t flags, int irq_check_parent)
 {
-	u32 *clk, *spd, clock = BASE_BAUD * 16;
+	const u32 *clk, *spd;
+	u32 clock = BASE_BAUD * 16;
 	int index;
 
 	/* get clock freq. if present */
-	clk = (u32 *)get_property(np, "clock-frequency", NULL);
+	clk = get_property(np, "clock-frequency", NULL);
 	if (clk && *clk)
 		clock = *clk;
 
 	/* get default speed if present */
-	spd = (u32 *)get_property(np, "current-speed", NULL);
+	spd = get_property(np, "current-speed", NULL);
 
 	/* If we have a location index, then try to use it */
 	if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)
@@ -113,7 +114,7 @@
 				      struct device_node *soc_dev)
 {
 	u64 addr;
-	u32 *addrp;
+	const u32 *addrp;
 	upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ;
 	struct device_node *tsi = of_get_parent(np);
 
@@ -144,15 +145,15 @@
 static int __init add_legacy_isa_port(struct device_node *np,
 				      struct device_node *isa_brg)
 {
-	u32 *reg;
-	char *typep;
+	const u32 *reg;
+	const char *typep;
 	int index = -1;
 	u64 taddr;
 
 	DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
 
 	/* Get the ISA port number */
-	reg = (u32 *)get_property(np, "reg", NULL);
+	reg = get_property(np, "reg", NULL);
 	if (reg == NULL)
 		return -1;
 
@@ -163,7 +164,7 @@
 	/* Now look for an "ibm,aix-loc" property that gives us ordering
 	 * if any...
 	 */
-	typep = (char *)get_property(np, "ibm,aix-loc", NULL);
+	typep = get_property(np, "ibm,aix-loc", NULL);
 
 	/* If we have a location index, then use it */
 	if (typep && *typep == 'S')
@@ -188,7 +189,7 @@
 				      struct device_node *pci_dev)
 {
 	u64 addr, base;
-	u32 *addrp;
+	const u32 *addrp;
 	unsigned int flags;
 	int iotype, index = -1, lindex = 0;
 
@@ -227,7 +228,7 @@
 	 * we get to their "reg" property
 	 */
 	if (np != pci_dev) {
-		u32 *reg = (u32 *)get_property(np, "reg", NULL);
+		const u32 *reg = get_property(np, "reg", NULL);
 		if (reg && (*reg < 4))
 			index = lindex = *reg;
 	}
@@ -285,13 +286,13 @@
 void __init find_legacy_serial_ports(void)
 {
 	struct device_node *np, *stdout = NULL;
-	char *path;
+	const char *path;
 	int index;
 
 	DBG(" -> find_legacy_serial_port()\n");
 
 	/* Now find out if one of these is out firmware console */
-	path = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	path = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (path != NULL) {
 		stdout = of_find_node_by_path(path);
 		if (stdout)
@@ -491,8 +492,8 @@
 {
 	struct device_node *prom_stdout = NULL;
 	int speed = 0, offset = 0;
-	char *name;
-	u32 *spd;
+	const char *name;
+	const u32 *spd;
 
 	DBG(" -> check_legacy_serial_console()\n");
 
@@ -513,7 +514,7 @@
 	}
 	/* We are getting a weird phandle from OF ... */
 	/* ... So use the full path instead */
-	name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	name = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (name == NULL) {
 		DBG(" no linux,stdout-path !\n");
 		return -ENODEV;
@@ -525,12 +526,12 @@
 	}
 	DBG("stdout is %s\n", prom_stdout->full_name);
 
-	name = (char *)get_property(prom_stdout, "name", NULL);
+	name = get_property(prom_stdout, "name", NULL);
 	if (!name) {
 		DBG(" stdout package has no name !\n");
 		goto not_found;
 	}
-	spd = (u32 *)get_property(prom_stdout, "current-speed", NULL);
+	spd = get_property(prom_stdout, "current-speed", NULL);
 	if (spd)
 		speed = *spd;
 
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 23f34da..41c05dc 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -32,7 +32,6 @@
 #include <asm/rtas.h>
 #include <asm/system.h>
 #include <asm/time.h>
-#include <asm/iseries/it_exp_vpd_panel.h>
 #include <asm/prom.h>
 #include <asm/vdso_datapage.h>
 
@@ -183,8 +182,14 @@
 			      unsigned long *resource)
 {
 	unsigned long rc;
-	rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated,
-			      aggregation, resource);
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_GET_PPP, retbuf);
+
+	*entitled = retbuf[0];
+	*unallocated = retbuf[1];
+	*aggregation = retbuf[2];
+	*resource = retbuf[3];
 
 	log_plpar_hcall_return(rc, "H_GET_PPP");
 
@@ -194,8 +199,12 @@
 static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
 {
 	unsigned long rc;
-	unsigned long dummy;
-	rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_PIC, retbuf);
+
+	*pool_idle_time = retbuf[0];
+	*num_procs = retbuf[1];
 
 	if (rc != H_AUTHORITY)
 		log_plpar_hcall_return(rc, "H_PIC");
@@ -310,12 +319,11 @@
 	int partition_potential_processors;
 	int partition_active_processors;
 	struct device_node *rtas_node;
-	int *lrdrp = NULL;
+	const int *lrdrp = NULL;
 
 	rtas_node = find_path_device("/rtas");
 	if (rtas_node)
-		lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity",
-		                            NULL);
+		lrdrp = get_property(rtas_node, "ibm,lrdr-capacity", NULL);
 
 	if (lrdrp == NULL) {
 		partition_potential_processors = vdso_data->processorCount;
@@ -520,7 +528,8 @@
 	const char *model = "";
 	const char *system_id = "";
 	const char *tmp;
-	unsigned int *lp_index_ptr, lp_index = 0;
+	const unsigned int *lp_index_ptr;
+	unsigned int lp_index = 0;
 
 	seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
 
@@ -540,8 +549,7 @@
 			if (firmware_has_feature(FW_FEATURE_ISERIES))
 				system_id += 4;
 		}
-		lp_index_ptr = (unsigned int *)
-			get_property(rootdn, "ibm,partition-no", NULL);
+		lp_index_ptr = get_property(rootdn, "ibm,partition-no", NULL);
 		if (lp_index_ptr)
 			lp_index = *lp_index_ptr;
 	}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index be58985..a24b09c 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -31,8 +31,8 @@
 	unsigned long begin, end;	/* limits of segment */
 	unsigned long low, high;	/* limits of blocked memory range */
 	struct device_node *node;
-	unsigned long *basep;
-	unsigned int *sizep;
+	const unsigned long *basep;
+	const unsigned int *sizep;
 
 	if (!ppc_md.hpte_clear_all)
 		return -ENOENT;
@@ -72,10 +72,8 @@
 	/* We also should not overwrite the tce tables */
 	for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
 			node = of_find_node_by_type(node, "pci")) {
-		basep = (unsigned long *)get_property(node, "linux,tce-base",
-							NULL);
-		sizep = (unsigned int *)get_property(node, "linux,tce-size",
-							NULL);
+		basep = get_property(node, "linux,tce-base", NULL);
+		sizep = get_property(node, "linux,tce-size", NULL);
 		if (basep == NULL || sizep == NULL)
 			continue;
 
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index f770805..330c9dc 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -43,162 +43,3 @@
 	add	r3,r3,r5
 	mtlr	r0
 	blr
-
-/*
- * I/O string operations
- *
- * insb(port, buf, len)
- * outsb(port, buf, len)
- * insw(port, buf, len)
- * outsw(port, buf, len)
- * insl(port, buf, len)
- * outsl(port, buf, len)
- * insw_ns(port, buf, len)
- * outsw_ns(port, buf, len)
- * insl_ns(port, buf, len)
- * outsl_ns(port, buf, len)
- *
- * The *_ns versions don't do byte-swapping.
- */
-_GLOBAL(_insb)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,1
-	blelr-
-00:	lbz	r5,0(r3)
-	eieio
-	stbu	r5,1(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsb)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,1
-	blelr-
-	sync
-00:	lbzu	r5,1(r4)
-	stb	r5,0(r3)
-	bdnz	00b
-	sync
-	blr
-
-_GLOBAL(_insw)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhbrx	r5,0,r3
-	eieio
-	sthu	r5,2(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsw)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-	sync
-00:	lhzu	r5,2(r4)
-	sthbrx	r5,0,r3
-	bdnz	00b
-	sync
-	blr
-
-_GLOBAL(_insl)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwbrx	r5,0,r3
-	eieio
-	stwu	r5,4(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-_GLOBAL(_outsl)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-	sync
-00:	lwzu	r5,4(r4)
-	stwbrx	r5,0,r3
-	bdnz	00b
-	sync
-	blr
-
-#ifdef CONFIG_PPC32
-_GLOBAL(__ide_mm_insw)
-#endif
-_GLOBAL(_insw_ns)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhz	r5,0(r3)
-	eieio
-	sthu	r5,2(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-#ifdef CONFIG_PPC32
-_GLOBAL(__ide_mm_outsw)
-#endif
-_GLOBAL(_outsw_ns)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-	sync
-00:	lhzu	r5,2(r4)
-	sth	r5,0(r3)
-	bdnz	00b
-	sync
-	blr
-
-#ifdef CONFIG_PPC32
-_GLOBAL(__ide_mm_insl)
-#endif
-_GLOBAL(_insl_ns)
-	sync
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwz	r5,0(r3)
-	eieio
-	stwu	r5,4(r4)
-	bdnz	00b
-	twi	0,r5,0
-	isync
-	blr
-
-#ifdef CONFIG_PPC32
-_GLOBAL(__ide_mm_outsl)
-#endif
-_GLOBAL(_outsl_ns)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-	sync
-00:	lwzu	r5,4(r4)
-	stw	r5,0(r3)
-	bdnz	00b
-	sync
-	blr
-
diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c
index 3262b73..397c83e 100644
--- a/arch/powerpc/kernel/of_device.c
+++ b/arch/powerpc/kernel/of_device.c
@@ -189,27 +189,9 @@
 int of_device_register(struct of_device *ofdev)
 {
 	int rc;
-	struct of_device **odprop;
 
 	BUG_ON(ofdev->node == NULL);
 
-	odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL);
-	if (!odprop) {
-		struct property *new_prop;
-	
-		new_prop = kmalloc(sizeof(struct property) + sizeof(struct of_device *),
-			GFP_KERNEL);
-		if (new_prop == NULL)
-			return -ENOMEM;
-		new_prop->name = "linux,device";
-		new_prop->length = sizeof(sizeof(struct of_device *));
-		new_prop->value = (unsigned char *)&new_prop[1];
-		odprop = (struct of_device **)new_prop->value;
-		*odprop = NULL;
-		prom_add_property(ofdev->node, new_prop);
-	}
-	*odprop = ofdev;
-
 	rc = device_register(&ofdev->dev);
 	if (rc)
 		return rc;
@@ -221,14 +203,8 @@
 
 void of_device_unregister(struct of_device *ofdev)
 {
-	struct of_device **odprop;
-
 	device_remove_file(&ofdev->dev, &dev_attr_devspec);
 
-	odprop = (struct of_device **)get_property(ofdev->node, "linux,device", NULL);
-	if (odprop)
-		*odprop = NULL;
-
 	device_unregister(&ofdev->dev);
 }
 
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index c68741f..55f1a25 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -17,6 +17,7 @@
 #include <asm/lppaca.h>
 #include <asm/iseries/it_lp_reg_save.h>
 #include <asm/paca.h>
+#include <asm/mmu.h>
 
 
 /* This symbol is provided by the linker - let it fill in the paca
@@ -45,6 +46,17 @@
 	},
 };
 
+/*
+ * 3 persistent SLBs are registered here.  The buffer will be zero
+ * initially, hence will all be invalid until we actually write them.
+ */
+struct slb_shadow slb_shadow[] __cacheline_aligned = {
+	[0 ... (NR_CPUS-1)] = {
+		.persistent = SLB_NUM_BOLTED,
+		.buffer_length = sizeof(struct slb_shadow),
+	},
+};
+
 /* The Paca is an array with one entry per processor.  Each contains an
  * lppaca, which contains the information shared between the
  * hypervisor and Linux.
@@ -59,7 +71,8 @@
 	.lock_token = 0x8000,						    \
 	.paca_index = (number),		/* Paca Index */		    \
 	.kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL,		    \
-	.hw_cpu_id = 0xffff,
+	.hw_cpu_id = 0xffff,						    \
+	.slb_shadow_ptr = &slb_shadow[number],
 
 #ifdef CONFIG_PPC_ISERIES
 #define PACA_INIT_ISERIES(number)					    \
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 09b1e1b..9b49f86 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -633,12 +633,12 @@
 static void
 make_one_node_map(struct device_node* node, u8 pci_bus)
 {
-	int *bus_range;
+	const int *bus_range;
 	int len;
 
 	if (pci_bus >= pci_bus_count)
 		return;
-	bus_range = (int *) get_property(node, "bus-range", &len);
+	bus_range = get_property(node, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, "
 		       "assuming it starts at 0\n", node->full_name);
@@ -648,13 +648,13 @@
 
 	for (node=node->child; node != 0;node = node->sibling) {
 		struct pci_dev* dev;
-		unsigned int *class_code, *reg;
+		const unsigned int *class_code, *reg;
 	
-		class_code = (unsigned int *) get_property(node, "class-code", NULL);
+		class_code = get_property(node, "class-code", NULL);
 		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
 			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
 			continue;
-		reg = (unsigned int *)get_property(node, "reg", NULL);
+		reg = get_property(node, "reg", NULL);
 		if (!reg)
 			continue;
 		dev = pci_find_slot(pci_bus, ((reg[0] >> 8) & 0xff));
@@ -669,7 +669,7 @@
 {
 	int i;
 	struct pci_controller* hose;
-	u8* of_prop_map;
+	struct property *map_prop;
 
 	pci_to_OF_bus_map = (u8*)kmalloc(pci_bus_count, GFP_KERNEL);
 	if (!pci_to_OF_bus_map) {
@@ -691,9 +691,12 @@
 			continue;
 		make_one_node_map(node, hose->first_busno);
 	}
-	of_prop_map = get_property(find_path_device("/"), "pci-OF-bus-map", NULL);
-	if (of_prop_map)
-		memcpy(of_prop_map, pci_to_OF_bus_map, pci_bus_count);
+	map_prop = of_find_property(find_path_device("/"),
+			"pci-OF-bus-map", NULL);
+	if (map_prop) {
+		BUG_ON(pci_bus_count > map_prop->length);
+		memcpy(map_prop->value, pci_to_OF_bus_map, pci_bus_count);
+	}
 #ifdef DEBUG
 	printk("PCI->OF bus map:\n");
 	for (i=0; i<pci_bus_count; i++) {
@@ -712,7 +715,7 @@
 	struct device_node* sub_node;
 
 	for (; node != 0;node = node->sibling) {
-		unsigned int *class_code;
+		const unsigned int *class_code;
 	
 		if (filter(node, data))
 			return node;
@@ -722,7 +725,7 @@
 		 * a fake root for all functions of a multi-function device,
 		 * we go down them as well.
 		 */
-		class_code = (unsigned int *) get_property(node, "class-code", NULL);
+		class_code = get_property(node, "class-code", NULL);
 		if ((!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
 			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) &&
 			strcmp(node->name, "multifunc-device"))
@@ -737,10 +740,10 @@
 static int
 scan_OF_pci_childs_iterator(struct device_node* node, void* data)
 {
-	unsigned int *reg;
+	const unsigned int *reg;
 	u8* fdata = (u8*)data;
 	
-	reg = (unsigned int *) get_property(node, "reg", NULL);
+	reg = get_property(node, "reg", NULL);
 	if (reg && ((reg[0] >> 8) & 0xff) == fdata[1]
 		&& ((reg[0] >> 16) & 0xff) == fdata[0])
 		return 1;
@@ -841,7 +844,7 @@
 int
 pci_device_from_OF_node(struct device_node* node, u8* bus, u8* devfn)
 {
-	unsigned int *reg;
+	const unsigned int *reg;
 	struct pci_controller* hose;
 	struct pci_dev* dev = NULL;
 	
@@ -854,7 +857,7 @@
 	if (!scan_OF_pci_childs(((struct device_node*)hose->arch_data)->child,
 			find_OF_pci_device_filter, (void *)node))
 		return -ENODEV;
-	reg = (unsigned int *) get_property(node, "reg", NULL);
+	reg = get_property(node, "reg", NULL);
 	if (!reg)
 		return -ENODEV;
 	*bus = (reg[0] >> 16) & 0xff;
@@ -885,8 +888,8 @@
 			   struct device_node *dev, int primary)
 {
 	static unsigned int static_lc_ranges[256] __initdata;
-	unsigned int *dt_ranges, *lc_ranges, *ranges, *prev;
-	unsigned int size;
+	const unsigned int *dt_ranges;
+	unsigned int *lc_ranges, *ranges, *prev, size;
 	int rlen = 0, orig_rlen;
 	int memno = 0;
 	struct resource *res;
@@ -897,7 +900,7 @@
 	 * that can have more than 3 ranges, fortunately using contiguous
 	 * addresses -- BenH
 	 */
-	dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
+	dt_ranges = get_property(dev, "ranges", &rlen);
 	if (!dt_ranges)
 		return;
 	/* Sanity check, though hopefully that never happens */
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 138134c..c1b1e14 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -185,34 +185,6 @@
 	spin_unlock(&hose_spinlock);
 }
 
-static void add_linux_pci_domain(struct device_node *dev,
-				 struct pci_controller *phb)
-{
-	struct property *of_prop;
-	unsigned int size;
-
-	of_prop = (struct property *)
-		get_property(dev, "linux,pci-domain", &size);
-	if (of_prop != NULL)
-		return;
-	WARN_ON(of_prop && size < sizeof(int));
-	if (of_prop && size < sizeof(int))
-		of_prop = NULL;
-	size = sizeof(struct property) + sizeof(int);
-	if (of_prop == NULL) {
-		if (mem_init_done)
-			of_prop = kmalloc(size, GFP_KERNEL);
-		else
-			of_prop = alloc_bootmem(size);
-	}
-	memset(of_prop, 0, sizeof(struct property));
-	of_prop->name = "linux,pci-domain";
-	of_prop->length = sizeof(int);
-	of_prop->value = (unsigned char *)&of_prop[1];
-	*((int *)of_prop->value) = phb->global_number;
-	prom_add_property(dev, of_prop);
-}
-
 struct pci_controller * pcibios_alloc_controller(struct device_node *dev)
 {
 	struct pci_controller *phb;
@@ -226,22 +198,13 @@
 	pci_setup_pci_controller(phb);
 	phb->arch_data = dev;
 	phb->is_dynamic = mem_init_done;
-	if (dev) {
+	if (dev)
 		PHB_SET_NODE(phb, of_node_to_nid(dev));
-		add_linux_pci_domain(dev, phb);
-	}
 	return phb;
 }
 
 void pcibios_free_controller(struct pci_controller *phb)
 {
-	if (phb->arch_data) {
-		struct device_node *np = phb->arch_data;
-		int *domain = (int *)get_property(np,
-						  "linux,pci-domain", NULL);
-		if (domain)
-			*domain = -1;
-	}
 	if (phb->is_dynamic)
 		kfree(phb);
 }
@@ -283,10 +246,10 @@
 #ifdef CONFIG_PPC_MULTIPLATFORM
 static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
 {
-	u32 *prop;
+	const u32 *prop;
 	int len;
 
-	prop = (u32 *) get_property(np, name, &len);
+	prop = get_property(np, name, &len);
 	if (prop && len >= 4)
 		return *prop;
 	return def;
@@ -315,10 +278,11 @@
 	u64 base, size;
 	unsigned int flags;
 	struct resource *res;
-	u32 *addrs, i;
+	const u32 *addrs;
+	u32 i;
 	int proplen;
 
-	addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
+	addrs = get_property(node, "assigned-addresses", &proplen);
 	if (!addrs)
 		return;
 	DBG("    parse addresses (%d bytes) @ %p\n", proplen, addrs);
@@ -418,7 +382,7 @@
 				  struct pci_bus *bus)
 {
 	struct device_node *child = NULL;
-	u32 *reg;
+	const u32 *reg;
 	int reglen, devfn;
 	struct pci_dev *dev;
 
@@ -426,7 +390,7 @@
 
 	while ((child = of_get_next_child(node, child)) != NULL) {
 		DBG("  * %s\n", child->full_name);
-		reg = (u32 *) get_property(child, "reg", &reglen);
+		reg = get_property(child, "reg", &reglen);
 		if (reg == NULL || reglen < 20)
 			continue;
 		devfn = (reg[0] >> 8) & 0xff;
@@ -450,7 +414,7 @@
 			 	struct pci_dev *dev)
 {
 	struct pci_bus *bus;
-	u32 *busrange, *ranges;
+	const u32 *busrange, *ranges;
 	int len, i, mode;
 	struct resource *res;
 	unsigned int flags;
@@ -459,13 +423,13 @@
 	DBG("of_scan_pci_bridge(%s)\n", node->full_name);
 
 	/* parse bus-range property */
-	busrange = (u32 *) get_property(node, "bus-range", &len);
+	busrange = get_property(node, "bus-range", &len);
 	if (busrange == NULL || len != 8) {
 		printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
 		       node->full_name);
 		return;
 	}
-	ranges = (u32 *) get_property(node, "ranges", &len);
+	ranges = get_property(node, "ranges", &len);
 	if (ranges == NULL) {
 		printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
 		       node->full_name);
@@ -929,13 +893,13 @@
 		unsigned int size;
 	};
 
-	struct isa_range *range;
+	const struct isa_range *range;
 	unsigned long pci_addr;
 	unsigned int isa_addr;
 	unsigned int size;
 	int rlen = 0;
 
-	range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
+	range = get_property(isa_node, "ranges", &rlen);
 	if (range == NULL || (rlen < sizeof(struct isa_range))) {
 		printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
 		       "mapping 64k\n");
@@ -976,7 +940,8 @@
 void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
 					    struct device_node *dev, int prim)
 {
-	unsigned int *ranges, pci_space;
+	const unsigned int *ranges;
+	unsigned int pci_space;
 	unsigned long size;
 	int rlen = 0;
 	int memno = 0;
@@ -994,7 +959,7 @@
 	 *			(size depending on dev->n_addr_cells)
 	 *   cells 4+5 or 5+6:	the size of the range
 	 */
-	ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
+	ranges = get_property(dev, "ranges", &rlen);
 	if (ranges == NULL)
 		return;
 	hose->io_base_phys = 0;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 1c18953..68df018 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -40,8 +40,8 @@
 static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
 {
 	struct pci_controller *phb = data;
-	int *type = (int *)get_property(dn, "ibm,pci-config-space-type", NULL);
-	u32 *regs;
+	const int *type = get_property(dn, "ibm,pci-config-space-type", NULL);
+	const u32 *regs;
 	struct pci_dn *pdn;
 
 	if (mem_init_done)
@@ -54,14 +54,14 @@
 	dn->data = pdn;
 	pdn->node = dn;
 	pdn->phb = phb;
-	regs = (u32 *)get_property(dn, "reg", NULL);
+	regs = get_property(dn, "reg", NULL);
 	if (regs) {
 		/* First register entry is addr (00BBSS00)  */
 		pdn->busno = (regs[0] >> 16) & 0xff;
 		pdn->devfn = (regs[0] >> 8) & 0xff;
 	}
 	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
-		u32 *busp = (u32 *)get_property(dn, "linux,subbus", NULL);
+		const u32 *busp = get_property(dn, "linux,subbus", NULL);
 		if (busp)
 			pdn->bussubno = *busp;
 	}
@@ -96,10 +96,11 @@
 
 	/* We started with a phb, iterate all childs */
 	for (dn = start->child; dn; dn = nextdn) {
-		u32 *classp, class;
+		const u32 *classp;
+		u32 class;
 
 		nextdn = NULL;
-		classp = (u32 *)get_property(dn, "class-code", NULL);
+		classp = get_property(dn, "class-code", NULL);
 		class = classp ? *classp : 0;
 
 		if (pre && ((ret = pre(dn, data)) != NULL))
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 39d3bfc..807193a 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -91,25 +91,10 @@
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
-
-#ifndef  __powerpc64__
-EXPORT_SYMBOL(__ide_mm_insl);
-EXPORT_SYMBOL(__ide_mm_outsw);
-EXPORT_SYMBOL(__ide_mm_insw);
-EXPORT_SYMBOL(__ide_mm_outsl);
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(copy_4K_page);
 #endif
 
-EXPORT_SYMBOL(_insb);
-EXPORT_SYMBOL(_outsb);
-EXPORT_SYMBOL(_insw);
-EXPORT_SYMBOL(_outsw);
-EXPORT_SYMBOL(_insl);
-EXPORT_SYMBOL(_outsl);
-EXPORT_SYMBOL(_insw_ns);
-EXPORT_SYMBOL(_outsw_ns);
-EXPORT_SYMBOL(_insl_ns);
-EXPORT_SYMBOL(_outsl_ns);
-
 #if defined(CONFIG_PPC32) && (defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE))
 EXPORT_SYMBOL(ppc_ide_md);
 #endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index a1787ff..eb913f8 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -757,24 +757,9 @@
 static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
 {
 	cell_t *p = *cellp;
-	unsigned long r;
 
-	/* Ignore more than 2 cells */
-	while (s > sizeof(unsigned long) / 4) {
-		p++;
-		s--;
-	}
-	r = *p++;
-#ifdef CONFIG_PPC64
-	if (s > 1) {
-		r <<= 32;
-		r |= *(p++);
-		s--;
-	}
-#endif
-
-	*cellp = p;
-	return r;
+	*cellp = p + s;
+	return of_read_ulong(p, s);
 }
 
 
@@ -942,11 +927,11 @@
 int
 prom_n_addr_cells(struct device_node* np)
 {
-	int* ip;
+	const int *ip;
 	do {
 		if (np->parent)
 			np = np->parent;
-		ip = (int *) get_property(np, "#address-cells", NULL);
+		ip = get_property(np, "#address-cells", NULL);
 		if (ip != NULL)
 			return *ip;
 	} while (np->parent);
@@ -958,11 +943,11 @@
 int
 prom_n_size_cells(struct device_node* np)
 {
-	int* ip;
+	const int* ip;
 	do {
 		if (np->parent)
 			np = np->parent;
-		ip = (int *) get_property(np, "#size-cells", NULL);
+		ip = get_property(np, "#size-cells", NULL);
 		if (ip != NULL)
 			return *ip;
 	} while (np->parent);
@@ -1034,7 +1019,7 @@
 	const char* cp;
 	int cplen, l;
 
-	cp = (char *) get_property(device, "compatible", &cplen);
+	cp = get_property(device, "compatible", &cplen);
 	if (cp == NULL)
 		return 0;
 	while (cplen > 0) {
@@ -1449,7 +1434,7 @@
 {
 	struct device_node *parent = of_get_parent(node);
 	int err = 0;
-	phandle *ibm_phandle;
+	const phandle *ibm_phandle;
 
 	node->name = get_property(node, "name", NULL);
 	node->type = get_property(node, "device_type", NULL);
@@ -1466,8 +1451,7 @@
 		return -ENODEV;
 
 	/* fix up new node's linux_phandle field */
-	if ((ibm_phandle = (unsigned int *)get_property(node,
-							"ibm,phandle", NULL)))
+	if ((ibm_phandle = get_property(node, "ibm,phandle", NULL)))
 		node->linux_phandle = *ibm_phandle;
 
 out:
@@ -1528,7 +1512,7 @@
  * Find a property with a given name for a given node
  * and return the value.
  */
-void *get_property(struct device_node *np, const char *name, int *lenp)
+const void *get_property(struct device_node *np, const char *name, int *lenp)
 {
 	struct property *pp = of_find_property(np,name,lenp);
 	return pp ? pp->value : NULL;
@@ -1658,16 +1642,16 @@
 	hardid = get_hard_smp_processor_id(cpu);
 
 	for_each_node_by_type(np, "cpu") {
-		u32 *intserv;
+		const u32 *intserv;
 		unsigned int plen, t;
 
 		/* Check for ibm,ppc-interrupt-server#s. If it doesn't exist
 		 * fallback to "reg" property and assume no threads
 		 */
-		intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s",
-					      &plen);
+		intserv = get_property(np, "ibm,ppc-interrupt-server#s",
+				&plen);
 		if (intserv == NULL) {
-			u32 *reg = (u32 *)get_property(np, "reg", NULL);
+			const u32 *reg = get_property(np, "reg", NULL);
 			if (reg == NULL)
 				continue;
 			if (*reg == hardid) {
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 4394e54..b9176163 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2033,16 +2033,22 @@
 #endif
 
 #ifdef CONFIG_PPC_CHRP
-/* Pegasos lacks the "ranges" property in the isa node */
+/* Pegasos and BriQ lack the "ranges" property in the isa node */
 static void __init fixup_device_tree_chrp(void)
 {
 	phandle isa;
 	u32 isa_ranges[6];
+	u32 rloc = 0x01006000; /* IO space; PCI device = 12 */
 	char *name;
 	int rc;
 
 	name = "/pci@80000000/isa@c";
 	isa = call_prom("finddevice", 1, 1, ADDR(name));
+	if (!PHANDLE_VALID(isa)) {
+		name = "/pci@ff500000/isa@6";
+		isa = call_prom("finddevice", 1, 1, ADDR(name));
+		rloc = 0x01003000; /* IO space; PCI device = 6 */
+	}
 	if (!PHANDLE_VALID(isa))
 		return;
 
@@ -2054,7 +2060,7 @@
 
 	isa_ranges[0] = 0x1;
 	isa_ranges[1] = 0x0;
-	isa_ranges[2] = 0x01006000;
+	isa_ranges[2] = rloc;
 	isa_ranges[3] = 0x0;
 	isa_ranges[4] = 0x0;
 	isa_ranges[5] = 0x00010000;
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index a10825a..603dff3 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -27,7 +27,7 @@
 
 /* Debug utility */
 #ifdef DEBUG
-static void of_dump_addr(const char *s, u32 *addr, int na)
+static void of_dump_addr(const char *s, const u32 *addr, int na)
 {
 	printk("%s", s);
 	while(na--)
@@ -35,7 +35,7 @@
 	printk("\n");
 }
 #else
-static void of_dump_addr(const char *s, u32 *addr, int na) { }
+static void of_dump_addr(const char *s, const u32 *addr, int na) { }
 #endif
 
 
@@ -46,9 +46,10 @@
 	int		(*match)(struct device_node *parent);
 	void		(*count_cells)(struct device_node *child,
 				       int *addrc, int *sizec);
-	u64		(*map)(u32 *addr, u32 *range, int na, int ns, int pna);
+	u64		(*map)(u32 *addr, const u32 *range,
+				int na, int ns, int pna);
 	int		(*translate)(u32 *addr, u64 offset, int na);
-	unsigned int	(*get_flags)(u32 *addr);
+	unsigned int	(*get_flags)(const u32 *addr);
 };
 
 
@@ -65,7 +66,8 @@
 		*sizec = prom_n_size_cells(dev);
 }
 
-static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna)
+static u64 of_bus_default_map(u32 *addr, const u32 *range,
+		int na, int ns, int pna)
 {
 	u64 cp, s, da;
 
@@ -93,7 +95,7 @@
 	return 0;
 }
 
-static unsigned int of_bus_default_get_flags(u32 *addr)
+static unsigned int of_bus_default_get_flags(const u32 *addr)
 {
 	return IORESOURCE_MEM;
 }
@@ -118,7 +120,7 @@
 		*sizec = 2;
 }
 
-static u64 of_bus_pci_map(u32 *addr, u32 *range, int na, int ns, int pna)
+static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna)
 {
 	u64 cp, s, da;
 
@@ -143,7 +145,7 @@
 	return of_bus_default_translate(addr + 1, offset, na - 1);
 }
 
-static unsigned int of_bus_pci_get_flags(u32 *addr)
+static unsigned int of_bus_pci_get_flags(const u32 *addr)
 {
 	unsigned int flags = 0;
 	u32 w = addr[0];
@@ -178,7 +180,7 @@
 		*sizec = 1;
 }
 
-static u64 of_bus_isa_map(u32 *addr, u32 *range, int na, int ns, int pna)
+static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna)
 {
 	u64 cp, s, da;
 
@@ -203,7 +205,7 @@
 	return of_bus_default_translate(addr + 1, offset, na - 1);
 }
 
-static unsigned int of_bus_isa_get_flags(u32 *addr)
+static unsigned int of_bus_isa_get_flags(const u32 *addr)
 {
 	unsigned int flags = 0;
 	u32 w = addr[0];
@@ -268,7 +270,7 @@
 			    struct of_bus *pbus, u32 *addr,
 			    int na, int ns, int pna)
 {
-	u32 *ranges;
+	const u32 *ranges;
 	unsigned int rlen;
 	int rone;
 	u64 offset = OF_BAD_ADDR;
@@ -285,7 +287,7 @@
 	 * to translate addresses that aren't supposed to be translated in
 	 * the first place. --BenH.
 	 */
-	ranges = (u32 *)get_property(parent, "ranges", &rlen);
+	ranges = get_property(parent, "ranges", &rlen);
 	if (ranges == NULL || rlen == 0) {
 		offset = of_read_number(addr, na);
 		memset(addr, 0, pna * 4);
@@ -328,7 +330,7 @@
  * that can be mapped to a cpu physical address). This is not really specified
  * that way, but this is traditionally the way IBM at least do things
  */
-u64 of_translate_address(struct device_node *dev, u32 *in_addr)
+u64 of_translate_address(struct device_node *dev, const u32 *in_addr)
 {
 	struct device_node *parent = NULL;
 	struct of_bus *bus, *pbus;
@@ -405,10 +407,10 @@
 }
 EXPORT_SYMBOL(of_translate_address);
 
-u32 *of_get_address(struct device_node *dev, int index, u64 *size,
+const u32 *of_get_address(struct device_node *dev, int index, u64 *size,
 		    unsigned int *flags)
 {
-	u32 *prop;
+	const u32 *prop;
 	unsigned int psize;
 	struct device_node *parent;
 	struct of_bus *bus;
@@ -425,7 +427,7 @@
 		return NULL;
 
 	/* Get "reg" or "assigned-addresses" property */
-	prop = (u32 *)get_property(dev, bus->addresses, &psize);
+	prop = get_property(dev, bus->addresses, &psize);
 	if (prop == NULL)
 		return NULL;
 	psize /= 4;
@@ -443,10 +445,10 @@
 }
 EXPORT_SYMBOL(of_get_address);
 
-u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
+const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
 			unsigned int *flags)
 {
-	u32 *prop;
+	const u32 *prop;
 	unsigned int psize;
 	struct device_node *parent;
 	struct of_bus *bus;
@@ -467,7 +469,7 @@
 		return NULL;
 
 	/* Get "reg" or "assigned-addresses" property */
-	prop = (u32 *)get_property(dev, bus->addresses, &psize);
+	prop = get_property(dev, bus->addresses, &psize);
 	if (prop == NULL)
 		return NULL;
 	psize /= 4;
@@ -485,7 +487,7 @@
 }
 EXPORT_SYMBOL(of_get_pci_address);
 
-static int __of_address_to_resource(struct device_node *dev, u32 *addrp,
+static int __of_address_to_resource(struct device_node *dev, const u32 *addrp,
 				    u64 size, unsigned int flags,
 				    struct resource *r)
 {
@@ -516,7 +518,7 @@
 int of_address_to_resource(struct device_node *dev, int index,
 			   struct resource *r)
 {
-	u32		*addrp;
+	const u32	*addrp;
 	u64		size;
 	unsigned int	flags;
 
@@ -530,7 +532,7 @@
 int of_pci_address_to_resource(struct device_node *dev, int bar,
 			       struct resource *r)
 {
-	u32		*addrp;
+	const u32	*addrp;
 	u64		size;
 	unsigned int	flags;
 
@@ -541,13 +543,14 @@
 }
 EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
 
-void of_parse_dma_window(struct device_node *dn, unsigned char *dma_window_prop,
+void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 		unsigned long *busno, unsigned long *phys, unsigned long *size)
 {
-	u32 *dma_window, cells;
-	unsigned char *prop;
+	const u32 *dma_window;
+	u32 cells;
+	const unsigned char *prop;
 
-	dma_window = (u32 *)dma_window_prop;
+	dma_window = dma_window_prop;
 
 	/* busno is always one cell */
 	*busno = *(dma_window++);
@@ -576,13 +579,13 @@
 static struct device_node *of_irq_find_parent(struct device_node *child)
 {
 	struct device_node *p;
-	phandle *parp;
+	const phandle *parp;
 
 	if (!of_node_get(child))
 		return NULL;
 
 	do {
-		parp = (phandle *)get_property(child, "interrupt-parent", NULL);
+		parp = get_property(child, "interrupt-parent", NULL);
 		if (parp == NULL)
 			p = of_get_parent(child);
 		else {
@@ -639,11 +642,11 @@
 
 }
 
-int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 ointsize,
-		   u32 *addr, struct of_irq *out_irq)
+int of_irq_map_raw(struct device_node *parent, const u32 *intspec, u32 ointsize,
+		const u32 *addr, struct of_irq *out_irq)
 {
 	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
-	u32 *tmp, *imap, *imask;
+	const u32 *tmp, *imap, *imask;
 	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
 	int imaplen, match, i;
 
@@ -657,7 +660,7 @@
 	 * is none, we are nice and just walk up the tree
 	 */
 	do {
-		tmp = (u32 *)get_property(ipar, "#interrupt-cells", NULL);
+		tmp = get_property(ipar, "#interrupt-cells", NULL);
 		if (tmp != NULL) {
 			intsize = *tmp;
 			break;
@@ -681,7 +684,7 @@
 	 */
 	old = of_node_get(ipar);
 	do {
-		tmp = (u32 *)get_property(old, "#address-cells", NULL);
+		tmp = get_property(old, "#address-cells", NULL);
 		tnode = of_get_parent(old);
 		of_node_put(old);
 		old = tnode;
@@ -708,7 +711,7 @@
 		}
 
 		/* Now look for an interrupt-map */
-		imap = (u32 *)get_property(ipar, "interrupt-map", &imaplen);
+		imap = get_property(ipar, "interrupt-map", &imaplen);
 		/* No interrupt map, check for an interrupt parent */
 		if (imap == NULL) {
 			DBG(" -> no map, getting parent\n");
@@ -718,7 +721,7 @@
 		imaplen /= sizeof(u32);
 
 		/* Look for a mask */
-		imask = (u32 *)get_property(ipar, "interrupt-map-mask", NULL);
+		imask = get_property(ipar, "interrupt-map-mask", NULL);
 
 		/* If we were passed no "reg" property and we attempt to parse
 		 * an interrupt-map, then #address-cells must be 0.
@@ -765,14 +768,14 @@
 			/* Get #interrupt-cells and #address-cells of new
 			 * parent
 			 */
-			tmp = (u32 *)get_property(newpar, "#interrupt-cells",
+			tmp = get_property(newpar, "#interrupt-cells",
 						  NULL);
 			if (tmp == NULL) {
 				DBG(" -> parent lacks #interrupt-cells !\n");
 				goto fail;
 			}
 			newintsize = *tmp;
-			tmp = (u32 *)get_property(newpar, "#address-cells",
+			tmp = get_property(newpar, "#address-cells",
 						  NULL);
 			newaddrsize = (tmp == NULL) ? 0 : *tmp;
 
@@ -818,14 +821,14 @@
 static int of_irq_map_oldworld(struct device_node *device, int index,
 			       struct of_irq *out_irq)
 {
-	u32 *ints;
+	const u32 *ints;
 	int intlen;
 
 	/*
 	 * Old machines just have a list of interrupt numbers
 	 * and no interrupt-controller nodes.
 	 */
-	ints = (u32 *) get_property(device, "AAPL,interrupts", &intlen);
+	ints = get_property(device, "AAPL,interrupts", &intlen);
 	if (ints == NULL)
 		return -EINVAL;
 	intlen /= sizeof(u32);
@@ -850,7 +853,8 @@
 int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq)
 {
 	struct device_node *p;
-	u32 *intspec, *tmp, intsize, intlen, *addr;
+	const u32 *intspec, *tmp, *addr;
+	u32 intsize, intlen;
 	int res;
 
 	DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index);
@@ -860,13 +864,13 @@
 		return of_irq_map_oldworld(device, index, out_irq);
 
 	/* Get the interrupts property */
-	intspec = (u32 *)get_property(device, "interrupts", &intlen);
+	intspec = get_property(device, "interrupts", &intlen);
 	if (intspec == NULL)
 		return -EINVAL;
 	intlen /= sizeof(u32);
 
 	/* Get the reg property (if any) */
-	addr = (u32 *)get_property(device, "reg", NULL);
+	addr = get_property(device, "reg", NULL);
 
 	/* Look for the interrupt parent. */
 	p = of_irq_find_parent(device);
@@ -874,7 +878,7 @@
 		return -EINVAL;
 
 	/* Get size of interrupt specifier */
-	tmp = (u32 *)get_property(p, "#interrupt-cells", NULL);
+	tmp = get_property(p, "#interrupt-cells", NULL);
 	if (tmp == NULL) {
 		of_node_put(p);
 		return -EINVAL;
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 9c9ad1f..2fe82ab 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -246,12 +246,12 @@
 
 static int ppc_rtas_find_all_sensors(void);
 static void ppc_rtas_process_sensor(struct seq_file *m,
-	struct individual_sensor *s, int state, int error, char *loc);
+	struct individual_sensor *s, int state, int error, const char *loc);
 static char *ppc_rtas_process_error(int error);
 static void get_location_code(struct seq_file *m,
-	struct individual_sensor *s, char *loc);
-static void check_location_string(struct seq_file *m, char *c);
-static void check_location(struct seq_file *m, char *c);
+	struct individual_sensor *s, const char *loc);
+static void check_location_string(struct seq_file *m, const char *c);
+static void check_location(struct seq_file *m, const char *c);
 
 static int __init proc_rtas_init(void)
 {
@@ -446,11 +446,11 @@
 	for (i=0; i<sensors.quant; i++) {
 		struct individual_sensor *p = &sensors.sensor[i];
 		char rstr[64];
-		char *loc;
+		const char *loc;
 		int llen, offs;
 
 		sprintf (rstr, SENSOR_PREFIX"%04d", p->token);
-		loc = (char *) get_property(rtas_node, rstr, &llen);
+		loc = get_property(rtas_node, rstr, &llen);
 
 		/* A sensor may have multiple instances */
 		for (j = 0, offs = 0; j <= p->quant; j++) {
@@ -474,10 +474,10 @@
 
 static int ppc_rtas_find_all_sensors(void)
 {
-	unsigned int *utmp;
+	const unsigned int *utmp;
 	int len, i;
 
-	utmp = (unsigned int *) get_property(rtas_node, "rtas-sensors", &len);
+	utmp = get_property(rtas_node, "rtas-sensors", &len);
 	if (utmp == NULL) {
 		printk (KERN_ERR "error: could not get rtas-sensors\n");
 		return 1;
@@ -530,7 +530,7 @@
  */
 
 static void ppc_rtas_process_sensor(struct seq_file *m,
-	struct individual_sensor *s, int state, int error, char *loc)
+	struct individual_sensor *s, int state, int error, const char *loc)
 {
 	/* Defined return vales */
 	const char * key_switch[]        = { "Off\t", "Normal\t", "Secure\t", 
@@ -682,7 +682,7 @@
 
 /* ****************************************************************** */
 
-static void check_location(struct seq_file *m, char *c)
+static void check_location(struct seq_file *m, const char *c)
 {
 	switch (c[0]) {
 		case LOC_PLANAR:
@@ -719,7 +719,7 @@
  * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ]
  * the '.' may be an abbrevation
  */
-static void check_location_string(struct seq_file *m, char *c)
+static void check_location_string(struct seq_file *m, const char *c)
 {
 	while (*c) {
 		if (isalpha(*c) || *c == '.')
@@ -733,7 +733,8 @@
 
 /* ****************************************************************** */
 
-static void get_location_code(struct seq_file *m, struct individual_sensor *s, char *loc)
+static void get_location_code(struct seq_file *m, struct individual_sensor *s,
+		const char *loc)
 {
 	if (!loc || !*loc) {
 		seq_printf(m, "---");/* does not have a location */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 77f1e06..6ef80d4 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -177,10 +177,12 @@
 void rtas_progress(char *s, unsigned short hex)
 {
 	struct device_node *root;
-	int width, *p;
+	int width;
+	const int *p;
 	char *os;
 	static int display_character, set_indicator;
-	static int display_width, display_lines, *row_width, form_feed;
+	static int display_width, display_lines, form_feed;
+	static const int *row_width;
 	static DEFINE_SPINLOCK(progress_lock);
 	static int current_line;
 	static int pending_newline = 0;  /* did last write end with unprinted newline? */
@@ -191,16 +193,16 @@
 	if (display_width == 0) {
 		display_width = 0x10;
 		if ((root = find_path_device("/rtas"))) {
-			if ((p = (unsigned int *)get_property(root,
+			if ((p = get_property(root,
 					"ibm,display-line-length", NULL)))
 				display_width = *p;
-			if ((p = (unsigned int *)get_property(root,
+			if ((p = get_property(root,
 					"ibm,form-feed", NULL)))
 				form_feed = *p;
-			if ((p = (unsigned int *)get_property(root,
+			if ((p = get_property(root,
 					"ibm,display-number-of-lines", NULL)))
 				display_lines = *p;
-			row_width = (unsigned int *)get_property(root,
+			row_width = get_property(root,
 					"ibm,display-truncation-length", NULL);
 		}
 		display_character = rtas_token("display-character");
@@ -293,10 +295,10 @@
 
 int rtas_token(const char *service)
 {
-	int *tokp;
+	const int *tokp;
 	if (rtas.dev == NULL)
 		return RTAS_UNKNOWN_SERVICE;
-	tokp = (int *) get_property(rtas.dev, service, NULL);
+	tokp = get_property(rtas.dev, service, NULL);
 	return tokp ? *tokp : RTAS_UNKNOWN_SERVICE;
 }
 EXPORT_SYMBOL(rtas_token);
@@ -626,6 +628,9 @@
 {
 	int status;
 
+	if (panic_timeout)
+		return;
+
 	if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term"))
 		return;
 
@@ -687,15 +692,14 @@
 	int i;
 	long state;
 	long rc;
-	unsigned long dummy;
-
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 	struct rtas_suspend_me_data data;
 
 	/* Make sure the state is valid */
-	rc = plpar_hcall(H_VASI_STATE,
-			 ((u64)args->args[0] << 32) | args->args[1],
-			 0, 0, 0,
-			 &state, &dummy, &dummy);
+	rc = plpar_hcall(H_VASI_STATE, retbuf,
+			 ((u64)args->args[0] << 32) | args->args[1]);
+
+	state = retbuf[0];
 
 	if (rc) {
 		printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc);
@@ -845,15 +849,15 @@
 	 */
 	rtas.dev = of_find_node_by_name(NULL, "rtas");
 	if (rtas.dev) {
-		u32 *basep, *entryp;
-		u32 *sizep;
+		const u32 *basep, *entryp, *sizep;
 
-		basep = (u32 *)get_property(rtas.dev, "linux,rtas-base", NULL);
-		sizep = (u32 *)get_property(rtas.dev, "rtas-size", NULL);
+		basep = get_property(rtas.dev, "linux,rtas-base", NULL);
+		sizep = get_property(rtas.dev, "rtas-size", NULL);
 		if (basep != NULL && sizep != NULL) {
 			rtas.base = *basep;
 			rtas.size = *sizep;
-			entryp = (u32 *)get_property(rtas.dev, "linux,rtas-entry", NULL);
+			entryp = get_property(rtas.dev,
+					"linux,rtas-entry", NULL);
 			if (entryp == NULL) /* Ugh */
 				rtas.entry = rtas.base;
 			else
@@ -909,6 +913,11 @@
 	basep = of_get_flat_dt_prop(node, "get-term-char", NULL);
 	if (basep)
 		rtas_getchar_token = *basep;
+
+	if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE &&
+	    rtas_getchar_token != RTAS_UNKNOWN_SERVICE)
+		udbg_init_rtas_console();
+
 #endif
 
 	/* break now */
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index cda0226..b4a0de7 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -57,7 +57,7 @@
 
 static int of_device_available(struct device_node * dn)
 {
-        char * status;
+        const char *status;
 
         status = get_property(dn, "status", NULL);
 
@@ -81,8 +81,7 @@
 	if (!config_access_valid(pdn, where))
 		return PCIBIOS_BAD_REGISTER_NUMBER;
 
-	addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
-		(pdn->devfn << 8) | (where & 0xff);
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
 	buid = pdn->phb->buid;
 	if (buid) {
 		ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval,
@@ -134,8 +133,7 @@
 	if (!config_access_valid(pdn, where))
 		return PCIBIOS_BAD_REGISTER_NUMBER;
 
-	addr = ((where & 0xf00) << 20) | (pdn->busno << 16) |
-		(pdn->devfn << 8) | (where & 0xff);
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
 	buid = pdn->phb->buid;
 	if (buid) {
 		ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr,
@@ -178,7 +176,7 @@
 
 int is_python(struct device_node *dev)
 {
-	char *model = (char *)get_property(dev, "model", NULL);
+	const char *model = get_property(dev, "model", NULL);
 
 	if (model && strstr(model, "Python"))
 		return 1;
@@ -234,7 +232,7 @@
 unsigned long __devinit get_phb_buid (struct device_node *phb)
 {
 	int addr_cells;
-	unsigned int *buid_vals;
+	const unsigned int *buid_vals;
 	unsigned int len;
 	unsigned long buid;
 
@@ -247,7 +245,7 @@
 	if (phb->parent->parent)
 		return 0;
 
-	buid_vals = (unsigned int *) get_property(phb, "reg", &len);
+	buid_vals = get_property(phb, "reg", &len);
 	if (buid_vals == NULL)
 		return 0;
 
@@ -264,10 +262,10 @@
 static int phb_set_bus_ranges(struct device_node *dev,
 			      struct pci_controller *phb)
 {
-	int *bus_range;
+	const int *bus_range;
 	unsigned int len;
 
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		return 1;
  	}
@@ -325,15 +323,15 @@
 	 * in chosen.
 	 */
 	if (of_chosen) {
-		int *prop;
+		const int *prop;
 
-		prop = (int *)get_property(of_chosen, "linux,pci-probe-only",
-					   NULL);
+		prop = get_property(of_chosen,
+				"linux,pci-probe-only", NULL);
 		if (prop)
 			pci_probe_only = *prop;
 
-		prop = (int *)get_property(of_chosen,
-					   "linux,pci-assign-all-buses", NULL);
+		prop = get_property(of_chosen,
+				"linux,pci-assign-all-buses", NULL);
 		if (prop)
 			pci_assign_all_buses = *prop;
 	}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 499c386..0af3fc1 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -304,19 +304,21 @@
 void __init check_for_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
-	unsigned long *prop;
+	const unsigned int *prop;
+	int len;
 
 	DBG(" -> check_for_initrd()\n");
 
 	if (of_chosen) {
-		prop = (unsigned long *)get_property(of_chosen,
-				"linux,initrd-start", NULL);
+		prop = get_property(of_chosen, "linux,initrd-start", &len);
 		if (prop != NULL) {
-			initrd_start = (unsigned long)__va(*prop);
-			prop = (unsigned long *)get_property(of_chosen,
-					"linux,initrd-end", NULL);
+			initrd_start = (unsigned long)
+				__va(of_read_ulong(prop, len / 4));
+			prop = get_property(of_chosen,
+					"linux,initrd-end", &len);
 			if (prop != NULL) {
-				initrd_end = (unsigned long)__va(*prop);
+				initrd_end = (unsigned long)
+					__va(of_read_ulong(prop, len / 4));
 				initrd_below_start_ok = 1;
 			} else
 				initrd_start = 0;
@@ -366,15 +368,14 @@
 	int cpu = 0;
 
 	while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) {
-		int *intserv;
+		const int *intserv;
 		int j, len = sizeof(u32), nthreads = 1;
 
-		intserv = (int *)get_property(dn, "ibm,ppc-interrupt-server#s",
-					      &len);
+		intserv = get_property(dn, "ibm,ppc-interrupt-server#s", &len);
 		if (intserv)
 			nthreads = len / sizeof(int);
 		else {
-			intserv = (int *) get_property(dn, "reg", NULL);
+			intserv = get_property(dn, "reg", NULL);
 			if (!intserv)
 				intserv = &cpu;	/* assume logical == phys */
 		}
@@ -395,13 +396,12 @@
 	if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) &&
 	    (dn = of_find_node_by_path("/rtas"))) {
 		int num_addr_cell, num_size_cell, maxcpus;
-		unsigned int *ireg;
+		const unsigned int *ireg;
 
 		num_addr_cell = prom_n_addr_cells(dn);
 		num_size_cell = prom_n_size_cells(dn);
 
-		ireg = (unsigned int *)
-			get_property(dn, "ibm,lrdr-capacity", NULL);
+		ireg = get_property(dn, "ibm,lrdr-capacity", NULL);
 
 		if (!ireg)
 			goto out;
@@ -444,6 +444,8 @@
 
 int __initdata do_early_xmon;
 #ifdef CONFIG_XMON
+extern int xmon_no_auto_backtrace;
+
 static int __init early_xmon(char *p)
 {
 	/* ensure xmon is enabled */
@@ -452,6 +454,8 @@
 			xmon_init(1);
 		if (strncmp(p, "off", 3) == 0)
 			xmon_init(0);
+		if (strncmp(p, "nobt", 4) == 0)
+			xmon_no_auto_backtrace = 1;
 		if (strncmp(p, "early", 5) != 0)
 			return 0;
 	}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index fd1785e..00d6b8a 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -56,7 +56,6 @@
 #include <asm/page.h>
 #include <asm/mmu.h>
 #include <asm/lmb.h>
-#include <asm/iseries/it_lp_naca.h>
 #include <asm/firmware.h>
 #include <asm/xmon.h>
 #include <asm/udbg.h>
@@ -79,10 +78,10 @@
  * before we've read this from the device tree.
  */
 struct ppc64_caches ppc64_caches = {
-	.dline_size = 0x80,
-	.log_dline_size = 7,
-	.iline_size = 0x80,
-	.log_iline_size = 7
+	.dline_size = 0x40,
+	.log_dline_size = 6,
+	.iline_size = 0x40,
+	.log_iline_size = 6
 };
 EXPORT_SYMBOL_GPL(ppc64_caches);
 
@@ -107,7 +106,7 @@
 static void check_smt_enabled(void)
 {
 	struct device_node *dn;
-	char *smt_option;
+	const char *smt_option;
 
 	/* Allow the command line to overrule the OF option */
 	if (smt_enabled_cmdline)
@@ -116,7 +115,7 @@
 	dn = of_find_node_by_path("/options");
 
 	if (dn) {
-		smt_option = (char *)get_property(dn, "ibm,smt-enabled", NULL);
+		smt_option = get_property(dn, "ibm,smt-enabled", NULL);
 
                 if (smt_option) {
 			if (!strcmp(smt_option, "on"))
@@ -293,7 +292,7 @@
 		 */
 
 		if ( num_cpus == 1 ) {
-			u32 *sizep, *lsizep;
+			const u32 *sizep, *lsizep;
 			u32 size, lsize;
 			const char *dc, *ic;
 
@@ -308,10 +307,10 @@
 
 			size = 0;
 			lsize = cur_cpu_spec->dcache_bsize;
-			sizep = (u32 *)get_property(np, "d-cache-size", NULL);
+			sizep = get_property(np, "d-cache-size", NULL);
 			if (sizep != NULL)
 				size = *sizep;
-			lsizep = (u32 *) get_property(np, dc, NULL);
+			lsizep = get_property(np, dc, NULL);
 			if (lsizep != NULL)
 				lsize = *lsizep;
 			if (sizep == 0 || lsizep == 0)
@@ -325,10 +324,10 @@
 
 			size = 0;
 			lsize = cur_cpu_spec->icache_bsize;
-			sizep = (u32 *)get_property(np, "i-cache-size", NULL);
+			sizep = get_property(np, "i-cache-size", NULL);
 			if (sizep != NULL)
 				size = *sizep;
-			lsizep = (u32 *)get_property(np, ic, NULL);
+			lsizep = get_property(np, ic, NULL);
 			if (lsizep != NULL)
 				lsize = *lsizep;
 			if (sizep == 0 || lsizep == 0)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index fec228c..406f308 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -60,7 +60,7 @@
 static int __init smt_setup(void)
 {
 	struct device_node *options;
-	unsigned int *val;
+	const unsigned int *val;
 	unsigned int cpu;
 
 	if (!cpu_has_feature(CPU_FTR_SMT))
@@ -70,8 +70,7 @@
 	if (!options)
 		return -ENODEV;
 
-	val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay",
-					   NULL);
+	val = get_property(options, "ibm,smt-snooze-delay", NULL);
 	if (!smt_snooze_cmdline && val) {
 		for_each_possible_cpu(cpu)
 			per_cpu(smt_snooze_delay, cpu) = *val;
@@ -231,7 +230,7 @@
 	if (cur_cpu_spec->num_pmcs >= 8)
 		sysdev_create_file(s, &attr_pmc8);
 
-	if (cpu_has_feature(CPU_FTR_SMT))
+	if (cpu_has_feature(CPU_FTR_PURR))
 		sysdev_create_file(s, &attr_purr);
 }
 
@@ -273,7 +272,7 @@
 	if (cur_cpu_spec->num_pmcs >= 8)
 		sysdev_remove_file(s, &attr_pmc8);
 
-	if (cpu_has_feature(CPU_FTR_SMT))
+	if (cpu_has_feature(CPU_FTR_PURR))
 		sysdev_remove_file(s, &attr_purr);
 }
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a124499..7a3c3f7 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -860,19 +860,17 @@
 static int __init get_freq(char *name, int cells, unsigned long *val)
 {
 	struct device_node *cpu;
-	unsigned int *fp;
+	const unsigned int *fp;
 	int found = 0;
 
 	/* The cpu node should have timebase and clock frequency properties */
 	cpu = of_find_node_by_type(NULL, "cpu");
 
 	if (cpu) {
-		fp = (unsigned int *)get_property(cpu, name, NULL);
+		fp = get_property(cpu, name, NULL);
 		if (fp) {
 			found = 1;
-			*val = 0;
-			while (cells--)
-				*val = (*val << 32) | *fp++;
+			*val = of_read_ulong(fp, cells);
 		}
 
 		of_node_put(cpu);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 9b352bd..d9f10f2f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -598,6 +598,9 @@
 #define INST_STSWI		0x7c0005aa
 #define INST_STSWX		0x7c00052a
 
+#define INST_POPCNTB		0x7c0000f4
+#define INST_POPCNTB_MASK	0xfc0007fe
+
 static int emulate_string_inst(struct pt_regs *regs, u32 instword)
 {
 	u8 rT = (instword >> 21) & 0x1f;
@@ -666,6 +669,23 @@
 	return 0;
 }
 
+static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
+{
+	u32 ra, rs;
+	unsigned long tmp;
+
+	ra = (instword >> 16) & 0x1f;	/* GPR that receives the result */
+	rs = (instword >> 21) & 0x1f;	/* GPR that supplies the operand */
+
+	tmp = regs->gpr[rs];	/* count set bits in every byte at once */
+	tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL);	/* 2-bit sums */
+	tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL);
+	tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL;	/* byte sums */
+	regs->gpr[ra] = tmp;
+
+	return 0;
+}
+
 static int emulate_instruction(struct pt_regs *regs)
 {
 	u32 instword;
@@ -703,6 +723,11 @@
 	if ((instword & INST_STRING_GEN_MASK) == INST_STRING)
 		return emulate_string_inst(regs, instword);
 
+	/* Emulate the popcntb (Population Count Bytes) instruction. */
+	if ((instword & INST_POPCNTB_MASK) == INST_POPCNTB) {
+		return emulate_popcntb_inst(regs, instword);
+	}
+
 	return -EINVAL;
 }
 
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index fad8580..cb87e71 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -77,7 +77,7 @@
 	} else
 #endif
 	{
-		unsigned char *dma_window;
+		const unsigned char *dma_window;
 		struct iommu_table *tbl;
 		unsigned long offset, size;
 
@@ -217,7 +217,7 @@
 struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
 {
 	struct vio_dev *viodev;
-	unsigned int *unit_address;
+	const unsigned int *unit_address;
 
 	/* we need the 'device_type' property, in order to match with drivers */
 	if (of_node->type == NULL) {
@@ -227,7 +227,7 @@
 		return NULL;
 	}
 
-	unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
+	unit_address = get_property(of_node, "reg", NULL);
 	if (unit_address == NULL) {
 		printk(KERN_WARNING "%s: node %s missing 'reg'\n",
 				__FUNCTION__,
@@ -249,7 +249,7 @@
 	viodev->type = of_node->type;
 	viodev->unit_address = *unit_address;
 	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
-		unit_address = (unsigned int *)get_property(of_node,
+		unit_address = get_property(of_node,
 				"linux,unit_address", NULL);
 		if (unit_address != NULL)
 			viodev->unit_address = *unit_address;
@@ -423,7 +423,7 @@
 {
 	const struct vio_dev *vio_dev = to_vio_dev(dev);
 	struct device_node *dn = dev->platform_data;
-	char *cp;
+	const char *cp;
 	int length;
 
 	if (!num_envp)
@@ -431,7 +431,7 @@
 
 	if (!dn)
 		return -ENODEV;
-	cp = (char *)get_property(dn, "compatible", &length);
+	cp = get_property(dn, "compatible", &length);
 	if (!cp)
 		return -ENODEV;
 
@@ -493,11 +493,11 @@
  */
 struct vio_dev *vio_find_node(struct device_node *vnode)
 {
-	uint32_t *unit_address;
+	const uint32_t *unit_address;
 	char kobj_name[BUS_ID_SIZE];
 
 	/* construct the kobject name from the device node */
-	unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
+	unit_address = get_property(vnode, "reg", NULL);
 	if (!unit_address)
 		return NULL;
 	snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index ff70964..336dd19 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -14,7 +14,6 @@
 obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o \
 			   strcase.o
-obj-$(CONFIG_PPC_ISERIES) += e2a.o
 obj-$(CONFIG_XMON)	+= sstep.o
 
 ifeq ($(CONFIG_PPC64),y)
diff --git a/arch/powerpc/lib/e2a.c b/arch/powerpc/lib/e2a.c
deleted file mode 100644
index 4b72ed8..0000000
--- a/arch/powerpc/lib/e2a.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- *  EBCDIC to ASCII conversion
- *
- * This function moved here from arch/powerpc/platforms/iseries/viopath.c 
- *
- * (C) Copyright 2000-2004 IBM Corporation
- *
- * This program is free software;  you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) anyu later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include <linux/module.h>
-
-unsigned char e2a(unsigned char x)
-{
-	switch (x) {
-	case 0xF0:
-		return '0';
-	case 0xF1:
-		return '1';
-	case 0xF2:
-		return '2';
-	case 0xF3:
-		return '3';
-	case 0xF4:
-		return '4';
-	case 0xF5:
-		return '5';
-	case 0xF6:
-		return '6';
-	case 0xF7:
-		return '7';
-	case 0xF8:
-		return '8';
-	case 0xF9:
-		return '9';
-	case 0xC1:
-		return 'A';
-	case 0xC2:
-		return 'B';
-	case 0xC3:
-		return 'C';
-	case 0xC4:
-		return 'D';
-	case 0xC5:
-		return 'E';
-	case 0xC6:
-		return 'F';
-	case 0xC7:
-		return 'G';
-	case 0xC8:
-		return 'H';
-	case 0xC9:
-		return 'I';
-	case 0xD1:
-		return 'J';
-	case 0xD2:
-		return 'K';
-	case 0xD3:
-		return 'L';
-	case 0xD4:
-		return 'M';
-	case 0xD5:
-		return 'N';
-	case 0xD6:
-		return 'O';
-	case 0xD7:
-		return 'P';
-	case 0xD8:
-		return 'Q';
-	case 0xD9:
-		return 'R';
-	case 0xE2:
-		return 'S';
-	case 0xE3:
-		return 'T';
-	case 0xE4:
-		return 'U';
-	case 0xE5:
-		return 'V';
-	case 0xE6:
-		return 'W';
-	case 0xE7:
-		return 'X';
-	case 0xE8:
-		return 'Y';
-	case 0xE9:
-		return 'Z';
-	}
-	return ' ';
-}
-EXPORT_SYMBOL(e2a);
-
-unsigned char* strne2a(unsigned char *dest, const unsigned char *src, size_t n)
-{
-	int i;
-
-	n = strnlen(src, n);
-
-	for (i = 0; i < n; i++)
-		dest[i] = e2a(src[i]);
-
-	return dest;
-}
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 077bed7..80b482c 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,7 @@
 #include <asm/hvcall.h>
 #include <asm/iseries/hv_call.h>
 #include <asm/smp.h>
+#include <asm/firmware.h>
 
 void __spin_yield(raw_spinlock_t *lock)
 {
@@ -39,13 +40,12 @@
 	rmb();
 	if (lock->slock != lock_value)
 		return;		/* something has changed */
-#ifdef CONFIG_PPC_ISERIES
-	HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
-		((u64)holder_cpu << 32) | yield_count);
-#else
-	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu),
-			   yield_count);
-#endif
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
+			((u64)holder_cpu << 32) | yield_count);
+	else
+		plpar_hcall_norets(H_CONFER,
+			get_hard_smp_processor_id(holder_cpu), yield_count);
 }
 
 /*
@@ -69,13 +69,12 @@
 	rmb();
 	if (rw->lock != lock_value)
 		return;		/* something has changed */
-#ifdef CONFIG_PPC_ISERIES
-	HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
-		((u64)holder_cpu << 32) | yield_count);
-#else
-	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu),
-			   yield_count);
-#endif
+	if (firmware_has_feature(FW_FEATURE_ISERIES))
+		HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
+			((u64)holder_cpu << 32) | yield_count);
+	else
+		plpar_hcall_norets(H_CONFER,
+			get_hard_smp_processor_id(holder_cpu), yield_count);
 }
 #endif
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fbe2393..6c0f1c7 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -159,12 +159,12 @@
 {
 	unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
 	struct device_node *cpu_node = NULL;
-	unsigned int *interrupt_server, *reg;
+	const unsigned int *interrupt_server, *reg;
 	int len;
 
 	while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) {
 		/* Try interrupt server first */
-		interrupt_server = (unsigned int *)get_property(cpu_node,
+		interrupt_server = get_property(cpu_node,
 					"ibm,ppc-interrupt-server#s", &len);
 
 		len = len / sizeof(u32);
@@ -175,8 +175,7 @@
 					return cpu_node;
 			}
 		} else {
-			reg = (unsigned int *)get_property(cpu_node,
-							   "reg", &len);
+			reg = get_property(cpu_node, "reg", &len);
 			if (reg && (len > 0) && (reg[0] == hw_cpuid))
 				return cpu_node;
 		}
@@ -186,9 +185,9 @@
 }
 
 /* must hold reference to node during call */
-static int *of_get_associativity(struct device_node *dev)
+static const int *of_get_associativity(struct device_node *dev)
 {
-	return (unsigned int *)get_property(dev, "ibm,associativity", NULL);
+	return get_property(dev, "ibm,associativity", NULL);
 }
 
 /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
@@ -197,7 +196,7 @@
 static int of_node_to_nid_single(struct device_node *device)
 {
 	int nid = -1;
-	unsigned int *tmp;
+	const unsigned int *tmp;
 
 	if (min_common_depth == -1)
 		goto out;
@@ -255,7 +254,7 @@
 static int __init find_min_common_depth(void)
 {
 	int depth;
-	unsigned int *ref_points;
+	const unsigned int *ref_points;
 	struct device_node *rtas_root;
 	unsigned int len;
 
@@ -270,7 +269,7 @@
 	 * configuration (should be all 0's) and the second is for a normal
 	 * NUMA configuration.
 	 */
-	ref_points = (unsigned int *)get_property(rtas_root,
+	ref_points = get_property(rtas_root,
 			"ibm,associativity-reference-points", &len);
 
 	if ((len >= 1) && ref_points) {
@@ -297,7 +296,7 @@
 	of_node_put(memory);
 }
 
-static unsigned long __devinit read_n_cells(int n, unsigned int **buf)
+static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
 {
 	unsigned long result = 0;
 
@@ -435,15 +434,13 @@
 		unsigned long size;
 		int nid;
 		int ranges;
-		unsigned int *memcell_buf;
+		const unsigned int *memcell_buf;
 		unsigned int len;
 
-		memcell_buf = (unsigned int *)get_property(memory,
+		memcell_buf = get_property(memory,
 			"linux,usable-memory", &len);
 		if (!memcell_buf || len <= 0)
-			memcell_buf =
-				(unsigned int *)get_property(memory, "reg",
-					&len);
+			memcell_buf = get_property(memory, "reg", &len);
 		if (!memcell_buf || len <= 0)
 			continue;
 
@@ -787,10 +784,10 @@
 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
 		unsigned long start, size;
 		int ranges;
-		unsigned int *memcell_buf;
+		const unsigned int *memcell_buf;
 		unsigned int len;
 
-		memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
+		memcell_buf = get_property(memory, "reg", &len);
 		if (!memcell_buf || len <= 0)
 			continue;
 
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index de0c884..d373391 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -22,6 +22,8 @@
 #include <asm/paca.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <linux/compiler.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -50,9 +52,32 @@
 	return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags;
 }
 
-static inline void create_slbe(unsigned long ea, unsigned long flags,
-			       unsigned long entry)
+static inline void slb_shadow_update(unsigned long esid, unsigned long vsid,
+				     unsigned long entry)
 {
+	/*
+	 * Rewrite save_area[entry] of the SLB shadow buffer.  Clear the ESID
+	 * first so the entry is not valid while we are updating it.
+	 */
+	get_slb_shadow()->save_area[entry].esid = 0;
+	barrier();	/* compiler barrier: invalidate before the new VSID */
+	get_slb_shadow()->save_area[entry].vsid = vsid;
+	barrier();	/* compiler barrier: new VSID before the new ESID */
+	get_slb_shadow()->save_area[entry].esid = esid;
+
+}
+
+static inline void create_shadowed_slbe(unsigned long ea, unsigned long flags,
+					unsigned long entry)
+{
+	/*
+	 * Updating the shadow buffer before writing the SLB ensures
+	 * we don't get a stale entry here if we get preempted by PHYP
+	 * between these two statements.
+	 */
+	slb_shadow_update(mk_esid_data(ea, entry), mk_vsid_data(ea, flags),
+			  entry);
+
 	asm volatile("slbmte  %0,%1" :
 		     : "r" (mk_vsid_data(ea, flags)),
 		       "r" (mk_esid_data(ea, entry))
@@ -77,6 +102,10 @@
 	if ((ksp_esid_data & ESID_MASK) == PAGE_OFFSET)
 		ksp_esid_data &= ~SLB_ESID_V;
 
+	/* Only third entry (stack) may change here so only resave that */
+	slb_shadow_update(ksp_esid_data,
+			  mk_vsid_data(ksp_esid_data, lflags), 2);
+
 	/* We need to do this all in asm, so we're sure we don't touch
 	 * the stack between the slbia and rebolting it. */
 	asm volatile("isync\n"
@@ -209,9 +238,9 @@
 	asm volatile("isync":::"memory");
 	asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
 	asm volatile("isync; slbia; isync":::"memory");
-	create_slbe(PAGE_OFFSET, lflags, 0);
+	create_shadowed_slbe(PAGE_OFFSET, lflags, 0);
 
-	create_slbe(VMALLOC_START, vflags, 1);
+	create_shadowed_slbe(VMALLOC_START, vflags, 1);
 
 	/* We don't bolt the stack for the time being - we're in boot,
 	 * so the stack is in the bolted segment.  By the time it goes
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index f6eef78..b58baa6 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -146,6 +146,7 @@
 		psize = mmu_huge_psize;
 #else
 		BUG();
+		psize = pte_pagesize_index(pte); /* shutup gcc */
 #endif
 	} else
 		psize = pte_pagesize_index(pte);
diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c
index cf3967a..969fbb6 100644
--- a/arch/powerpc/platforms/83xx/mpc834x_itx.c
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c
@@ -60,8 +60,8 @@
 
 	np = of_find_node_by_type(NULL, "cpu");
 	if (np != 0) {
-		unsigned int *fp =
-		    (int *)get_property(np, "clock-frequency", NULL);
+		const unsigned int *fp =
+			get_property(np, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/83xx/mpc834x_sys.c b/arch/powerpc/platforms/83xx/mpc834x_sys.c
index 32df239..6771961 100644
--- a/arch/powerpc/platforms/83xx/mpc834x_sys.c
+++ b/arch/powerpc/platforms/83xx/mpc834x_sys.c
@@ -57,8 +57,8 @@
 
 	np = of_find_node_by_type(NULL, "cpu");
 	if (np != 0) {
-		unsigned int *fp =
-		    (int *)get_property(np, "clock-frequency", NULL);
+		const unsigned int *fp =
+			get_property(np, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/83xx/pci.c b/arch/powerpc/platforms/83xx/pci.c
index 5d84a9c..4557ac5 100644
--- a/arch/powerpc/platforms/83xx/pci.c
+++ b/arch/powerpc/platforms/83xx/pci.c
@@ -59,7 +59,7 @@
 	int len;
 	struct pci_controller *hose;
 	struct resource rsrc;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 1, has_address = 0;
 	phys_addr_t immr = get_immrbase();
 
@@ -69,7 +69,7 @@
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *)get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index 9d2acfb..cae6b73 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -121,9 +121,9 @@
 
 	cpu = of_find_node_by_type(NULL, "cpu");
 	if (cpu != 0) {
-		unsigned int *fp;
+		const unsigned int *fp;
 
-		fp = (int *)get_property(cpu, "clock-frequency", NULL);
+		fp = get_property(cpu, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 1d357d3..4c1fede 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -241,9 +241,9 @@
 
 	cpu = of_find_node_by_type(NULL, "cpu");
 	if (cpu != 0) {
-		unsigned int *fp;
+		const unsigned int *fp;
 
-		fp = (int *)get_property(cpu, "clock-frequency", NULL);
+		fp = get_property(cpu, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/85xx/pci.c b/arch/powerpc/platforms/85xx/pci.c
index 1d51f32..05930ee 100644
--- a/arch/powerpc/platforms/85xx/pci.c
+++ b/arch/powerpc/platforms/85xx/pci.c
@@ -41,7 +41,7 @@
 	int len;
 	struct pci_controller *hose;
 	struct resource rsrc;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 1, has_address = 0;
 	phys_addr_t immr = get_immrbase();
 
@@ -51,7 +51,7 @@
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 5e583cf..b637e81 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -347,9 +347,9 @@
 
 	np = of_find_node_by_type(NULL, "cpu");
 	if (np != 0) {
-		unsigned int *fp;
+		const unsigned int *fp;
 
-		fp = (int *)get_property(np, "clock-frequency", NULL);
+		fp = get_property(np, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/86xx/pci.c b/arch/powerpc/platforms/86xx/pci.c
index a8c8f0a..481e18e 100644
--- a/arch/powerpc/platforms/86xx/pci.c
+++ b/arch/powerpc/platforms/86xx/pci.c
@@ -153,7 +153,7 @@
 	int len;
 	struct pci_controller *hose;
 	struct resource rsrc;
-	int *bus_range;
+	const int *bus_range;
 	int has_address = 0;
 	int primary = 0;
 
@@ -163,7 +163,7 @@
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int))
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 5cf46dc..e58fa95 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -13,5 +13,6 @@
 obj-$(CONFIG_PPC_PSERIES)	+= pseries/
 obj-$(CONFIG_PPC_ISERIES)	+= iseries/
 obj-$(CONFIG_PPC_MAPLE)		+= maple/
+obj-$(CONFIG_PPC_PASEMI)		+= pasemi/
 obj-$(CONFIG_PPC_CELL)		+= cell/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index ce696c1..3f3859d 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -97,7 +97,7 @@
 		struct cbe_regs_map *map = &cbe_regs_maps[cbe_regs_map_count++];
 
 		/* That hack must die die die ! */
-		struct address_prop {
+		const struct address_prop {
 			unsigned long address;
 			unsigned int len;
 		} __attribute__((packed)) *prop;
@@ -114,13 +114,11 @@
 			if (cbe_thread_map[i].cpu_node == cpu)
 				cbe_thread_map[i].regs = map;
 
-		prop = (struct address_prop *)get_property(cpu, "pervasive",
-							   NULL);
+		prop = get_property(cpu, "pervasive", NULL);
 		if (prop != NULL)
 			map->pmd_regs = ioremap(prop->address, prop->len);
 
-		prop = (struct address_prop *)get_property(cpu, "iic",
-							   NULL);
+		prop = get_property(cpu, "iic", NULL);
 		if (prop != NULL)
 			map->iic_regs = ioremap(prop->address, prop->len);
 	}
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index d7bbb61..6b57a47 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -89,17 +89,17 @@
 /* Get an IRQ number from the pending state register of the IIC */
 static unsigned int iic_get_irq(struct pt_regs *regs)
 {
-  	struct cbe_iic_pending_bits pending;
- 	struct iic *iic;
+	struct cbe_iic_pending_bits pending;
+	struct iic *iic;
 
- 	iic = &__get_cpu_var(iic);
- 	*(unsigned long *) &pending =
- 		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
- 	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
- 	BUG_ON(iic->eoi_ptr > 15);
+	iic = &__get_cpu_var(iic);
+	*(unsigned long *) &pending =
+		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
+	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+	BUG_ON(iic->eoi_ptr > 15);
 	if (pending.flags & CBE_IIC_IRQ_VALID)
 		return irq_linear_revmap(iic->host,
- 					 iic_pending_to_hwnum(pending));
+					 iic_pending_to_hwnum(pending));
 	return NO_IRQ;
 }
 
@@ -250,16 +250,15 @@
 	struct resource r0, r1;
 	struct irq_host *host;
 	int found = 0;
- 	u32 *np;
+	const u32 *np;
 
 	for (dn = NULL;
 	     (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
 		if (!device_is_compatible(dn,
 				     "IBM,CBEA-Internal-Interrupt-Controller"))
 			continue;
- 		np = (u32 *)get_property(dn, "ibm,interrupt-server-ranges",
-					 NULL);
- 		if (np == NULL) {
+		np = get_property(dn, "ibm,interrupt-server-ranges", NULL);
+		if (np == NULL) {
 			printk(KERN_WARNING "IIC: CPU association not found\n");
 			of_node_put(dn);
 			return -ENODEV;
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index a35004e..d2b20eb 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -308,15 +308,16 @@
 
 static void iommu_devnode_setup(struct device_node *d)
 {
-	unsigned int *ioid;
-	unsigned long *dma_window, map_start, map_size, token;
+	const unsigned int *ioid;
+	unsigned long map_start, map_size, token;
+	const unsigned long *dma_window;
 	struct cell_iommu *iommu;
 
-	ioid = (unsigned int *)get_property(d, "ioid", NULL);
+	ioid = get_property(d, "ioid", NULL);
 	if (!ioid)
 		pr_debug("No ioid entry found !\n");
 
-	dma_window = (unsigned long *)get_property(d, "ibm,dma-window", NULL);
+	dma_window = get_property(d, "ibm,dma-window", NULL);
 	if (!dma_window)
 		pr_debug("No ibm,dma-window entry found !\n");
 
@@ -371,8 +372,9 @@
 
 static int cell_map_iommu(void)
 {
-	unsigned int num_nodes = 0, *node_id;
-	unsigned long *base, *mmio_base;
+	unsigned int num_nodes = 0;
+	const unsigned int *node_id;
+	const unsigned long *base, *mmio_base;
 	struct device_node *dn;
 	struct cell_iommu *iommu = NULL;
 
@@ -381,7 +383,7 @@
 	for(dn = of_find_node_by_type(NULL, "cpu");
 	    dn;
 	    dn = of_find_node_by_type(dn, "cpu")) {
-		node_id = (unsigned int *)get_property(dn, "node-id", NULL);
+		node_id = get_property(dn, "node-id", NULL);
 
 		if (num_nodes < *node_id)
 			num_nodes = *node_id;
@@ -396,9 +398,9 @@
 	    dn;
 	    dn = of_find_node_by_type(dn, "cpu")) {
 
-		node_id = (unsigned int *)get_property(dn, "node-id", NULL);
-		base = (unsigned long *)get_property(dn, "ioc-cache", NULL);
-		mmio_base = (unsigned long *)get_property(dn, "ioc-translation", NULL);
+		node_id = get_property(dn, "node-id", NULL);
+		base = get_property(dn, "ioc-cache", NULL);
+		mmio_base = get_property(dn, "ioc-translation", NULL);
 
 		if (!base || !mmio_base || !node_id)
 			return cell_map_iommu_hardcoded(num_nodes);
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 282987d..22c228a 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -150,10 +150,6 @@
 	    !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
 		return 0;
 
-#ifdef CONFIG_UDBG_RTAS_CONSOLE
-	udbg_init_rtas_console();
-#endif
-
 	hpte_init_native();
 
 	return 1;
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index 46aef06..1c0acba 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -57,7 +57,7 @@
  */
 static cpumask_t of_spin_map;
 
-extern void pSeries_secondary_smp_init(unsigned long);
+extern void generic_secondary_smp_init(unsigned long);
 
 /**
  * smp_startup_cpu() - start the given cpu
@@ -74,7 +74,7 @@
 {
 	int status;
 	unsigned long start_here = __pa((u32)*((unsigned long *)
-					       pSeries_secondary_smp_init));
+					       generic_secondary_smp_init));
 	unsigned int pcpu;
 	int start_cpu;
 
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 15217bb..742a032 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -240,7 +240,7 @@
 static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 {
 	unsigned int virq;
-	u32 *imap, *tmp;
+	const u32 *imap, *tmp;
 	int imaplen, intsize, unit;
 	struct device_node *iic;
 	struct irq_host *iic_host;
@@ -258,25 +258,25 @@
 #endif
 
 	/* Now do the horrible hacks */
-	tmp = (u32 *)get_property(pic->of_node, "#interrupt-cells", NULL);
+	tmp = get_property(pic->of_node, "#interrupt-cells", NULL);
 	if (tmp == NULL)
 		return NO_IRQ;
 	intsize = *tmp;
-	imap = (u32 *)get_property(pic->of_node, "interrupt-map", &imaplen);
+	imap = get_property(pic->of_node, "interrupt-map", &imaplen);
 	if (imap == NULL || imaplen < (intsize + 1))
 		return NO_IRQ;
 	iic = of_find_node_by_phandle(imap[intsize]);
 	if (iic == NULL)
 		return NO_IRQ;
 	imap += intsize + 1;
-	tmp = (u32 *)get_property(iic, "#interrupt-cells", NULL);
+	tmp = get_property(iic, "#interrupt-cells", NULL);
 	if (tmp == NULL)
 		return NO_IRQ;
 	intsize = *tmp;
 	/* Assume unit is last entry of interrupt specifier */
 	unit = imap[intsize - 1];
 	/* Ok, we have a unit, now let's try to get the node */
-	tmp = (u32 *)get_property(iic, "ibm,interrupt-server-ranges", NULL);
+	tmp = get_property(iic, "ibm,interrupt-server-ranges", NULL);
 	if (tmp == NULL) {
 		of_node_put(iic);
 		return NO_IRQ;
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index d06042d..3bd36d4 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -488,10 +488,10 @@
 
 static int __init find_spu_node_id(struct device_node *spe)
 {
-	unsigned int *id;
+	const unsigned int *id;
 	struct device_node *cpu;
 	cpu = spe->parent->parent;
-	id = (unsigned int *)get_property(cpu, "node-id", NULL);
+	id = get_property(cpu, "node-id", NULL);
 	return id ? *id : 0;
 }
 
@@ -500,7 +500,7 @@
 {
 	static DEFINE_MUTEX(add_spumem_mutex);
 
-	struct address_prop {
+	const struct address_prop {
 		unsigned long address;
 		unsigned int len;
 	} __attribute__((packed)) *p;
@@ -511,7 +511,7 @@
 	struct zone *zone;
 	int ret;
 
-	p = (void*)get_property(spe, prop, &proplen);
+	p = get_property(spe, prop, &proplen);
 	WARN_ON(proplen != sizeof (*p));
 
 	start_pfn = p->address >> PAGE_SHIFT;
@@ -531,12 +531,12 @@
 static void __iomem * __init map_spe_prop(struct spu *spu,
 		struct device_node *n, const char *name)
 {
-	struct address_prop {
+	const struct address_prop {
 		unsigned long address;
 		unsigned int len;
 	} __attribute__((packed)) *prop;
 
-	void *p;
+	const void *p;
 	int proplen;
 	void* ret = NULL;
 	int err = 0;
@@ -570,14 +570,14 @@
 {
 	struct irq_host *host;
 	unsigned int isrc;
-	u32 *tmp;
+	const u32 *tmp;
 
 	host = iic_get_irq_host(spu->node);
 	if (host == NULL)
 		return -ENODEV;
 
 	/* Get the interrupt source from the device-tree */
-	tmp = (u32 *)get_property(np, "isrc", NULL);
+	tmp = get_property(np, "isrc", NULL);
 	if (!tmp)
 		return -ENODEV;
 	spu->isrc = isrc = tmp[0];
@@ -593,7 +593,7 @@
 
 static int __init spu_map_device(struct spu *spu, struct device_node *node)
 {
-	char *prop;
+	const char *prop;
 	int ret;
 
 	ret = -ENODEV;
diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c
index 150f67d..0dd4a64 100644
--- a/arch/powerpc/platforms/chrp/nvram.c
+++ b/arch/powerpc/platforms/chrp/nvram.c
@@ -67,13 +67,14 @@
 void __init chrp_nvram_init(void)
 {
 	struct device_node *nvram;
-	unsigned int *nbytes_p, proplen;
+	const unsigned int *nbytes_p;
+	unsigned int proplen;
 
 	nvram = of_find_node_by_type(NULL, "nvram");
 	if (nvram == NULL)
 		return;
 
-	nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen);
+	nbytes_p = get_property(nvram, "#bytes", &proplen);
 	if (nbytes_p == NULL || proplen != sizeof(unsigned int))
 		return;
 
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
index 6802cdc3..0f43405 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -214,11 +214,11 @@
 chrp_find_bridges(void)
 {
 	struct device_node *dev;
-	int *bus_range;
+	const int *bus_range;
 	int len, index = -1;
 	struct pci_controller *hose;
-	unsigned int *dma;
-	char *model, *machine;
+	const unsigned int *dma;
+	const char *model, *machine;
 	int is_longtrail = 0, is_mot = 0, is_pegasos = 0;
 	struct device_node *root = find_path_device("/");
 	struct resource r;
@@ -246,7 +246,7 @@
 			       dev->full_name);
 			continue;
 		}
-		bus_range = (int *) get_property(dev, "bus-range", &len);
+		bus_range = get_property(dev, "bus-range", &len);
 		if (bus_range == NULL || len < 2 * sizeof(int)) {
 			printk(KERN_WARNING "Can't get bus-range for %s\n",
 				dev->full_name);
@@ -257,7 +257,7 @@
 		else
 			printk(KERN_INFO "PCI buses %d..%d",
 			       bus_range[0], bus_range[1]);
-		printk(" controlled by %s", dev->type);
+		printk(" controlled by %s", dev->full_name);
 		if (!is_longtrail)
 			printk(" at %llx", (unsigned long long)r.start);
 		printk("\n");
@@ -289,6 +289,19 @@
 			setup_indirect_pci(hose, 0xfec00cf8, 0xfee00cfc);
 		} else if (is_pegasos == 2) {
 			setup_peg2(hose, dev);
+		} else if (!strncmp(model, "IBM,CPC710", 10)) {
+			setup_indirect_pci(hose,
+					   r.start + 0x000f8000,
+					   r.start + 0x000f8010);
+			if (index == 0) {
+				dma = get_property(dev, "system-dma-base",&len);
+				if (dma && len >= sizeof(*dma)) {
+					dma = (unsigned int *)
+						(((unsigned long)dma) +
+						len - sizeof(*dma));
+						pci_dram_offset = *dma;
+				}
+			}
 		} else {
 			printk("No methods for %s (model %s), using RTAS\n",
 			       dev->full_name, model);
@@ -299,15 +312,35 @@
 
 		/* check the first bridge for a property that we can
 		   use to set pci_dram_offset */
-		dma = (unsigned int *)
-			get_property(dev, "ibm,dma-ranges", &len);
+		dma = get_property(dev, "ibm,dma-ranges", &len);
 		if (index == 0 && dma != NULL && len >= 6 * sizeof(*dma)) {
 			pci_dram_offset = dma[2] - dma[3];
 			printk("pci_dram_offset = %lx\n", pci_dram_offset);
 		}
 	}
-
-	/* Do not fixup interrupts from OF tree on pegasos */
-	if (is_pegasos)
-		ppc_md.pcibios_fixup = NULL;
 }
+
+/* SL82C105 IDE Control/Status Register */
+#define SL82C105_IDECSR                0x40
+
+/* Fixup for Winbond ATA quirk, required for briq */
+void chrp_pci_fixup_winbond_ata(struct pci_dev *sl82c105)
+{
+	u8 progif;		/* current PCI_CLASS_PROG byte */
+
+	/* If non-briq machines need that fixup too, please speak up */
+	if (!machine_is(chrp) || _chrp_type != _CHRP_briq)
+		return;
+
+	if ((sl82c105->class & 5) != 5) {	/* 0x05 not fully set yet */
+		printk("W83C553: Switching SL82C105 IDE to PCI native mode\n");
+		/* Enable SL82C105 PCI native IDE mode */
+		pci_read_config_byte(sl82c105, PCI_CLASS_PROG, &progif);
+		pci_write_config_byte(sl82c105, PCI_CLASS_PROG, progif | 0x05);
+		sl82c105->class |= 0x05;	/* keep cached copy in sync */
+		/* Disable SL82C105 second port */
+		pci_write_config_word(sl82c105, SL82C105_IDECSR, 0x0003);
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
+		chrp_pci_fixup_winbond_ata);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 9c08ff3..488dbd9 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -74,6 +74,9 @@
 
 extern unsigned long loops_per_jiffy;
 
+/* To be replaced by RTAS when available */
+static unsigned int *briq_SPOR;
+
 #ifdef CONFIG_SMP
 extern struct smp_ops_t chrp_smp_ops;
 #endif
@@ -92,6 +95,15 @@
 	"Disabled", "Write-Through", "Copy-Back", "Transparent Mode"
 };
 
+static const char *chrp_names[] = {	/* indexed by _chrp_type */
+	"Unknown",
+	"","","",	/* placeholders - presumably unused types; confirm */
+	"Motorola",
+	"IBM or Longtrail",
+	"Genesi Pegasos",
+	"Total Impact Briq"
+};
+
 void chrp_show_cpuinfo(struct seq_file *m)
 {
 	int i, sdramen;
@@ -214,8 +226,7 @@
 	/* Enable L2 cache if needed */
 	np = find_type_devices("cpu");
 	if (np != NULL) {
-		unsigned int *l2cr = (unsigned int *)
-			get_property (np, "l2cr", NULL);
+		const unsigned int *l2cr = get_property(np, "l2cr", NULL);
 		if (l2cr == NULL) {
 			printk ("Pegasos l2cr : no cpu l2cr property found\n");
 			return;
@@ -229,10 +240,18 @@
 	}
 }
 
+static void briq_restart(char *cmd)
+{
+	local_irq_disable();
+	if (briq_SPOR)		/* mapped in chrp_setup_arch() */
+		out_be32(briq_SPOR, 0);	/* NOTE(review): reset? confirm */
+	for(;;);		/* spin forever; never returns */
+}
+
 void __init chrp_setup_arch(void)
 {
 	struct device_node *root = find_path_device ("/");
-	char *machine = NULL;
+	const char *machine = NULL;
 
 	/* init to some ~sane value until calibrate_delay() runs */
 	loops_per_jiffy = 50000000/HZ;
@@ -245,11 +264,16 @@
 		_chrp_type = _CHRP_IBM;
 	} else if (machine && strncmp(machine, "MOT", 3) == 0) {
 		_chrp_type = _CHRP_Motorola;
+	} else if (machine && strncmp(machine, "TotalImpact,BRIQ-1", 18) == 0) {
+		_chrp_type = _CHRP_briq;
+		/* Map the SPOR register on briq and change the restart hook */
+		briq_SPOR = (unsigned int *)ioremap(0xff0000e8, 4);
+		ppc_md.restart = briq_restart;
 	} else {
 		/* Let's assume it is an IBM chrp if all else fails */
 		_chrp_type = _CHRP_IBM;
 	}
-	printk("chrp type = %x\n", _chrp_type);
+	printk("chrp type = %x [%s]\n", _chrp_type, chrp_names[_chrp_type]);
 
 	rtas_initialize();
 	if (rtas_token("display-character") >= 0)
@@ -328,7 +352,7 @@
 	struct device_node *np, *root;
 	int len, i, j;
 	int isu_size, idu_size;
-	unsigned int *iranges, *opprop = NULL;
+	const unsigned int *iranges, *opprop = NULL;
 	int oplen = 0;
 	unsigned long opaddr;
 	int na = 1;
@@ -338,8 +362,7 @@
 		return;
 	root = of_find_node_by_path("/");
 	if (root) {
-		opprop = (unsigned int *) get_property
-			(root, "platform-open-pic", &oplen);
+		opprop = get_property(root, "platform-open-pic", &oplen);
 		na = prom_n_addr_cells(root);
 	}
 	if (opprop && oplen >= na * sizeof(unsigned int)) {
@@ -356,7 +379,7 @@
 
 	printk(KERN_INFO "OpenPIC at %lx\n", opaddr);
 
-	iranges = (unsigned int *) get_property(np, "interrupt-ranges", &len);
+	iranges = get_property(np, "interrupt-ranges", &len);
 	if (iranges == NULL)
 		len = 0;	/* non-distributed mpic */
 	else
@@ -442,8 +465,8 @@
 	 * from anyway
 	 */
 	for (np = find_devices("pci"); np != NULL; np = np->next) {
-		unsigned int *addrp = (unsigned int *)
-			get_property(np, "8259-interrupt-acknowledge", NULL);
+		const unsigned int *addrp = get_property(np,
+				"8259-interrupt-acknowledge", NULL);
 
 		if (addrp == NULL)
 			continue;
@@ -502,7 +525,7 @@
 chrp_init2(void)
 {
 	struct device_node *device;
-	unsigned int *p = NULL;
+	const unsigned int *p = NULL;
 
 #ifdef CONFIG_NVRAM
 	chrp_nvram_init();
@@ -520,8 +543,7 @@
 	 */
 	device = find_devices("rtas");
 	if (device)
-		p = (unsigned int *) get_property
-			(device, "rtas-event-scan-rate", NULL);
+		p = get_property(device, "rtas-event-scan-rate", NULL);
 	if (p && *p) {
 		/*
 		 * Arrange to call chrp_event_scan at least *p times
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 5d393eb..e4f2b9d 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -95,7 +95,7 @@
 {
 	struct pci_controller *hose;
 	struct device_node *node;
-	unsigned int *interrupt;
+	const unsigned int *interrupt;
 	int busnr;
 	int len;
 	u8 slot;
@@ -112,7 +112,7 @@
 	if (!node)
 		printk(KERN_ERR "No pci node found\n");
 
-	interrupt = (unsigned int *) get_property(node, "interrupt-map", &len);
+	interrupt = get_property(node, "interrupt-map", &len);
 	slot = find_slot_by_devfn(interrupt, dev->devfn);
 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
 	if (pin == 0 || pin > 4)
@@ -141,9 +141,9 @@
 
 	cpu = of_find_node_by_type(NULL, "cpu");
 	if (cpu != 0) {
-		unsigned int *fp;
+		const unsigned int *fp;
 
-		fp = (int *)get_property(cpu, "clock-frequency", NULL);
+		fp = get_property(cpu, "clock-frequency", NULL);
 		if (fp != 0)
 			loops_per_jiffy = *fp / HZ;
 		else
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index 3d957a3..887b688 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -3,13 +3,17 @@
 	depends on PPC_ISERIES
 
 config VIOCONS
-	tristate "iSeries Virtual Console Support"
+	tristate "iSeries Virtual Console Support (Obsolete)"
+	help
+	  This is the old virtual console driver for legacy iSeries.
+	  You should use the iSeries Hypervisor Virtual Console
+	  support instead.
 
 config VIODASD
 	tristate "iSeries Virtual I/O disk support"
 	help
 	  If you are running on an iSeries system and you want to use
- 	  virtual disks created and managed by OS/400, say Y.
+	  virtual disks created and managed by OS/400, say Y.
 
 config VIOCD
 	tristate "iSeries Virtual I/O CD support"
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index d194140..e305dee 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -1,5 +1,6 @@
 /*
- *    Copyright (c) 2005-2006 Michael Ellerman, IBM Corporation
+ *    Copyright (C) 2005-2006 Michael Ellerman, IBM Corporation
+ *    Copyright (C) 2000-2004, IBM Corporation
  *
  *    Description:
  *      This file contains all the routines to build a flattened device
@@ -33,13 +34,13 @@
 #include <asm/iseries/hv_types.h>
 #include <asm/iseries/hv_lp_config.h>
 #include <asm/iseries/hv_call_xm.h>
-#include <asm/iseries/it_exp_vpd_panel.h>
 #include <asm/udbg.h>
 
 #include "processor_vpd.h"
 #include "call_hpt.h"
 #include "call_pci.h"
 #include "pci.h"
+#include "it_exp_vpd_panel.h"
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -76,6 +77,43 @@
 static char __initdata device_type_vdevice[] = "vdevice";
 static char __initdata device_type_vscsi[] = "vscsi";
 
+
+/* EBCDIC to ASCII conversion routines */
+
+static unsigned char __init e2a(unsigned char x)
+{
+	/* Letters sit in three non-contiguous EBCDIC runs per case */
+	if (x >= 0x81 && x <= 0x89)
+		return 'a' + (x - 0x81);
+	if (x >= 0x91 && x <= 0x99)
+		return 'j' + (x - 0x91);
+	if (x >= 0xA2 && x <= 0xA9)
+		return 's' + (x - 0xA2);
+	if (x >= 0xC1 && x <= 0xC9)
+		return 'A' + (x - 0xC1);
+	if (x >= 0xD1 && x <= 0xD9)
+		return 'J' + (x - 0xD1);
+	if (x >= 0xE2 && x <= 0xE9)
+		return 'S' + (x - 0xE2);
+	if (x >= 0xF0 && x <= 0xF9)
+		return '0' + (x - 0xF0);
+	/* Every other code maps to a blank */
+	return ' ';
+}
+
+static unsigned char * __init strne2a(unsigned char *dest,
+		const unsigned char *src, size_t n)
+{
+	size_t i;	/* same type as n: no signed/unsigned comparison */
+
+	n = strnlen(src, n);	/* stop at the first NUL or after n bytes */
+
+	for (i = 0; i < n; i++)
+		dest[i] = e2a(src[i]);
+
+	return dest;	/* NOTE: dest is not NUL-terminated here */
+}
+
 static struct iseries_flat_dt * __init dt_init(void)
 {
 	struct iseries_flat_dt *dt;
@@ -298,7 +336,8 @@
 	dt_prop_u32(dt, "#address-cells", 1);
 	dt_prop_u32(dt, "#size-cells", 0);
 
-	dt_do_vdevice(dt, "vty", reg, -1, device_type_serial, NULL, 1);
+	dt_do_vdevice(dt, "vty", reg, -1, device_type_serial,
+			"IBM,iSeries-vty", 1);
 	reg++;
 
 	dt_do_vdevice(dt, "v-scsi", reg, -1, device_type_vscsi,
diff --git a/arch/powerpc/platforms/iseries/hvlpconfig.c b/arch/powerpc/platforms/iseries/hvlpconfig.c
index 663a1af..f0475f0 100644
--- a/arch/powerpc/platforms/iseries/hvlpconfig.c
+++ b/arch/powerpc/platforms/iseries/hvlpconfig.c
@@ -18,9 +18,22 @@
 
 #include <linux/module.h>
 #include <asm/iseries/hv_lp_config.h>
+#include "it_lp_naca.h"
 
 HvLpIndex HvLpConfig_getLpIndex_outline(void)
 {
 	return HvLpConfig_getLpIndex();
 }
 EXPORT_SYMBOL(HvLpConfig_getLpIndex_outline);
+
+HvLpIndex HvLpConfig_getLpIndex(void)
+{
+	return itLpNaca.xLpIndex;	/* itLpNaca is not exported */
+}
+EXPORT_SYMBOL(HvLpConfig_getLpIndex);
+
+HvLpIndex HvLpConfig_getPrimaryLpIndex(void)
+{
+	return itLpNaca.xPrimaryLpIndex;	/* itLpNaca is not exported */
+}
+EXPORT_SYMBOL_GPL(HvLpConfig_getPrimaryLpIndex);
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index e3bd201..f4cbbcf 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -88,6 +88,23 @@
 }
 
 /*
+ * Structure passed to HvCallXm_getTceTableParms
+ */
+struct iommu_table_cb {
+	unsigned long	itc_busno;	/* Bus number for this tce table */
+	unsigned long	itc_start;	/* Will be NULL for secondary */
+	unsigned long	itc_totalsize;	/* Size (in pages) of whole table */
+	unsigned long	itc_offset;	/* Index into real tce table of the
+					   start of our section */
+	unsigned long	itc_size;	/* Size (in pages) of our section */
+	unsigned long	itc_index;	/* Index of this tce table */
+	unsigned short	itc_maxtables;	/* Max num of tables for partition */
+	unsigned char	itc_virtbus;	/* Flag to indicate virtual bus */
+	unsigned char	itc_slotno;	/* IOA Tce Slot Index */
+	unsigned char	itc_rsvd[4];
+};
+
+/*
  * Call Hv with the architected data structure to get TCE table info.
  * info. Put the returned data into the Linux representation of the
  * TCE table data.
@@ -162,7 +179,7 @@
 {
 	struct iommu_table *tbl;
 	struct pci_dn *pdn = PCI_DN(dn);
-	u32 *lsn = (u32 *)get_property(dn, "linux,logical-slot-number", NULL);
+	const u32 *lsn = get_property(dn, "linux,logical-slot-number", NULL);
 
 	BUG_ON(lsn == NULL);
 
diff --git a/include/asm-powerpc/iseries/it_exp_vpd_panel.h b/arch/powerpc/platforms/iseries/it_exp_vpd_panel.h
similarity index 89%
rename from include/asm-powerpc/iseries/it_exp_vpd_panel.h
rename to arch/powerpc/platforms/iseries/it_exp_vpd_panel.h
index 304a609..6de9097 100644
--- a/include/asm-powerpc/iseries/it_exp_vpd_panel.h
+++ b/arch/powerpc/platforms/iseries/it_exp_vpd_panel.h
@@ -15,8 +15,8 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
-#ifndef _ASM_POWERPC_ISERIES_IT_EXT_VPD_PANEL_H
-#define _ASM_POWERPC_ISERIES_IT_EXT_VPD_PANEL_H
+#ifndef _PLATFORMS_ISERIES_IT_EXT_VPD_PANEL_H
+#define _PLATFORMS_ISERIES_IT_EXT_VPD_PANEL_H
 
 /*
  *	This struct maps the panel information
@@ -48,4 +48,4 @@
 
 extern struct ItExtVpdPanel	xItExtVpdPanel;
 
-#endif /* _ASM_POWERPC_ISERIES_IT_EXT_VPD_PANEL_H */
+#endif /* _PLATFORMS_ISERIES_IT_EXT_VPD_PANEL_H */
diff --git a/include/asm-powerpc/iseries/it_lp_naca.h b/arch/powerpc/platforms/iseries/it_lp_naca.h
similarity index 95%
rename from include/asm-powerpc/iseries/it_lp_naca.h
rename to arch/powerpc/platforms/iseries/it_lp_naca.h
index 4fdcf05..9bbf589 100644
--- a/include/asm-powerpc/iseries/it_lp_naca.h
+++ b/arch/powerpc/platforms/iseries/it_lp_naca.h
@@ -15,8 +15,8 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
-#ifndef _ASM_POWERPC_ISERIES_IT_LP_NACA_H
-#define _ASM_POWERPC_ISERIES_IT_LP_NACA_H
+#ifndef _PLATFORMS_ISERIES_IT_LP_NACA_H
+#define _PLATFORMS_ISERIES_IT_LP_NACA_H
 
 #include <linux/types.h>
 
@@ -77,4 +77,4 @@
 #define ITLPNACA_HWSYNCEDTBS	0x20	/* Hardware synced TBs */
 #define ITLPNACA_HMTINT		0x10	/* Utilize MHT for interrupts */
 
-#endif /* _ASM_POWERPC_ISERIES_IT_LP_NACA_H */
+#endif /* _PLATFORMS_ISERIES_IT_LP_NACA_H */
diff --git a/arch/powerpc/platforms/iseries/lpardata.c b/arch/powerpc/platforms/iseries/lpardata.c
index a776944..8162049 100644
--- a/arch/powerpc/platforms/iseries/lpardata.c
+++ b/arch/powerpc/platforms/iseries/lpardata.c
@@ -13,12 +13,10 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/abs_addr.h>
-#include <asm/iseries/it_lp_naca.h>
 #include <asm/lppaca.h>
 #include <asm/iseries/it_lp_reg_save.h>
 #include <asm/paca.h>
 #include <asm/iseries/lpar_map.h>
-#include <asm/iseries/it_exp_vpd_panel.h>
 #include <asm/iseries/it_lp_queue.h>
 
 #include "naca.h"
@@ -27,6 +25,8 @@
 #include "ipl_parms.h"
 #include "processor_vpd.h"
 #include "release_data.h"
+#include "it_exp_vpd_panel.h"
+#include "it_lp_naca.h"
 
 /* The HvReleaseData is the root of the information shared between
  * the hypervisor and Linux.
@@ -127,14 +127,12 @@
 		(u64)instruction_access_slb_iSeries /* 0x480 I-SLB */
 	}
 };
-EXPORT_SYMBOL(itLpNaca);
 
 /* May be filled in by the hypervisor so cannot end up in the BSS */
 struct ItIplParmsReal xItIplParmsReal __attribute__((__section__(".data")));
 
 /* May be filled in by the hypervisor so cannot end up in the BSS */
 struct ItExtVpdPanel xItExtVpdPanel __attribute__((__section__(".data")));
-EXPORT_SYMBOL(xItExtVpdPanel);
 
 #define maxPhysicalProcessors 32
 
diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c
index 2a9f81e..98c1c24 100644
--- a/arch/powerpc/platforms/iseries/lpevents.c
+++ b/arch/powerpc/platforms/iseries/lpevents.c
@@ -20,7 +20,7 @@
 #include <asm/iseries/it_lp_queue.h>
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/hv_call_event.h>
-#include <asm/iseries/it_lp_naca.h>
+#include "it_lp_naca.h"
 
 /*
  * The LpQueue is used to pass event data from the hypervisor to
diff --git a/arch/powerpc/platforms/iseries/main_store.h b/arch/powerpc/platforms/iseries/main_store.h
index 74f6889..1a7a3f5 100644
--- a/arch/powerpc/platforms/iseries/main_store.h
+++ b/arch/powerpc/platforms/iseries/main_store.h
@@ -61,9 +61,9 @@
 };
 
 /* Main Store VPD for Power4 */
-struct IoHriMainStoreChipInfo1 {
-	u32	chipMfgID	__attribute((packed));
-	char	chipECLevel[4]	__attribute((packed));
+struct __attribute((packed)) IoHriMainStoreChipInfo1 {
+	u32	chipMfgID;
+	char	chipECLevel[4];
 };
 
 struct IoHriMainStoreVpdIdData {
@@ -73,72 +73,72 @@
 	char	serialNumber[12];
 };
 
-struct IoHriMainStoreVpdFruData {
-	char	fruLabel[8]	__attribute((packed));
-	u8	numberOfSlots	__attribute((packed));
-	u8	pluggingType	__attribute((packed));
-	u16	slotMapIndex	__attribute((packed));
+struct	__attribute((packed)) IoHriMainStoreVpdFruData {
+	char	fruLabel[8];
+	u8	numberOfSlots;
+	u8	pluggingType;
+	u16	slotMapIndex;
 };
 
-struct IoHriMainStoreAdrRangeBlock {
-	void	*blockStart      __attribute((packed));
-	void	*blockEnd        __attribute((packed));
-	u32	blockProcChipId __attribute((packed));
+struct  __attribute((packed)) IoHriMainStoreAdrRangeBlock {
+	void	*blockStart;
+	void	*blockEnd;
+	u32	blockProcChipId;
 };
 
 #define MaxAreaAdrRangeBlocks 4
 
-struct IoHriMainStoreArea4 {
-	u32	msVpdFormat			__attribute((packed));
-	u8	containedVpdType		__attribute((packed));
-	u8	reserved1			__attribute((packed));
-	u16	reserved2			__attribute((packed));
+struct __attribute((packed)) IoHriMainStoreArea4 {
+	u32	msVpdFormat;
+	u8	containedVpdType;
+	u8	reserved1;
+	u16	reserved2;
 
-	u64	msExists			__attribute((packed));
-	u64	msFunctional			__attribute((packed));
+	u64	msExists;
+	u64	msFunctional;
 
-	u32	memorySize			__attribute((packed));
-	u32	procNodeId			__attribute((packed));
+	u32	memorySize;
+	u32	procNodeId;
 
-	u32	numAdrRangeBlocks		__attribute((packed));
-	struct IoHriMainStoreAdrRangeBlock xAdrRangeBlock[MaxAreaAdrRangeBlocks]	__attribute((packed));
+	u32	numAdrRangeBlocks;
+	struct IoHriMainStoreAdrRangeBlock xAdrRangeBlock[MaxAreaAdrRangeBlocks];
 
-	struct IoHriMainStoreChipInfo1	chipInfo0	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo1	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo2	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo3	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo4	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo5	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo6	__attribute((packed));
-	struct IoHriMainStoreChipInfo1	chipInfo7	__attribute((packed));
+	struct IoHriMainStoreChipInfo1	chipInfo0;
+	struct IoHriMainStoreChipInfo1	chipInfo1;
+	struct IoHriMainStoreChipInfo1	chipInfo2;
+	struct IoHriMainStoreChipInfo1	chipInfo3;
+	struct IoHriMainStoreChipInfo1	chipInfo4;
+	struct IoHriMainStoreChipInfo1	chipInfo5;
+	struct IoHriMainStoreChipInfo1	chipInfo6;
+	struct IoHriMainStoreChipInfo1	chipInfo7;
 
-	void	*msRamAreaArray			__attribute((packed));
-	u32	msRamAreaArrayNumEntries	__attribute((packed));
-	u32	msRamAreaArrayEntrySize		__attribute((packed));
+	void	*msRamAreaArray;
+	u32	msRamAreaArrayNumEntries;
+	u32	msRamAreaArrayEntrySize;
 
-	u32	numaDimmExists			__attribute((packed));
-	u32	numaDimmFunctional		__attribute((packed));
-	void	*numaDimmArray			__attribute((packed));
-	u32	numaDimmArrayNumEntries		__attribute((packed));
-	u32	numaDimmArrayEntrySize		__attribute((packed));
+	u32	numaDimmExists;
+	u32	numaDimmFunctional;
+	void	*numaDimmArray;
+	u32	numaDimmArrayNumEntries;
+	u32	numaDimmArrayEntrySize;
 
-	struct IoHriMainStoreVpdIdData idData	__attribute((packed));
+	struct IoHriMainStoreVpdIdData idData;
 
-	u64	powerData			__attribute((packed));
-	u64	cardAssemblyPartNum		__attribute((packed));
-	u64	chipSerialNum			__attribute((packed));
+	u64	powerData;
+	u64	cardAssemblyPartNum;
+	u64	chipSerialNum;
 
-	u64	reserved3			__attribute((packed));
-	char	reserved4[16]			__attribute((packed));
+	u64	reserved3;
+	char	reserved4[16];
 
-	struct IoHriMainStoreVpdFruData fruData	__attribute((packed));
+	struct IoHriMainStoreVpdFruData fruData;
 
-	u8	vpdPortNum			__attribute((packed));
-	u8	reserved5			__attribute((packed));
-	u8	frameId				__attribute((packed));
-	u8	rackUnit			__attribute((packed));
-	char	asciiKeywordVpd[256]		__attribute((packed));
-	u32	reserved6			__attribute((packed));
+	u8	vpdPortNum;
+	u8	reserved5;
+	u8	frameId;
+	u8	rackUnit;
+	char	asciiKeywordVpd[256];
+	u32	reserved6;
 };
 
 
diff --git a/arch/powerpc/platforms/iseries/pci.c b/arch/powerpc/platforms/iseries/pci.c
index 35bcc98..3eb1206 100644
--- a/arch/powerpc/platforms/iseries/pci.c
+++ b/arch/powerpc/platforms/iseries/pci.c
@@ -34,6 +34,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/iommu.h>
 #include <asm/abs_addr.h>
+#include <asm/firmware.h>
 
 #include <asm/iseries/hv_call_xm.h>
 #include <asm/iseries/mf.h>
@@ -176,12 +177,12 @@
 	}
 	while ((node = of_get_next_child(root, node)) != NULL) {
 		HvBusNumber bus;
-		u32 *busp;
+		const u32 *busp;
 
 		if ((node->type == NULL) || (strcmp(node->type, "pci") != 0))
 			continue;
 
-		busp = (u32 *)get_property(node, "bus-range", NULL);
+		busp = get_property(node, "bus-range", NULL);
 		if (busp == NULL)
 			continue;
 		bus = *busp;
@@ -221,10 +222,9 @@
 
 		if (node != NULL) {
 			struct pci_dn *pdn = PCI_DN(node);
-			u32 *agent;
+			const u32 *agent;
 
-			agent = (u32 *)get_property(node, "linux,agent-id",
-					NULL);
+			agent = get_property(node, "linux,agent-id", NULL);
 			if ((pdn != NULL) && (agent != NULL)) {
 				u8 irq = iSeries_allocate_IRQ(pdn->busno, 0,
 						pdn->bussubno);
@@ -271,46 +271,6 @@
 }
 
 /*
- * I/0 Memory copy MUST use mmio commands on iSeries
- * To do; For performance, include the hv call directly
- */
-void iSeries_memset_io(volatile void __iomem *dest, char c, size_t Count)
-{
-	u8 ByteValue = c;
-	long NumberOfBytes = Count;
-
-	while (NumberOfBytes > 0) {
-		iSeries_Write_Byte(ByteValue, dest++);
-		-- NumberOfBytes;
-	}
-}
-EXPORT_SYMBOL(iSeries_memset_io);
-
-void iSeries_memcpy_toio(volatile void __iomem *dest, void *source, size_t count)
-{
-	char *src = source;
-	long NumberOfBytes = count;
-
-	while (NumberOfBytes > 0) {
-		iSeries_Write_Byte(*src++, dest++);
-		-- NumberOfBytes;
-	}
-}
-EXPORT_SYMBOL(iSeries_memcpy_toio);
-
-void iSeries_memcpy_fromio(void *dest, const volatile void __iomem *src, size_t count)
-{
-	char *dst = dest;
-	long NumberOfBytes = count;
-
-	while (NumberOfBytes > 0) {
-		*dst++ = iSeries_Read_Byte(src++);
-		-- NumberOfBytes;
-	}
-}
-EXPORT_SYMBOL(iSeries_memcpy_fromio);
-
-/*
  * Look down the chain to find the matching Device Device
  */
 static struct device_node *find_Device_Node(int bus, int devfn)
@@ -492,7 +452,7 @@
  * iSeries_Read_Word = Read Word  (16 bit)
  * iSeries_Read_Long = Read Long  (32 bit)
  */
-u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress)
+static u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -519,9 +479,8 @@
 
 	return (u8)ret.value;
 }
-EXPORT_SYMBOL(iSeries_Read_Byte);
 
-u16 iSeries_Read_Word(const volatile void __iomem *IoAddress)
+static u16 iSeries_Read_Word(const volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -549,9 +508,8 @@
 
 	return swab16((u16)ret.value);
 }
-EXPORT_SYMBOL(iSeries_Read_Word);
 
-u32 iSeries_Read_Long(const volatile void __iomem *IoAddress)
+static u32 iSeries_Read_Long(const volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -579,7 +537,6 @@
 
 	return swab32((u32)ret.value);
 }
-EXPORT_SYMBOL(iSeries_Read_Long);
 
 /*
  * Write MM I/O Instructions for the iSeries
@@ -588,7 +545,7 @@
  * iSeries_Write_Word = Write Word(16 bit)
  * iSeries_Write_Long = Write Long(32 bit)
  */
-void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress)
+static void iSeries_Write_Byte(u8 data, volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -613,9 +570,8 @@
 		rc = HvCall4(HvCallPciBarStore8, dsa, BarOffset, data, 0);
 	} while (CheckReturnCode("WWB", DevNode, &retry, rc) != 0);
 }
-EXPORT_SYMBOL(iSeries_Write_Byte);
 
-void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress)
+static void iSeries_Write_Word(u16 data, volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -640,9 +596,8 @@
 		rc = HvCall4(HvCallPciBarStore16, dsa, BarOffset, swab16(data), 0);
 	} while (CheckReturnCode("WWW", DevNode, &retry, rc) != 0);
 }
-EXPORT_SYMBOL(iSeries_Write_Word);
 
-void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress)
+static void iSeries_Write_Long(u32 data, volatile void __iomem *IoAddress)
 {
 	u64 BarOffset;
 	u64 dsa;
@@ -667,4 +622,224 @@
 		rc = HvCall4(HvCallPciBarStore32, dsa, BarOffset, swab32(data), 0);
 	} while (CheckReturnCode("WWL", DevNode, &retry, rc) != 0);
 }
-EXPORT_SYMBOL(iSeries_Write_Long);
+
+extern unsigned char __raw_readb(const volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* BUG_ON above fires on iSeries; otherwise a plain volatile byte load */
+	return *(volatile unsigned char __force *)addr;
+}
+EXPORT_SYMBOL(__raw_readb);
+
+extern unsigned short __raw_readw(const volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* Same pattern as __raw_readb, loading an unsigned short */
+	return *(volatile unsigned short __force *)addr;
+}
+EXPORT_SYMBOL(__raw_readw);
+
+extern unsigned int __raw_readl(const volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* Same pattern as __raw_readb, loading an unsigned int */
+	return *(volatile unsigned int __force *)addr;
+}
+EXPORT_SYMBOL(__raw_readl);
+
+extern unsigned long __raw_readq(const volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* Same pattern as __raw_readb, loading an unsigned long */
+	return *(volatile unsigned long __force *)addr;
+}
+EXPORT_SYMBOL(__raw_readq);
+
+extern void __raw_writeb(unsigned char v, volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* BUG_ON above fires on iSeries; otherwise a plain volatile byte store */
+	*(volatile unsigned char __force *)addr = v;
+}
+EXPORT_SYMBOL(__raw_writeb);
+
+extern void __raw_writew(unsigned short v, volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* Same pattern as __raw_writeb, storing an unsigned short */
+	*(volatile unsigned short __force *)addr = v;
+}
+EXPORT_SYMBOL(__raw_writew);
+
+extern void __raw_writel(unsigned int v, volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* Same pattern as __raw_writeb, storing an unsigned int */
+	*(volatile unsigned int __force *)addr = v;
+}
+EXPORT_SYMBOL(__raw_writel);
+
+extern void __raw_writeq(unsigned long v, volatile void __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* Same pattern as __raw_writeb, storing an unsigned long */
+	*(volatile unsigned long __force *)addr = v;
+}
+EXPORT_SYMBOL(__raw_writeq);
+
+/* Read a byte: iSeries_Read_Byte() on iSeries firmware, else __in_8() */
+int in_8(const volatile unsigned char __iomem *addr)
+{
+	return firmware_has_feature(FW_FEATURE_ISERIES) ?
+		iSeries_Read_Byte(addr) : __in_8(addr);
+}
+EXPORT_SYMBOL(in_8);
+
+void out_8(volatile unsigned char __iomem *addr, int val)
+{
+	if (!firmware_has_feature(FW_FEATURE_ISERIES))
+		__out_8(addr, val);
+	else	/* iSeries firmware: store through iSeries_Write_Byte() */
+		iSeries_Write_Byte(val, addr);
+}
+EXPORT_SYMBOL(out_8);
+
+/* 16-bit LE read: iSeries_Read_Word() on iSeries, else __in_le16() */
+int in_le16(const volatile unsigned short __iomem *addr)
+{
+	return firmware_has_feature(FW_FEATURE_ISERIES) ?
+		iSeries_Read_Word(addr) : __in_le16(addr);
+}
+EXPORT_SYMBOL(in_le16);
+
+int in_be16(const volatile unsigned short __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* No iSeries path for this accessor: BUG there, else __in_be16() */
+	return __in_be16(addr);
+}
+EXPORT_SYMBOL(in_be16);
+
+void out_le16(volatile unsigned short __iomem *addr, int val)
+{
+	if (!firmware_has_feature(FW_FEATURE_ISERIES))
+		__out_le16(addr, val);
+	else	/* iSeries firmware: store through iSeries_Write_Word() */
+		iSeries_Write_Word(val, addr);
+}
+EXPORT_SYMBOL(out_le16);
+
+void out_be16(volatile unsigned short __iomem *addr, int val)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* No iSeries path for this accessor: BUG there, else __out_be16() */
+	__out_be16(addr, val);
+}
+EXPORT_SYMBOL(out_be16);
+
+/* 32-bit LE read: iSeries_Read_Long() on iSeries, else __in_le32() */
+unsigned in_le32(const volatile unsigned __iomem *addr)
+{
+	return firmware_has_feature(FW_FEATURE_ISERIES) ?
+		iSeries_Read_Long(addr) : __in_le32(addr);
+}
+EXPORT_SYMBOL(in_le32);
+
+unsigned in_be32(const volatile unsigned __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* No iSeries path for this accessor: BUG there, else __in_be32() */
+	return __in_be32(addr);
+}
+EXPORT_SYMBOL(in_be32);
+
+void out_le32(volatile unsigned __iomem *addr, int val)
+{
+	if (!firmware_has_feature(FW_FEATURE_ISERIES))
+		__out_le32(addr, val);
+	else	/* iSeries firmware: store through iSeries_Write_Long() */
+		iSeries_Write_Long(val, addr);
+}
+EXPORT_SYMBOL(out_le32);
+
+void out_be32(volatile unsigned __iomem *addr, int val)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* No iSeries path for this accessor: BUG there, else __out_be32() */
+	__out_be32(addr, val);
+}
+EXPORT_SYMBOL(out_be32);
+
+unsigned long in_le64(const volatile unsigned long __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* None of the 64-bit accessors has an iSeries path: BUG there */
+	return __in_le64(addr);
+}
+EXPORT_SYMBOL(in_le64);
+
+unsigned long in_be64(const volatile unsigned long __iomem *addr)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* Off iSeries this is just __in_be64() */
+	return __in_be64(addr);
+}
+EXPORT_SYMBOL(in_be64);
+
+void out_le64(volatile unsigned long __iomem *addr, unsigned long val)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* Off iSeries this is just __out_le64() */
+	__out_le64(addr, val);
+}
+EXPORT_SYMBOL(out_le64);
+
+void out_be64(volatile unsigned long __iomem *addr, unsigned long val)
+{
+	BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
+	/* Off iSeries this is just __out_be64() */
+	__out_be64(addr, val);
+}
+EXPORT_SYMBOL(out_be64);
+
+/* Fill n bytes at addr with c; on iSeries, one iSeries_Write_Byte() per byte */
+void memset_io(volatile void __iomem *addr, int c, unsigned long n)
+{
+	volatile char __iomem *dst = addr;
+
+	if (!firmware_has_feature(FW_FEATURE_ISERIES))
+		eeh_memset_io(addr, c, n);
+	else
+		for (; n > 0; n--)
+			iSeries_Write_Byte(c, dst++);
+}
+EXPORT_SYMBOL(memset_io);
+
+/* Copy n bytes out of I/O memory; iSeries reads them one byte at a time */
+void memcpy_fromio(void *dest, const volatile void __iomem *src,
+		   unsigned long n)
+{
+	const volatile char __iomem *from = src;
+	char *to = dest;
+
+	if (!firmware_has_feature(FW_FEATURE_ISERIES))
+		eeh_memcpy_fromio(dest, src, n);
+	else
+		for (; n > 0; n--)
+			*to++ = iSeries_Read_Byte(from++);
+}
+EXPORT_SYMBOL(memcpy_fromio);
+
+/* Copy n bytes into I/O memory; iSeries writes them one byte at a time */
+void memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n)
+{
+	volatile char __iomem *to = dest;
+	const char *from = src;
+
+	if (!firmware_has_feature(FW_FEATURE_ISERIES))
+		eeh_memcpy_toio(dest, src, n);
+	else
+		for (; n > 0; n--)
+			iSeries_Write_Byte(*from++, to++);
+}
+EXPORT_SYMBOL(memcpy_toio);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index c9605d7..7f19530 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -59,6 +59,7 @@
 #include "irq.h"
 #include "vpd_areas.h"
 #include "processor_vpd.h"
+#include "it_lp_naca.h"
 #include "main_store.h"
 #include "call_sm.h"
 #include "call_hpt.h"
diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index 622a301..9baa4ee 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -41,8 +41,8 @@
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/prom.h>
 #include <asm/iseries/hv_types.h>
-#include <asm/iseries/it_exp_vpd_panel.h>
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/hv_lp_config.h>
 #include <asm/iseries/mf.h>
@@ -116,6 +116,8 @@
 	dma_addr_t handle;
 	HvLpEvent_Rc hvrc;
 	DECLARE_MUTEX_LOCKED(Semaphore);
+	struct device_node *node;
+	const char *sysid;
 
 	buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
 	if (!buf)
@@ -143,20 +145,26 @@
 
 	buf[HW_PAGE_SIZE-1] = '\0';
 	seq_printf(m, "%s", buf);
-	seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
-	seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
-		   e2a(xItExtVpdPanel.mfgID[2]),
-		   e2a(xItExtVpdPanel.mfgID[3]),
-		   e2a(xItExtVpdPanel.systemSerial[1]),
-		   e2a(xItExtVpdPanel.systemSerial[2]),
-		   e2a(xItExtVpdPanel.systemSerial[3]),
-		   e2a(xItExtVpdPanel.systemSerial[4]),
-		   e2a(xItExtVpdPanel.systemSerial[5]));
 
 	dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE,
 			 DMA_FROM_DEVICE);
 	kfree(buf);
 
+	seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
+
+	node = of_find_node_by_path("/");
+	sysid = NULL;
+	if (node != NULL)
+		sysid = get_property(node, "system-id", NULL);
+
+	if (sysid == NULL)
+		seq_printf(m, "SRLNBR=<UNKNOWN>\n");
+	else
+		/* Skip "IBM," on front of serial number, see dt.c */
+		seq_printf(m, "SRLNBR=%s\n", sysid + 4);
+
+	of_node_put(node);
+
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/iseries/vpdinfo.c b/arch/powerpc/platforms/iseries/vpdinfo.c
index 23a6d1e..9f83878 100644
--- a/arch/powerpc/platforms/iseries/vpdinfo.c
+++ b/arch/powerpc/platforms/iseries/vpdinfo.c
@@ -188,7 +188,7 @@
 {
 	u8 *TagPtr = VpdData;
 	int DataLen = VpdDataLen - 3;
-	u8 PhbId;
+	u8 PhbId = 0xff;
 
 	while ((*TagPtr != VpdEndOfAreaTag) && (DataLen > 0)) {
 		int AreaLen = *(TagPtr + 1) + (*(TagPtr + 2) * 256);
@@ -205,15 +205,16 @@
 	}
 }
 
-static void __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent,
+static int __init iSeries_Get_Location_Code(u16 bus, HvAgentId agent,
 		u8 *frame, char card[4])
 {
+	int status = 0;
 	int BusVpdLen = 0;
 	u8 *BusVpdPtr = kmalloc(BUS_VPDSIZE, GFP_KERNEL);
 
 	if (BusVpdPtr == NULL) {
 		printk("PCI: Bus VPD Buffer allocation failure.\n");
-		return;
+		return 0;
 	}
 	BusVpdLen = HvCallPci_getBusVpd(bus, iseries_hv_addr(BusVpdPtr),
 					BUS_VPDSIZE);
@@ -228,8 +229,10 @@
 		goto out_free;
 	}
 	iSeries_Parse_Vpd(BusVpdPtr, BusVpdLen, agent, frame, card);
+	status = 1;
 out_free:
 	kfree(BusVpdPtr);
+	return status;
 }
 
 /*
@@ -246,7 +249,7 @@
 	struct device_node *DevNode = PciDev->sysdata;
 	struct pci_dn *pdn;
 	u16 bus;
-	u8 frame;
+	u8 frame = 0;
 	char card[4];
 	HvSubBusNumber subbus;
 	HvAgentId agent;
@@ -262,10 +265,11 @@
 	subbus = pdn->bussubno;
 	agent = ISERIES_PCI_AGENTID(ISERIES_GET_DEVICE_FROM_SUBBUS(subbus),
 			ISERIES_GET_FUNCTION_FROM_SUBBUS(subbus));
-	iSeries_Get_Location_Code(bus, agent, &frame, card);
 
-	printk("%d. PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, Card %4s  ",
-			count, bus, PCI_SLOT(PciDev->devfn), PciDev->vendor,
-			frame, card);
-	printk("0x%04X\n", (int)(PciDev->class >> 8));
+	if (iSeries_Get_Location_Code(bus, agent, &frame, card)) {
+		printk("%d. PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, "
+			"Card %4s  0x%04X\n", count, bus,
+			PCI_SLOT(PciDev->devfn), PciDev->vendor, frame,
+			card, (int)(PciDev->class >> 8));
+	}
 }
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index 63a1670..c3aa46b 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -38,16 +38,16 @@
 static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
 {
 	for (; node != 0;node = node->sibling) {
-		int * bus_range;
-		unsigned int *class_code;
+		const int *bus_range;
+		const unsigned int *class_code;
 		int len;
 
 		/* For PCI<->PCI bridges or CardBus bridges, we go down */
-		class_code = (unsigned int *) get_property(node, "class-code", NULL);
+		class_code = get_property(node, "class-code", NULL);
 		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
 			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
 			continue;
-		bus_range = (int *) get_property(node, "bus-range", &len);
+		bus_range = get_property(node, "bus-range", &len);
 		if (bus_range != NULL && len > 2 * sizeof(int)) {
 			if (bus_range[1] > higher)
 				higher = bus_range[1];
@@ -65,30 +65,36 @@
  */
 static void __init fixup_bus_range(struct device_node *bridge)
 {
-	int * bus_range;
+	int *bus_range;
+	struct property *prop;
 	int len;
 
 	/* Lookup the "bus-range" property for the hose */
-	bus_range = (int *) get_property(bridge, "bus-range", &len);
-	if (bus_range == NULL || len < 2 * sizeof(int)) {
+	prop = of_find_property(bridge, "bus-range", &len);
+	if (prop == NULL  || prop->value == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s\n",
 			       bridge->full_name);
 		return;
 	}
+	bus_range = (int *)prop->value;
 	bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]);
 }
 
 
-#define U3_AGP_CFA0(devfn, off)	\
-	((1 << (unsigned long)PCI_SLOT(dev_fn)) \
-	| (((unsigned long)PCI_FUNC(dev_fn)) << 8) \
-	| (((unsigned long)(off)) & 0xFCUL))
+/* First-bus address: one-hot slot bit (1UL: slot 31 would overflow an int) */
+static unsigned long u3_agp_cfa0(u8 devfn, u8 off)
+{
+	return (1UL << PCI_SLOT(devfn)) |
+		((unsigned long)PCI_FUNC(devfn) << 8) | (off & 0xFCUL);
+}
 
-#define U3_AGP_CFA1(bus, devfn, off)	\
-	((((unsigned long)(bus)) << 16) \
-	|(((unsigned long)(devfn)) << 8) \
-	|(((unsigned long)(off)) & 0xFCUL) \
-	|1UL)
+/* Address for any bus other than the hose's first: bit 0 set, bus at bit 16 */
+static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off)
+{
+	unsigned long cfa = 1UL | (off & 0xFCUL);
+
+	return cfa | ((unsigned long)devfn << 8) | ((unsigned long)bus << 16);
+}
 
 static unsigned long u3_agp_cfg_access(struct pci_controller* hose,
 				       u8 bus, u8 dev_fn, u8 offset)
@@ -98,9 +104,9 @@
 	if (bus == hose->first_busno) {
 		if (dev_fn < (11 << 3))
 			return 0;
-		caddr = U3_AGP_CFA0(dev_fn, offset);
+		caddr = u3_agp_cfa0(dev_fn, offset);
 	} else
-		caddr = U3_AGP_CFA1(bus, dev_fn, offset);
+		caddr = u3_agp_cfa1(bus, dev_fn, offset);
 
 	/* Uninorth will return garbage if we don't read back the value ! */
 	do {
@@ -182,13 +188,15 @@
 	u3_agp_write_config
 };
 
+static unsigned long u3_ht_cfa0(u8 devfn, u8 off)
+{
+	return (devfn << 8) | off;	/* devfn in bits 8-15, offset in bits 0-7 */
+}
 
-#define U3_HT_CFA0(devfn, off)		\
-		((((unsigned long)devfn) << 8) | offset)
-#define U3_HT_CFA1(bus, devfn, off)	\
-		(U3_HT_CFA0(devfn, off) \
-		+ (((unsigned long)bus) << 16) \
-		+ 0x01000000UL)
+static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off)
+{	/* cfa0 form plus the bus number at bit 16 and the constant 0x01000000 */
+	return u3_ht_cfa0(devfn, off) + (bus << 16) + 0x01000000UL;
+}
 
 static unsigned long u3_ht_cfg_access(struct pci_controller* hose,
 				      u8 bus, u8 devfn, u8 offset)
@@ -196,9 +204,9 @@
 	if (bus == hose->first_busno) {
 		if (PCI_SLOT(devfn) == 0)
 			return 0;
-		return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset);
+		return ((unsigned long)hose->cfg_data) + u3_ht_cfa0(devfn, offset);
 	} else
-		return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset);
+		return ((unsigned long)hose->cfg_data) + u3_ht_cfa1(bus, devfn, offset);
 }
 
 static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
@@ -211,6 +219,9 @@
 	if (hose == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
+	if (offset > 0xff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
 	addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
 	if (!addr)
 		return PCIBIOS_DEVICE_NOT_FOUND;
@@ -243,6 +254,9 @@
 	if (hose == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
+	if (offset > 0xff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
 	addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
 	if (!addr)
 		return PCIBIOS_DEVICE_NOT_FOUND;
@@ -314,12 +328,12 @@
 	int len;
 	struct pci_controller *hose;
 	char* disp_name;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 1;
 
 	DBG("Adding PCI host bridge %s\n", dev->full_name);
 
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n",
 		dev->full_name);
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 57567df..fe6b9bf 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -99,8 +99,7 @@
 static void maple_restart(char *cmd)
 {
 	unsigned int maple_nvram_base;
-	unsigned int maple_nvram_offset;
-	unsigned int maple_nvram_command;
+	const unsigned int *maple_nvram_offset, *maple_nvram_command;
 	struct device_node *sp;
 
 	maple_nvram_base = maple_find_nvram_base();
@@ -113,14 +112,12 @@
 		printk(KERN_EMERG "Maple: Unable to find Service Processor\n");
 		goto fail;
 	}
-	maple_nvram_offset = *(unsigned int*) get_property(sp,
-			"restart-addr", NULL);
-	maple_nvram_command = *(unsigned int*) get_property(sp,
-			"restart-value", NULL);
+	maple_nvram_offset = get_property(sp, "restart-addr", NULL);
+	maple_nvram_command = get_property(sp, "restart-value", NULL);
 	of_node_put(sp);
 
 	/* send command */
-	outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset);
+	outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset);
 	for (;;) ;
  fail:
 	printk(KERN_EMERG "Maple: Manual Restart Required\n");
@@ -129,8 +126,7 @@
 static void maple_power_off(void)
 {
 	unsigned int maple_nvram_base;
-	unsigned int maple_nvram_offset;
-	unsigned int maple_nvram_command;
+	const unsigned int *maple_nvram_offset, *maple_nvram_command;
 	struct device_node *sp;
 
 	maple_nvram_base = maple_find_nvram_base();
@@ -143,14 +139,12 @@
 		printk(KERN_EMERG "Maple: Unable to find Service Processor\n");
 		goto fail;
 	}
-	maple_nvram_offset = *(unsigned int*) get_property(sp,
-			"power-off-addr", NULL);
-	maple_nvram_command = *(unsigned int*) get_property(sp,
-			"power-off-value", NULL);
+	maple_nvram_offset = get_property(sp, "power-off-addr", NULL);
+	maple_nvram_command = get_property(sp, "power-off-value", NULL);
 	of_node_put(sp);
 
 	/* send command */
-	outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset);
+	outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset);
 	for (;;) ;
  fail:
 	printk(KERN_EMERG "Maple: Manual Power-Down Required\n");
@@ -211,7 +205,7 @@
 static void __init maple_init_IRQ(void)
 {
 	struct device_node *root, *np, *mpic_node = NULL;
-	unsigned int *opprop;
+	const unsigned int *opprop;
 	unsigned long openpic_addr = 0;
 	int naddr, n, i, opplen, has_isus = 0;
 	struct mpic *mpic;
@@ -241,8 +235,7 @@
 	/* Find address list in /platform-open-pic */
 	root = of_find_node_by_path("/");
 	naddr = prom_n_addr_cells(root);
-	opprop = (unsigned int *) get_property(root, "platform-open-pic",
-					       &opplen);
+	opprop = get_property(root, "platform-open-pic", &opplen);
 	if (opprop != 0) {
 		openpic_addr = of_read_number(opprop, naddr);
 		has_isus = (opplen > naddr);
diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile
new file mode 100644
index 0000000..1be1a99
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/Makefile
@@ -0,0 +1 @@
+obj-y	+= setup.o pci.o time.o
diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
new file mode 100644
index 0000000..fd71d72
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -0,0 +1,8 @@
+#ifndef _PASEMI_PASEMI_H
+#define _PASEMI_PASEMI_H
+/* Functions shared between the pasemi platform source files */
+extern unsigned long pas_get_boot_time(void);
+extern void pas_pci_init(void);		/* defined in pci.c */
+extern void pas_pcibios_fixup(void);	/* defined in pci.c */
+
+#endif /* _PASEMI_PASEMI_H */
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
new file mode 100644
index 0000000..4679c52
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Authors: Kip Walker, PA Semi
+ *	    Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on arch/powerpc/platforms/maple/pci.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+
+#include <asm/ppc-pci.h>
+
+#define PA_PXP_CFA(bus, devfn, off) (((bus) << 20) | ((devfn) << 12) | (off))
+/* Unsigned compare also rejects a negative offset, not just off >= 4096 */
+#define CONFIG_OFFSET_VALID(off) ((unsigned int)(off) < 4096)
+/* Config register address: hose->cfg_data base plus the PA_PXP_CFA offset */
+static unsigned long pa_pxp_cfg_addr(struct pci_controller *hose,
+				     u8 bus, u8 devfn, int offset)
+{
+	return PA_PXP_CFA(bus, devfn, offset) + (unsigned long)hose->cfg_data;
+}
+/* Read a 1-, 2- or 4-byte config register of (bus, devfn) into *val */
+static int pa_pxp_read_config(struct pci_bus *bus, unsigned int devfn,
+			      int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	unsigned long addr;
+
+	hose = pci_bus_to_host(bus);
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!CONFIG_OFFSET_VALID(offset))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset);
+
+	/*
+	 * The caller has already checked alignment and that len is 1, 2
+	 * or 4; any other len falls into the 32-bit default case.
+	 */
+	switch (len) {
+	case 1:
+		*val = in_8((u8 __iomem *)addr);
+		break;
+	case 2:
+		*val = in_le16((u16 __iomem *)addr);
+		break;
+	default:
+		*val = in_le32((u32 __iomem *)addr);
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+/* Write val to a 1-, 2- or 4-byte config register, then read it back */
+static int pa_pxp_write_config(struct pci_bus *bus, unsigned int devfn,
+			       int offset, int len, u32 val)
+{
+	struct pci_controller *hose;
+	unsigned long addr;
+
+	hose = pci_bus_to_host(bus);
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!CONFIG_OFFSET_VALID(offset))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset);
+
+	/*
+	 * The caller has already checked alignment and that len is 1, 2
+	 * or 4.  Every store is followed by a load of the same register.
+	 */
+	switch (len) {
+	case 1:
+		out_8((u8 __iomem *)addr, val);
+		(void) in_8((u8 __iomem *)addr);
+		break;
+	case 2:
+		out_le16((u16 __iomem *)addr, val);
+		(void) in_le16((u16 __iomem *)addr);
+		break;
+	default:
+		out_le32((u32 __iomem *)addr, val);
+		(void) in_le32((u32 __iomem *)addr);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+/* Config-space accessors that setup_pa_pxp() installs on a host bridge */
+static struct pci_ops pa_pxp_ops = {
+	.read	= pa_pxp_read_config,
+	.write	= pa_pxp_write_config,
+};
+/* Map the config window at 0xe0000000 (result unchecked) and set the ops */
+static void __init setup_pa_pxp(struct pci_controller *hose)
+{
+	hose->cfg_data = ioremap(0xe0000000, 0x10000000);
+	hose->ops = &pa_pxp_ops;
+}
+/* Create a controller for one PXP bridge node; returns 0 or -ENOMEM */
+static int __init add_bridge(struct device_node *dev)
+{
+	struct pci_controller *hose;
+
+	pr_debug("Adding PCI host bridge %s\n", dev->full_name);
+
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;
+
+	hose->first_busno = 0;
+	hose->last_busno = 0xff;
+
+	setup_pa_pxp(hose);
+
+	printk(KERN_INFO "Found PA-PXP PCI host bridge.\n");
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+	pci_setup_phb_io(hose, 1);
+
+	return 0;
+}
+
+/* Call pci_read_irq_line() on every device that for_each_pci_dev() visits */
+void __init pas_pcibios_fixup(void)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev)
+		pci_read_irq_line(dev);
+}
+/* Shift each hose's I/O resource by (io_base_virt - pci_io_base); log it */
+static void __init pas_fixup_phb_resources(void)
+{
+	struct pci_controller *hose, *tmp;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
+		hose->io_resource.start += offset;
+		hose->io_resource.end += offset;
+		printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
+		       hose->global_number,
+		       hose->io_resource.start, hose->io_resource.end);
+	}
+}
+
+/* Register every "pxp" child of the device-tree root as a PCI host bridge */
+void __init pas_pci_init(void)
+{
+	struct device_node *np, *root;
+
+	root = of_find_node_by_path("/");
+	if (!root) {
+		printk(KERN_CRIT "pas_pci_init: can't find root "
+			"of device tree\n");
+		return;
+	}
+
+	for (np = NULL; (np = of_get_next_child(root, np)) != NULL;)
+		if (np->name && !strcmp(np->name, "pxp") && !add_bridge(np))
+			of_node_get(np);	/* extra ref only when add_bridge() succeeded */
+
+	of_node_put(root);
+
+	pas_fixup_phb_resources();
+
+	/* Setup the linkage between OF nodes and PHBs */
+	pci_devs_phb_init();
+
+	/* Use the common resource allocation mechanism */
+	pci_probe_only = 1;
+}
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
new file mode 100644
index 0000000..6284826
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Authors: Kip Walker, PA Semi
+ *	    Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on arch/powerpc/platforms/maple/setup.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/console.h>
+
+#include <asm/prom.h>
+#include <asm/system.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/mpic.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+
+#include "pasemi.h"
+
+static void pas_restart(char *cmd)
+{
+	printk("restart unimplemented, looping...\n");
+	while (1) ;	/* @cmd is ignored: log once, then spin forever */
+}
+
+static void pas_power_off(void)
+{
+	printk("power off unimplemented, looping...\n");
+	while (1) ;	/* no power-off support yet: log, then spin forever */
+}
+
+static void pas_halt(void)
+{
+	pas_power_off();	/* halt reuses power-off, which spins forever */
+}
+
+#ifdef CONFIG_SMP
+struct smp_ops_t pas_smp_ops = {	/* installed by pas_setup_arch() */
+	.probe		= smp_mpic_probe,
+	.message_pass	= smp_mpic_message_pass,
+	.kick_cpu	= smp_generic_kick_cpu,
+	.setup_cpu	= smp_mpic_setup_cpu,
+	.give_timebase	= smp_generic_give_timebase,
+	.take_timebase	= smp_generic_take_timebase,
+};
+#endif /* CONFIG_SMP */
+
+void __init pas_setup_arch(void)
+{
+#ifdef CONFIG_SMP
+	/* Setup SMP callback (only compiled in on SMP builds) */
+	smp_ops = &pas_smp_ops;
+#endif
+	/* Lookup PCI hosts */
+	pas_pci_init();
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;	/* point conswitchp at the dummy console */
+#endif
+
+	printk(KERN_DEBUG "Using default idle loop\n");
+}
+
+static void iommu_dev_setup_null(struct pci_dev *dev) { }	/* no-op */
+static void iommu_bus_setup_null(struct pci_bus *bus) { }	/* no-op */
+
+static void __init pas_init_early(void)
+{
+	/* No iommu code yet: both ppc_md iommu setup hooks get empty stubs */
+	ppc_md.iommu_dev_setup = iommu_dev_setup_null;
+	ppc_md.iommu_bus_setup = iommu_bus_setup_null;
+	pci_direct_iommu_init();
+}
+
+/* No legacy IO on our parts: answer -ENODEV for every @baseport */
+static int pas_check_legacy_ioport(unsigned int baseport)
+{
+	return -ENODEV;
+}
+
+/*
+ * Find the MPIC node, read its address from "platform-open-pic", start it.
+ */
+static __init void pas_init_IRQ(void)
+{
+	struct device_node *np, *root, *mpic_node = NULL;
+	unsigned long openpic_addr;
+	const unsigned int *opprop;
+	int naddr, opplen;
+	struct mpic *mpic;
+
+	for_each_node_by_type(np, "interrupt-controller")
+		if (device_is_compatible(np, "open-pic")) {
+			mpic_node = np;
+			break;
+		}
+	if (!mpic_node)		/* fall back to a node of type "open-pic" */
+		for_each_node_by_type(np, "open-pic") {
+			mpic_node = np;
+			break;
+		}
+	if (!mpic_node) {
+		printk(KERN_ERR
+			"Failed to locate the MPIC interrupt controller\n");
+		return;
+	}
+
+	/* Find address list in /platform-open-pic */
+	root = of_find_node_by_path("/");
+	naddr = prom_n_addr_cells(root);
+	opprop = get_property(root, "platform-open-pic", &opplen);
+	if (!opprop) {
+		printk(KERN_ERR "No platform-open-pic property.\n");
+		of_node_put(root);
+		of_node_put(mpic_node);	/* drop the ref kept by the break */
+		return;
+	}
+	openpic_addr = of_read_number(opprop, naddr);
+	printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
+	of_node_put(root);	/* root's only reference; not put again below */
+
+	mpic = mpic_alloc(mpic_node, openpic_addr, MPIC_PRIMARY, 0, 0,
+			  " PAS-OPIC  ");
+	BUG_ON(!mpic);
+
+	mpic_assign_isu(mpic, 0, openpic_addr + 0x10000);
+	mpic_init(mpic);
+	of_node_put(mpic_node);
+}
+
+/* Log a progress message as "[hex] : text"; a NULL @s prints as empty. */
+static void __init pas_progress(char *s, unsigned short hex)
+{
+	printk("[%04x] : %s\n", hex, s ? s : "");
+}
+
+/*
+ * Called very early: MMU is off, device-tree isn't unflattened.  Claims
+ * the machine (returns 1) only if the flat root is "PA6T-1682M" compatible.
+ */
+static int __init pas_probe(void)
+{
+	unsigned long dt_root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(dt_root, "PA6T-1682M")) {
+		hpte_init_native();
+		return 1;
+	}
+	return 0;
+}
+
+define_machine(pas) {	/* machine hooks for the PA6T-1682M platform */
+	.name			= "PA Semi PA6T-1682M",
+	.probe			= pas_probe,
+	.setup_arch		= pas_setup_arch,
+	.init_early		= pas_init_early,
+	.init_IRQ		= pas_init_IRQ,
+	.get_irq		= mpic_get_irq,
+	.pcibios_fixup		= pas_pcibios_fixup,
+	.restart		= pas_restart,		/* unimplemented: spins */
+	.power_off		= pas_power_off,	/* unimplemented: spins */
+	.halt			= pas_halt,		/* same as power_off */
+	.get_boot_time		= pas_get_boot_time,
+	.calibrate_decr		= generic_calibrate_decr,
+	.check_legacy_ioport    = pas_check_legacy_ioport, /* always -ENODEV */
+	.progress		= pas_progress,
+};
diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c
new file mode 100644
index 0000000..9bd410b
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/time.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/time.h>
+
+#include <asm/time.h>
+
+unsigned long __init pas_get_boot_time(void)
+{
+	/* Let's just return a fake date right now: mktime() of 2006-01-01 12:00:00 */
+	return mktime(2006, 1, 1, 12, 0, 0);
+}
diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c
index d664154..afa593a 100644
--- a/arch/powerpc/platforms/powermac/backlight.c
+++ b/arch/powerpc/platforms/powermac/backlight.c
@@ -60,7 +60,8 @@
 	struct device_node* bk_node = find_devices("backlight");
 
 	if (bk_node) {
-		char *prop = get_property(bk_node, "backlight-control", NULL);
+		const char *prop = get_property(bk_node,
+				"backlight-control", NULL);
 		if (prop && strncmp(prop, type, strlen(type)) == 0)
 			return 1;
 	}
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
index 62926248..c2b6b41 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_32.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_32.c
@@ -421,7 +421,7 @@
 
 static u32 read_gpio(struct device_node *np)
 {
-	u32 *reg = (u32 *)get_property(np, "reg", NULL);
+	const u32 *reg = get_property(np, "reg", NULL);
 	u32 offset;
 
 	if (reg == NULL)
@@ -497,7 +497,7 @@
 								"frequency-gpio");
 	struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL,
 								     "slewing-done");
-	u32 *value;
+	const u32 *value;
 
 	/*
 	 * Check to see if it's GPIO driven or PMU only
@@ -519,15 +519,15 @@
 	 */
 	if (frequency_gpio && slew_done_gpio) {
 		int lenp, rc;
-		u32 *freqs, *ratio;
+		const u32 *freqs, *ratio;
 
-		freqs = (u32 *)get_property(cpunode, "bus-frequencies", &lenp);
+		freqs = get_property(cpunode, "bus-frequencies", &lenp);
 		lenp /= sizeof(u32);
 		if (freqs == NULL || lenp != 2) {
 			printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n");
 			return 1;
 		}
-		ratio = (u32 *)get_property(cpunode, "processor-to-bus-ratio*2", NULL);
+		ratio = get_property(cpunode, "processor-to-bus-ratio*2", NULL);
 		if (ratio == NULL) {
 			printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n");
 			return 1;
@@ -562,7 +562,7 @@
 	/* If we use the PMU, look for the min & max frequencies in the
 	 * device-tree
 	 */
-	value = (u32 *)get_property(cpunode, "min-clock-frequency", NULL);
+	value = get_property(cpunode, "min-clock-frequency", NULL);
 	if (!value)
 		return 1;
 	low_freq = (*value) / 1000;
@@ -571,7 +571,7 @@
 	if (low_freq < 100000)
 		low_freq *= 10;
 
-	value = (u32 *)get_property(cpunode, "max-clock-frequency", NULL);
+	value = get_property(cpunode, "max-clock-frequency", NULL);
 	if (!value)
 		return 1;
 	hi_freq = (*value) / 1000;
@@ -611,13 +611,14 @@
 static int pmac_cpufreq_init_750FX(struct device_node *cpunode)
 {
 	struct device_node *volt_gpio_np;
-	u32 pvr, *value;
+	u32 pvr;
+	const u32 *value;
 
 	if (get_property(cpunode, "dynamic-power-step", NULL) == NULL)
 		return 1;
 
 	hi_freq = cur_freq;
-	value = (u32 *)get_property(cpunode, "reduced-clock-frequency", NULL);
+	value = get_property(cpunode, "reduced-clock-frequency", NULL);
 	if (!value)
 		return 1;
 	low_freq = (*value) / 1000;
@@ -650,7 +651,7 @@
 static int __init pmac_cpufreq_setup(void)
 {
 	struct device_node	*cpunode;
-	u32			*value;
+	const u32		*value;
 
 	if (strstr(cmd_line, "nocpufreq"))
 		return 0;
@@ -661,7 +662,7 @@
 		goto out;
 
 	/* Get current cpu clock freq */
-	value = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+	value = get_property(cpunode, "clock-frequency", NULL);
 	if (!value)
 		goto out;
 	cur_freq = (*value) / 1000;
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
index 7b1156e..d30466d 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_64.c
@@ -89,7 +89,7 @@
 
 #ifdef CONFIG_PMAC_SMU
 
-static u32 *g5_pmode_data;
+static const u32 *g5_pmode_data;
 static int g5_pmode_max;
 
 static struct smu_sdbp_fvt *g5_fvt_table;	/* table of op. points */
@@ -391,7 +391,8 @@
 	unsigned int psize, ssize;
 	unsigned long max_freq;
 	char *freq_method, *volt_method;
-	u32 *valp, pvr_hi;
+	const u32 *valp;
+	u32 pvr_hi;
 	int use_volts_vdnap = 0;
 	int use_volts_smu = 0;
 	int rc = -ENODEV;
@@ -409,8 +410,7 @@
 	/* Get first CPU node */
 	for (cpunode = NULL;
 	     (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
-		u32 *reg =
-			(u32 *)get_property(cpunode, "reg", NULL);
+		const u32 *reg = get_property(cpunode, "reg", NULL);
 		if (reg == NULL || (*reg) != 0)
 			continue;
 		if (!strcmp(cpunode->type, "cpu"))
@@ -422,7 +422,7 @@
 	}
 
 	/* Check 970FX for now */
-	valp = (u32 *)get_property(cpunode, "cpu-version", NULL);
+	valp = get_property(cpunode, "cpu-version", NULL);
 	if (!valp) {
 		DBG("No cpu-version property !\n");
 		goto bail_noprops;
@@ -434,7 +434,7 @@
 	}
 
 	/* Look for the powertune data in the device-tree */
-	g5_pmode_data = (u32 *)get_property(cpunode, "power-mode-data",&psize);
+	g5_pmode_data = get_property(cpunode, "power-mode-data",&psize);
 	if (!g5_pmode_data) {
 		DBG("No power-mode-data !\n");
 		goto bail_noprops;
@@ -442,7 +442,7 @@
 	g5_pmode_max = psize / sizeof(u32) - 1;
 
 	if (use_volts_smu) {
-		struct smu_sdbp_header *shdr;
+		const struct smu_sdbp_header *shdr;
 
 		/* Look for the FVT table */
 		shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL);
@@ -493,7 +493,7 @@
 	 * half freq in this version. So far, I haven't yet seen a machine
 	 * supporting anything else.
 	 */
-	valp = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+	valp = get_property(cpunode, "clock-frequency", NULL);
 	if (!valp)
 		return -ENODEV;
 	max_freq = (*valp)/1000;
@@ -541,8 +541,8 @@
 static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
 {
 	struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL;
-	u8 *eeprom = NULL;
-	u32 *valp;
+	const u8 *eeprom = NULL;
+	const u32 *valp;
 	u64 max_freq, min_freq, ih, il;
 	int has_volt = 1, rc = 0;
 
@@ -563,7 +563,7 @@
 	/* Lookup the cpuid eeprom node */
         cpuid = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/cpuid@a0");
 	if (cpuid != NULL)
-		eeprom = (u8 *)get_property(cpuid, "cpuid", NULL);
+		eeprom = get_property(cpuid, "cpuid", NULL);
 	if (eeprom == NULL) {
 		printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n");
 		rc = -ENODEV;
@@ -573,7 +573,8 @@
 	/* Lookup the i2c hwclock */
 	for (hwclock = NULL;
 	     (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){
-		char *loc = get_property(hwclock, "hwctrl-location", NULL);
+		const char *loc = get_property(hwclock,
+				"hwctrl-location", NULL);
 		if (loc == NULL)
 			continue;
 		if (strcmp(loc, "CPU CLOCK"))
@@ -637,7 +638,7 @@
 	 */
 
 	/* Get max frequency from device-tree */
-	valp = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+	valp = get_property(cpunode, "clock-frequency", NULL);
 	if (!valp) {
 		printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n");
 		rc = -ENODEV;
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index f8313bf..e49621b 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -1058,8 +1058,8 @@
 	if (np == NULL)
 		return -ENODEV;
 	for (np = np->child; np != NULL; np = np->sibling) {
-		u32 *num = (u32 *)get_property(np, "reg", NULL);
-		u32 *rst = (u32 *)get_property(np, "soft-reset", NULL);
+		const u32 *num = get_property(np, "reg", NULL);
+		const u32 *rst = get_property(np, "soft-reset", NULL);
 		if (num == NULL || rst == NULL)
 			continue;
 		if (param == *num) {
@@ -1087,7 +1087,7 @@
 {
 	struct macio_chip *macio;
 	unsigned long flags;
-	char *prop;
+	const char *prop;
 	int number;
 	u32 reg;
 
@@ -1096,7 +1096,7 @@
 	    macio->type != macio_intrepid)
 		return -ENODEV;
 
-	prop = (char *)get_property(node, "AAPL,clock-id", NULL);
+	prop = get_property(node, "AAPL,clock-id", NULL);
 	if (!prop)
 		return -ENODEV;
 	if (strncmp(prop, "usb0u048", 8) == 0)
@@ -1507,8 +1507,8 @@
 	if (np == NULL)
 		return -ENODEV;
 	for (np = np->child; np != NULL; np = np->sibling) {
-		u32 *num = (u32 *)get_property(np, "reg", NULL);
-		u32 *rst = (u32 *)get_property(np, "soft-reset", NULL);
+		const u32 *num = get_property(np, "reg", NULL);
+		const u32 *rst = get_property(np, "soft-reset", NULL);
 		if (num == NULL || rst == NULL)
 			continue;
 		if (param == *num) {
@@ -2408,7 +2408,7 @@
 	 */
 	dt = find_devices("device-tree");
 	if (dt != NULL)
-		model = (const char *) get_property(dt, "model", NULL);
+		model = get_property(dt, "model", NULL);
 	for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) {
 	    if (strcmp(model, pmac_mb_defs[i].model_string) == 0) {
 		pmac_mb = pmac_mb_defs[i];
@@ -2536,7 +2536,7 @@
  */
 static void __init probe_uninorth(void)
 {
-	u32 *addrp;
+	const u32 *addrp;
 	phys_addr_t address;
 	unsigned long actrl;
 
@@ -2555,7 +2555,7 @@
 	if (uninorth_node == NULL)
 		return;
 
-	addrp = (u32 *)get_property(uninorth_node, "reg", NULL);
+	addrp = get_property(uninorth_node, "reg", NULL);
 	if (addrp == NULL)
 		return;
 	address = of_translate_address(uninorth_node, addrp);
@@ -2596,7 +2596,7 @@
 	struct device_node*	node;
 	int			i;
 	volatile u32 __iomem	*base;
-	u32			*addrp, *revp;
+	const u32		*addrp, *revp;
 	phys_addr_t		addr;
 	u64			size;
 
@@ -2639,7 +2639,7 @@
 		return;
 	}
 	if (type == macio_keylargo || type == macio_keylargo2) {
-		u32 *did = (u32 *)get_property(node, "device-id", NULL);
+		const u32 *did = get_property(node, "device-id", NULL);
 		if (*did == 0x00000025)
 			type = macio_pangea;
 		if (*did == 0x0000003e)
@@ -2652,7 +2652,7 @@
 	macio_chips[i].base	= base;
 	macio_chips[i].flags	= MACIO_FLAG_SCCB_ON | MACIO_FLAG_SCCB_ON;
 	macio_chips[i].name	= macio_names[type];
-	revp = (u32 *)get_property(node, "revision-id", NULL);
+	revp = get_property(node, "revision-id", NULL);
 	if (revp)
 		macio_chips[i].rev = *revp;
 	printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n",
@@ -2695,15 +2695,15 @@
 initial_serial_shutdown(struct device_node *np)
 {
 	int len;
-	struct slot_names_prop {
+	const struct slot_names_prop {
 		int	count;
 		char	name[1];
 	} *slots;
-	char *conn;
+	const char *conn;
 	int port_type = PMAC_SCC_ASYNC;
 	int modem = 0;
 
-	slots = (struct slot_names_prop *)get_property(np, "slot-names", &len);
+	slots = get_property(np, "slot-names", &len);
 	conn = get_property(np, "AAPL,connector", &len);
 	if (conn && (strcmp(conn, "infrared") == 0))
 		port_type = PMAC_SCC_IRDA;
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 8677f50..c2c7cf7 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -477,7 +477,8 @@
 static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 {
 	struct pmac_i2c_host_kw *host;
-	u32			*psteps, *prate, *addrp, steps;
+	const u32		*psteps, *prate, *addrp;
+	u32			steps;
 
 	host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL);
 	if (host == NULL) {
@@ -490,7 +491,7 @@
 	 * on all i2c keywest nodes so far ... we would have to fallback
 	 * to macio parsing if that wasn't the case
 	 */
-	addrp = (u32 *)get_property(np, "AAPL,address", NULL);
+	addrp = get_property(np, "AAPL,address", NULL);
 	if (addrp == NULL) {
 		printk(KERN_ERR "low_i2c: Can't find address for %s\n",
 		       np->full_name);
@@ -504,13 +505,13 @@
 	host->timeout_timer.function = kw_i2c_timeout;
 	host->timeout_timer.data = (unsigned long)host;
 
-	psteps = (u32 *)get_property(np, "AAPL,address-step", NULL);
+	psteps = get_property(np, "AAPL,address-step", NULL);
 	steps = psteps ? (*psteps) : 0x10;
 	for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++)
 		steps >>= 1;
 	/* Select interface rate */
 	host->speed = KW_I2C_MODE_25KHZ;
-	prate = (u32 *)get_property(np, "AAPL,i2c-rate", NULL);
+	prate = get_property(np, "AAPL,i2c-rate", NULL);
 	if (prate) switch(*prate) {
 	case 100:
 		host->speed = KW_I2C_MODE_100KHZ;
@@ -618,8 +619,8 @@
 		} else {
 			for (child = NULL;
 			     (child = of_get_next_child(np, child)) != NULL;) {
-				u32 *reg =
-					(u32 *)get_property(child, "reg", NULL);
+				const u32 *reg = get_property(child,
+						"reg", NULL);
 				if (reg == NULL)
 					continue;
 				kw_i2c_add(host, np, child, *reg);
@@ -881,7 +882,7 @@
 {
 	struct device_node *controller, *busnode;
 	struct pmac_i2c_bus *bus;
-	u32 *reg;
+	const u32 *reg;
 	int sz;
 
 	if (!smu_present())
@@ -904,7 +905,7 @@
 		if (strcmp(busnode->type, "i2c") &&
 		    strcmp(busnode->type, "i2c-bus"))
 			continue;
-		reg = (u32 *)get_property(busnode, "reg", NULL);
+		reg = get_property(busnode, "reg", NULL);
 		if (reg == NULL)
 			continue;
 
@@ -948,9 +949,8 @@
 		list_for_each_entry(bus, &pmac_i2c_busses, link) {
 			if (p == bus->busnode) {
 				if (prev && bus->flags & pmac_i2c_multibus) {
-					u32 *reg;
-					reg = (u32 *)get_property(prev, "reg",
-								  NULL);
+					const u32 *reg;
+					reg = get_property(prev, "reg", NULL);
 					if (!reg)
 						continue;
 					if (((*reg) >> 8) != bus->channel)
@@ -971,7 +971,7 @@
 
 u8 pmac_i2c_get_dev_addr(struct device_node *device)
 {
-	u32 *reg = (u32 *)get_property(device, "reg", NULL);
+	const u32 *reg = get_property(device, "reg", NULL);
 
 	if (reg == NULL)
 		return 0;
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 205d044..9923adc 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -66,16 +66,16 @@
 static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
 {
 	for (; node != 0;node = node->sibling) {
-		int * bus_range;
-		unsigned int *class_code;
+		const int * bus_range;
+		const unsigned int *class_code;
 		int len;
 
 		/* For PCI<->PCI bridges or CardBus bridges, we go down */
-		class_code = (unsigned int *) get_property(node, "class-code", NULL);
+		class_code = get_property(node, "class-code", NULL);
 		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
 			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
 			continue;
-		bus_range = (int *) get_property(node, "bus-range", &len);
+		bus_range = get_property(node, "bus-range", &len);
 		if (bus_range != NULL && len > 2 * sizeof(int)) {
 			if (bus_range[1] > higher)
 				higher = bus_range[1];
@@ -93,13 +93,15 @@
  */
 static void __init fixup_bus_range(struct device_node *bridge)
 {
-	int * bus_range;
-	int len;
+	int *bus_range, len;
+	struct property *prop;
 
 	/* Lookup the "bus-range" property for the hose */
-	bus_range = (int *) get_property(bridge, "bus-range", &len);
-	if (bus_range == NULL || len < 2 * sizeof(int))
+	prop = of_find_property(bridge, "bus-range", &len);
+	if (prop == NULL || prop->length < 2 * sizeof(int))
 		return;
+
+	bus_range = (int *)prop->value;
 	bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]);
 }
 
@@ -237,7 +239,7 @@
 static int chaos_validate_dev(struct pci_bus *bus, int devfn, int offset)
 {
 	struct device_node *np;
-	u32 *vendor, *device;
+	const u32 *vendor, *device;
 
 	if (offset >= 0x100)
 		return  PCIBIOS_BAD_REGISTER_NUMBER;
@@ -245,8 +247,8 @@
 	if (np == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	vendor = (u32 *)get_property(np, "vendor-id", NULL);
-	device = (u32 *)get_property(np, "device-id", NULL);
+	vendor = get_property(np, "vendor-id", NULL);
+	device = get_property(np, "device-id", NULL);
 	if (vendor == NULL || device == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
@@ -686,20 +688,21 @@
 
 	for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) {
 		struct pci_controller *hose;
-		u32 data, *prop;
+		u32 data;
+		const u32 *prop;
 		u8 bus, devfn;
 
-		prop = (u32 *)get_property(nec, "vendor-id", NULL);
+		prop = get_property(nec, "vendor-id", NULL);
 		if (prop == NULL)
 			continue;
 		if (0x1033 != *prop)
 			continue;
-		prop = (u32 *)get_property(nec, "device-id", NULL);
+		prop = get_property(nec, "device-id", NULL);
 		if (prop == NULL)
 			continue;
 		if (0x0035 != *prop)
 			continue;
-		prop = (u32 *)get_property(nec, "reg", NULL);
+		prop = get_property(nec, "reg", NULL);
 		if (prop == NULL)
 			continue;
 		devfn = (prop[0] >> 8) & 0xff;
@@ -898,7 +901,7 @@
 	struct pci_controller *hose;
 	struct resource rsrc;
 	char *disp_name;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 1, has_address = 0;
 
 	DBG("Adding PCI host bridge %s\n", dev->full_name);
@@ -907,7 +910,7 @@
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *) get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index aacfa59..ee3b223 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -114,7 +114,7 @@
 	 * we just create them all
 	 */
 	for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) {
-		u32 *reg = (u32 *)get_property(gp, "reg", NULL);
+		const u32 *reg = get_property(gp, "reg", NULL);
 		unsigned long offset;
 		if (reg == NULL)
 			continue;
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index b117adb..7651f278 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -813,14 +813,15 @@
 	struct pmf_device *dev;
 	struct pmf_function *func, *result = NULL;
 	char fname[64];
-	u32 *prop, ph;
+	const u32 *prop;
+	u32 ph;
 
 	/*
 	 * Look for a "platform-*" function reference. If we can't find
 	 * one, then we fallback to a direct call attempt
 	 */
 	snprintf(fname, 63, "platform-%s", name);
-	prop = (u32 *)get_property(target, fname, NULL);
+	prop = get_property(target, fname, NULL);
 	if (prop == NULL)
 		goto find_it;
 	ph = *prop;
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 31a9da7..824a618 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -116,7 +116,7 @@
 static void pmac_show_cpuinfo(struct seq_file *m)
 {
 	struct device_node *np;
-	char *pp;
+	const char *pp;
 	int plen;
 	int mbmodel;
 	unsigned int mbflags;
@@ -134,12 +134,12 @@
 	seq_printf(m, "machine\t\t: ");
 	np = of_find_node_by_path("/");
 	if (np != NULL) {
-		pp = (char *) get_property(np, "model", NULL);
+		pp = get_property(np, "model", NULL);
 		if (pp != NULL)
 			seq_printf(m, "%s\n", pp);
 		else
 			seq_printf(m, "PowerMac\n");
-		pp = (char *) get_property(np, "compatible", &plen);
+		pp = get_property(np, "compatible", &plen);
 		if (pp != NULL) {
 			seq_printf(m, "motherboard\t:");
 			while (plen > 0) {
@@ -163,10 +163,8 @@
 	if (np == NULL)
 		np = of_find_node_by_type(NULL, "cache");
 	if (np != NULL) {
-		unsigned int *ic = (unsigned int *)
-			get_property(np, "i-cache-size", NULL);
-		unsigned int *dc = (unsigned int *)
-			get_property(np, "d-cache-size", NULL);
+		const unsigned int *ic = get_property(np, "i-cache-size", NULL);
+		const unsigned int *dc = get_property(np, "d-cache-size", NULL);
 		seq_printf(m, "L2 cache\t:");
 		has_l2cache = 1;
 		if (get_property(np, "cache-unified", NULL) != 0 && dc) {
@@ -254,7 +252,7 @@
 		if (np == 0)
 			np = find_type_devices("cpu");
 		if (np != 0) {
-			unsigned int *l2cr = (unsigned int *)
+			const unsigned int *l2cr =
 				get_property(np, "l2cr-value", NULL);
 			if (l2cr != 0) {
 				ppc_override_l2cr = 1;
@@ -277,7 +275,7 @@
 static void __init pmac_setup_arch(void)
 {
 	struct device_node *cpu, *ic;
-	int *fp;
+	const int *fp;
 	unsigned long pvr;
 
 	pvr = PVR_VER(mfspr(SPRN_PVR));
@@ -287,7 +285,7 @@
 	loops_per_jiffy = 50000000 / HZ;
 	cpu = of_find_node_by_type(NULL, "cpu");
 	if (cpu != NULL) {
-		fp = (int *) get_property(cpu, "clock-frequency", NULL);
+		fp = get_property(cpu, "clock-frequency", NULL);
 		if (fp != NULL) {
 			if (pvr >= 0x30 && pvr < 0x80)
 				/* PPC970 etc. */
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 827b712..1949b65 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -548,7 +548,7 @@
 	struct device_node *cc = NULL;	
 	struct device_node *p;
 	const char *name = NULL;
-	u32 *reg;
+	const u32 *reg;
 	int ok;
 
 	/* Look for the clock chip */
@@ -562,7 +562,7 @@
 		pmac_tb_clock_chip_host = pmac_i2c_find_bus(cc);
 		if (pmac_tb_clock_chip_host == NULL)
 			continue;
-		reg = (u32 *)get_property(cc, "reg", NULL);
+		reg = get_property(cc, "reg", NULL);
 		if (reg == NULL)
 			continue;
 		switch (*reg) {
@@ -702,13 +702,12 @@
 	/* GPIO based HW sync on ppc32 Core99 */
 	if (pmac_tb_freeze == NULL && !machine_is_compatible("MacRISC4")) {
 		struct device_node *cpu;
-		u32 *tbprop = NULL;
+		const u32 *tbprop = NULL;
 
 		core99_tb_gpio = KL_GPIO_TB_ENABLE;	/* default value */
 		cpu = of_find_node_by_type(NULL, "cpu");
 		if (cpu != NULL) {
-			tbprop = (u32 *)get_property(cpu, "timebase-enable",
-						     NULL);
+			tbprop = get_property(cpu, "timebase-enable", NULL);
 			if (tbprop)
 				core99_tb_gpio = *tbprop;
 			of_node_put(cpu);
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index 37e5b1e..ce1a235 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -68,11 +68,11 @@
 
 void udbg_scc_init(int force_scc)
 {
-	u32 *reg;
+	const u32 *reg;
 	unsigned long addr;
 	struct device_node *stdout = NULL, *escc = NULL, *macio = NULL;
 	struct device_node *ch, *ch_def = NULL, *ch_a = NULL;
-	char *path;
+	const char *path;
 	int i, x;
 
 	escc = of_find_node_by_name(NULL, "escc");
@@ -81,7 +81,7 @@
 	macio = of_get_parent(escc);
 	if (macio == NULL)
 		goto bail;
-	path = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	path = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (path != NULL)
 		stdout = of_find_node_by_path(path);
 	for (ch = NULL; (ch = of_get_next_child(escc, ch)) != NULL;) {
@@ -96,13 +96,13 @@
 	ch = ch_def ? ch_def : ch_a;
 
 	/* Get address within mac-io ASIC */
-	reg = (u32 *)get_property(escc, "reg", NULL);
+	reg = get_property(escc, "reg", NULL);
 	if (reg == NULL)
 		goto bail;
 	addr = reg[0];
 
 	/* Get address of mac-io PCI itself */
-	reg = (u32 *)get_property(macio, "assigned-addresses", NULL);
+	reg = get_property(macio, "assigned-addresses", NULL);
 	if (reg == NULL)
 		goto bail;
 	addr += reg[2];
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index e5e0ff4..997243a 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -12,3 +12,4 @@
 
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
+obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 32eaddf..84bc8f7 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -449,7 +449,11 @@
 /* ------------------------------------------------------------- */
 /* The code below deals with error recovery */
 
-/** Return negative value if a permanent error, else return
+/**
+ * eeh_slot_availability - returns error status of slot
+ * @pdn: pci device node
+ *
+ * Return negative value if a permanent error, else return
  * a number of milliseconds to wait until the PCI slot is
  * ready to be used.
  */
@@ -474,11 +478,42 @@
 
 	printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n",
 		rc, rets[0], rets[1], rets[2]);
-	return -1;
+	return -2;
 }
 
-/** rtas_pci_slot_reset raises/lowers the pci #RST line
- *  state: 1/0 to raise/lower the #RST
+/**
+ * rtas_pci_enable - enable MMIO or DMA transfers for this slot
+ * @pdn: pci device node
+ * @function: option handed to firmware as the last input argument of
+ *            the ibm_set_eeh_option RTAS call
+ *
+ * Returns the status from rtas_call(); a non-zero status is logged
+ * as a warning before being returned.
+ */
+int rtas_pci_enable(struct pci_dn *pdn, int function)
+{
+	int addr, status;
+
+	/* Use PE configuration address, if present */
+	addr = pdn->eeh_pe_config_addr ? pdn->eeh_pe_config_addr :
+					 pdn->eeh_config_addr;
+
+	status = rtas_call(ibm_set_eeh_option, 4, 1, NULL, addr,
+	                   BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid),
+	                   function);
+
+	if (status == 0)
+		return 0;
+
+	printk(KERN_WARNING "EEH: Cannot enable function %d, err=%d dn=%s\n",
+	       function, status, pdn->node->full_name);
+	return status;
+}
+
+/**
+ * rtas_pci_slot_reset - raises/lowers the pci #RST line
+ * @pdn: pci device node
+ * @state: 1/0 to raise/lower the #RST
  *
  * Clear the EEH-frozen condition on a slot.  This routine
  * asserts the PCI #RST line if the 'state' argument is '1',
@@ -511,24 +546,21 @@
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid),
 	               state);
-	if (rc) {
-		printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d dn=%s\n", 
+	if (rc)
+		printk (KERN_WARNING "EEH: Unable to reset the failed slot,"
+		        " (%d) #RST=%d dn=%s\n",
 		        rc, state, pdn->node->full_name);
-		return;
-	}
 }
 
-/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
- *  dn -- device node to be reset.
+/**
+ * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
+ * @pdn: pci device node to be reset.
  *
  *  Return 0 if success, else a non-zero value.
  */
 
-int
-rtas_set_slot_reset(struct pci_dn *pdn)
+static void __rtas_set_slot_reset(struct pci_dn *pdn)
 {
-	int i, rc;
-
 	rtas_pci_slot_reset (pdn, 1);
 
 	/* The PCI bus requires that the reset be held high for at least
@@ -549,17 +581,33 @@
 	 * up traffic. */
 #define PCI_BUS_SETTLE_TIME_MSEC 1800
 	msleep (PCI_BUS_SETTLE_TIME_MSEC);
+}
+
+int rtas_set_slot_reset(struct pci_dn *pdn)
+{
+	int i, rc;
+
+	__rtas_set_slot_reset(pdn);
 
 	/* Now double check with the firmware to make sure the device is
 	 * ready to be used; if not, wait for recovery. */
 	for (i=0; i<10; i++) {
 		rc = eeh_slot_availability (pdn);
-		if (rc < 0)
-			printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", rc, pdn->node->full_name);
 		if (rc == 0)
 			return 0;
-		if (rc < 0)
+
+		if (rc == -2) {
+			printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n",
+			        i, pdn->node->full_name);
+			__rtas_set_slot_reset(pdn);
+			continue;
+		}
+
+		if (rc < 0) {
+			printk (KERN_ERR "EEH: unrecoverable slot failure %s\n",
+			        pdn->node->full_name);
 			return -1;
+		}
 
 		msleep (rc+100);
 	}
@@ -582,6 +630,8 @@
 
 /**
  * __restore_bars - Restore the Base Address Registers
+ * @pdn: pci device node
+ *
  * Loads the PCI configuration space base address registers,
  * the expansion ROM base address, the latency timer, and etc.
  * from the saved values in the device node.
@@ -691,11 +741,11 @@
 {
 	struct eeh_early_enable_info *info = data;
 	int ret;
-	char *status = get_property(dn, "status", NULL);
-	u32 *class_code = (u32 *)get_property(dn, "class-code", NULL);
-	u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL);
-	u32 *device_id = (u32 *)get_property(dn, "device-id", NULL);
-	u32 *regs;
+	const char *status = get_property(dn, "status", NULL);
+	const u32 *class_code = get_property(dn, "class-code", NULL);
+	const u32 *vendor_id = get_property(dn, "vendor-id", NULL);
+	const u32 *device_id = get_property(dn, "device-id", NULL);
+	const u32 *regs;
 	int enable;
 	struct pci_dn *pdn = PCI_DN(dn);
 
@@ -737,7 +787,7 @@
 
 	/* Ok... see if this device supports EEH.  Some do, some don't,
 	 * and the only way to find out is to check each and every one. */
-	regs = (u32 *)get_property(dn, "reg", NULL);
+	regs = get_property(dn, "reg", NULL);
 	if (regs) {
 		/* First register entry is addr (00BBSS00)  */
 		/* Try to enable eeh */
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index c37a849..b6b462d 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -157,6 +157,7 @@
 	if (!piar)
 		return NULL;
 
+	pci_dev_get(dev);
 	piar->addr_lo = alo;
 	piar->addr_hi = ahi;
 	piar->pcidev = dev;
@@ -178,7 +179,6 @@
 	struct device_node *dn;
 	struct pci_dn *pdn;
 	int i;
-	int inserted = 0;
 
 	dn = pci_device_to_OF_node(dev);
 	if (!dn) {
@@ -197,9 +197,6 @@
 		return;
 	}
 
-	/* The cache holds a reference to the device... */
-	pci_dev_get(dev);
-
 	/* Walk resources on this device, poke them into the tree */
 	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
 		unsigned long start = pci_resource_start(dev,i);
@@ -212,12 +209,7 @@
 		if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
 			 continue;
 		pci_addr_cache_insert(dev, start, end, flags);
-		inserted = 1;
 	}
-
-	/* If there was nothing to add, the cache has no reference... */
-	if (!inserted)
-		pci_dev_put(dev);
 }
 
 /**
@@ -240,7 +232,6 @@
 static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
 {
 	struct rb_node *n;
-	int removed = 0;
 
 restart:
 	n = rb_first(&pci_io_addr_cache_root.rb_root);
@@ -250,16 +241,12 @@
 
 		if (piar->pcidev == dev) {
 			rb_erase(n, &pci_io_addr_cache_root.rb_root);
-			removed = 1;
+			pci_dev_put(piar->pcidev);
 			kfree(piar);
 			goto restart;
 		}
 		n = rb_next(n);
 	}
-
-	/* The cache no longer holds its reference to this device... */
-	if (removed)
-		pci_dev_put(dev);
 }
 
 /**
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index aaad2c0..c2bc990 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -77,8 +77,12 @@
 }
 
 /* ------------------------------------------------------- */
-/** eeh_report_error - report an EEH error to each device,
- *  collect up and merge the device responses.
+/**
+ * eeh_report_error - report pci error to each device driver
+ * 
+ * Report an EEH error to each device driver, collect up and 
+ * merge the device driver responses. Cumulative response 
+ * passed back in "userdata".
  */
 
 static void eeh_report_error(struct pci_dev *dev, void *userdata)
@@ -96,24 +100,49 @@
 		PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
 		disable_irq_nosync(dev->irq);
 	}
-	if (!driver->err_handler)
-		return;
-	if (!driver->err_handler->error_detected)
+	if (!driver->err_handler ||
+	    !driver->err_handler->error_detected)
 		return;
 
 	rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
-	if (*res == PCI_ERS_RESULT_NEED_RESET) return;
 	if (*res == PCI_ERS_RESULT_DISCONNECT &&
 	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 }
 
-/** eeh_report_reset -- tell this device that the pci slot
- *  has been reset.
+/**
+ * eeh_report_mmio_enabled - tell drivers that MMIO has been enabled
+ * @dev: pci device whose driver is to be notified
+ * @userdata: pointer to the enum pci_ers_result being accumulated
+ *
+ * Invoke the driver's mmio_enabled() callback, if it has one, and
+ * merge its answer into the cumulative result behind "userdata".
+ */
+static void eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
+{
+	enum pci_ers_result *merged = userdata;
+	struct pci_driver *drv = dev->driver;
+	enum pci_ers_result answer;
+
+	/* Devices without a driver or without the callback are skipped. */
+	if (!drv || !drv->err_handler || !drv->err_handler->mmio_enabled)
+		return;
+
+	answer = drv->err_handler->mmio_enabled(dev);
+	if (*merged == PCI_ERS_RESULT_NONE)
+		*merged = answer;
+	if (*merged == PCI_ERS_RESULT_DISCONNECT &&
+	    answer == PCI_ERS_RESULT_NEED_RESET)
+		*merged = answer;
+}
+
+/**
+ * eeh_report_reset - tell device that slot has been reset
  */
 
 static void eeh_report_reset(struct pci_dev *dev, void *userdata)
 {
+	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver = dev->driver;
 	struct device_node *dn = pci_device_to_OF_node(dev);
 
@@ -124,14 +153,20 @@
 		PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
 		enable_irq(dev->irq);
 	}
-	if (!driver->err_handler)
-		return;
-	if (!driver->err_handler->slot_reset)
+	if (!driver->err_handler ||
+	    !driver->err_handler->slot_reset)
 		return;
 
-	driver->err_handler->slot_reset(dev);
+	rc = driver->err_handler->slot_reset(dev);
+	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+	if (*res == PCI_ERS_RESULT_DISCONNECT &&
+	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 }
 
+/**
+ * eeh_report_resume - tell device to resume normal operations
+ */
+
 static void eeh_report_resume(struct pci_dev *dev, void *userdata)
 {
 	struct pci_driver *driver = dev->driver;
@@ -148,6 +183,13 @@
 	driver->err_handler->resume(dev);
 }
 
+/**
+ * eeh_report_failure - tell device driver that device is dead.
+ *
+ * This informs the device driver that the device is permanently
+ * dead, and that no further recovery attempts will be made on it.
+ */
+
 static void eeh_report_failure(struct pci_dev *dev, void *userdata)
 {
 	struct pci_driver *driver = dev->driver;
@@ -190,11 +232,11 @@
 
 /**
  * eeh_reset_device() -- perform actual reset of a pci slot
- * Args: bus: pointer to the pci bus structure corresponding
+ * @bus: pointer to the pci bus structure corresponding
  *            to the isolated slot. A non-null value will
  *            cause all devices under the bus to be removed
  *            and then re-added.
- *     pe_dn: pointer to a "Partionable Endpoint" device node.
+ * @pe_dn: pointer to a "Partitionable Endpoint" device node.
  *            This is the top-level structure on which pci
  *            bus resets can be performed.
  */
@@ -268,14 +310,14 @@
 
 	if (!frozen_dn) {
 
-		location = (char *) get_property(event->dn, "ibm,loc-code", NULL);
+		location = get_property(event->dn, "ibm,loc-code", NULL);
 		location = location ? location : "unknown";
 		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
 		                "for location=%s pci addr=%s\n",
 		        location, pci_name(event->dev));
 		return NULL;
 	}
-	location = (char *) get_property(frozen_dn, "ibm,loc-code", NULL);
+	location = get_property(frozen_dn, "ibm,loc-code", NULL);
 	location = location ? location : "unknown";
 
 	/* There are two different styles for coming up with the PE.
@@ -347,22 +389,42 @@
 			goto hard_fail;
 	}
 
+	/* If all devices reported they can proceed, then re-enable MMIO */
+	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+		rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO);
+
+		if (rc) {
+			result = PCI_ERS_RESULT_NEED_RESET;
+		} else {
+			result = PCI_ERS_RESULT_NONE;
+			pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
+		}
+	}
+
+	/* If all devices reported they can proceed, then re-enable DMA */
+	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+		rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA);
+
+		if (rc)
+			result = PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	/* If any device has a hard failure, then shut off everything. */
+	if (result == PCI_ERS_RESULT_DISCONNECT)
+		goto hard_fail;
+
 	/* If any device called out for a reset, then reset the slot */
 	if (result == PCI_ERS_RESULT_NEED_RESET) {
 		rc = eeh_reset_device(frozen_pdn, NULL);
 		if (rc)
 			goto hard_fail;
-		pci_walk_bus(frozen_bus, eeh_report_reset, NULL);
+		result = PCI_ERS_RESULT_NONE;
+		pci_walk_bus(frozen_bus, eeh_report_reset, &result);
 	}
 
-	/* If all devices reported they can proceed, the re-enable PIO */
-	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-		/* XXX Not supported; we brute-force reset the device */
-		rc = eeh_reset_device(frozen_pdn, NULL);
-		if (rc)
-			goto hard_fail;
-		pci_walk_bus(frozen_bus, eeh_report_reset, NULL);
-	}
+	/* All devices should claim they have recovered by now. */
+	if (result != PCI_ERS_RESULT_RECOVERED)
+		goto hard_fail;
 
 	/* Tell all device drivers that they can resume operations */
 	pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index 45ccc68..1370774 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -124,11 +124,11 @@
 {
 	unsigned long flags;
 	struct eeh_event *event;
-	char *location;
+	const char *location;
 
 	if (!mem_init_done) {
 		printk(KERN_ERR "EEH: event during early boot not handled\n");
-		location = (char *) get_property(dn, "ibm,loc-code", NULL);
+		location = get_property(dn, "ibm,loc-code", NULL);
 		printk(KERN_ERR "EEH: device node = %s\n", dn->full_name);
 		printk(KERN_ERR "EEH: PCI location = %s\n", location);
 		return 1;
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index c01d8f0..1c7b2ba 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -68,7 +68,7 @@
 void __init fw_feature_init(void)
 {
 	struct device_node *dn;
-	char *hypertas, *s;
+	const char *hypertas, *s;
 	int len, i;
 
 	DBG(" -> fw_feature_init()\n");
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c9ff547..c00cfed 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -1,7 +1,6 @@
 /*
  * This file contains the generic code to perform a call to the
  * pSeries LPAR hypervisor.
- * NOTE: this file will go away when we move to inline this work.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -11,217 +10,153 @@
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
 	
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
+#ifdef CONFIG_HCALL_STATS
+/*
+ * precall must preserve all registers.  use unused STK_PARM() areas to
+ * save snapshots and opcode.  PURR is sampled only when CPU_FTR_PURR is set.
+ */
+#define HCALL_INST_PRECALL					\
+	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
+	mftb	r0;			/* get timebase and */	\
+	std     r0,STK_PARM(r5)(r1);	/* save for later */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
+	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);
+	
+/*
+ * postcall is performed immediately before function return which
+ * allows liberal use of volatile registers.
+ */
+#define HCALL_INST_POSTCALL					\
+	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
+	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
+	bgt-	cr7,1f;						\
+								\
+	/* get time and PURR snapshots after hcall */		\
+	mftb	r7;			/* timebase after */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r8,SPRN_PURR;		/* PURR after */	\
+	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
+	subf	r6,r6,r8;		/* delta */		\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
+	subf	r5,r5,r7;		/* time delta */	\
+								\
+	/* calculate address of stat structure r4 = opcode */	\
+	srdi	r4,r4,2;		/* index into array */	\
+	mulli	r4,r4,HCALL_STAT_SIZE;				\
+	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
+	add	r4,r4,r7;					\
+	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
+	add	r4,r4,r7;					\
+								\
+	/* update stats	*/					\
+	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
+	addi	r7,r7,1;					\
+	std	r7,HCALL_STAT_CALLS(r4);			\
+	ld      r7,HCALL_STAT_TB(r4);	/* timebase */		\
+	add	r7,r7,r5;					\
+	std	r7,HCALL_STAT_TB(r4);				\
+BEGIN_FTR_SECTION;						\
+	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
+	add	r7,r7,r6;					\
+	std	r7,HCALL_STAT_PURR(r4);				\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+1:
+#else
+#define HCALL_INST_PRECALL
+#define HCALL_INST_POSTCALL
+#endif
+
 	.text
 
-/* long plpar_hcall(unsigned long opcode,		R3
-			unsigned long arg1,		R4
-			unsigned long arg2,		R5
-			unsigned long arg3,		R6
-			unsigned long arg4,		R7
-			unsigned long *out1,		R8
-			unsigned long *out2,		R9
-			unsigned long *out3);		R10
- */
-_GLOBAL(plpar_hcall)
-	HMT_MEDIUM
-
-	mfcr	r0
-
-	std	r8,STK_PARM(r8)(r1)	/* Save out ptrs */
-	std	r9,STK_PARM(r9)(r1)
-	std	r10,STK_PARM(r10)(r1)
-
-	stw	r0,8(r1)
-
-	HVSC				/* invoke the hypervisor */
-
-	lwz	r0,8(r1)
-
-	ld	r8,STK_PARM(r8)(r1)	/* Fetch r4-r6 ret args */
-	ld	r9,STK_PARM(r9)(r1)
-	ld	r10,STK_PARM(r10)(r1)
-	std	r4,0(r8)
-	std	r5,0(r9)
-	std	r6,0(r10)
-
-	mtcrf	0xff,r0
-	blr				/* return r3 = status */
-
-
-/* Simple interface with no output values (other than status) */
 _GLOBAL(plpar_hcall_norets)
 	HMT_MEDIUM
 
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	HVSC				/* invoke the hypervisor */
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
 
-
-/* long plpar_hcall_8arg_2ret(unsigned long opcode,	R3
-			unsigned long arg1,		R4
-			unsigned long arg2,		R5
-			unsigned long arg3,		R6
-			unsigned long arg4,		R7
-			unsigned long arg5,		R8
-			unsigned long arg6,		R9
-			unsigned long arg7,		R10
-			unsigned long arg8,		112(R1)
-			unsigned long *out1);		120(R1)
- */
-_GLOBAL(plpar_hcall_8arg_2ret)
-	HMT_MEDIUM
-
-	mfcr	r0
-	ld	r11,STK_PARM(r11)(r1)	/* put arg8 in R11 */
-	stw	r0,8(r1)
-
-	HVSC				/* invoke the hypervisor */
-
-	lwz	r0,8(r1)
-	ld	r10,STK_PARM(r12)(r1)	/* Fetch r4 ret arg */
-	std	r4,0(r10)
-	mtcrf	0xff,r0
-	blr				/* return r3 = status */
-
-
-/* long plpar_hcall_4out(unsigned long opcode,		R3
-		 	unsigned long arg1,		R4
-		 	unsigned long arg2,		R5
-		 	unsigned long arg3,		R6
-		 	unsigned long arg4,		R7
-		 	unsigned long *out1,		R8
-		 	unsigned long *out2,		R9
-		 	unsigned long *out3,		R10
-		 	unsigned long *out4);		112(R1)
- */
-_GLOBAL(plpar_hcall_4out)
+_GLOBAL(plpar_hcall)
 	HMT_MEDIUM
 
 	mfcr	r0
 	stw	r0,8(r1)
 
-	std	r8,STK_PARM(r8)(r1)	/* Save out ptrs */
-	std	r9,STK_PARM(r9)(r1)
-	std	r10,STK_PARM(r10)(r1)
+	HCALL_INST_PRECALL
+
+	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
+
+	mr	r4,r5
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
 
 	HVSC				/* invoke the hypervisor */
 
-	lwz	r0,8(r1)
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
 
-	ld	r8,STK_PARM(r8)(r1)	/* Fetch r4-r7 ret args */
-	ld	r9,STK_PARM(r9)(r1)
-	ld	r10,STK_PARM(r10)(r1)
-	ld	r11,STK_PARM(r11)(r1)
-	std	r4,0(r8)
-	std	r5,0(r9)
-	std	r6,0(r10)
-	std	r7,0(r11)
-
-	mtcrf	0xff,r0
-	blr				/* return r3 = status */
-
-/* plpar_hcall_7arg_7ret(unsigned long opcode,		R3
-			 unsigned long arg1,		R4
-			 unsigned long arg2,		R5
-			 unsigned long arg3,		R6
-			 unsigned long arg4,		R7
-			 unsigned long arg5,		R8
-			 unsigned long arg6,		R9
-			 unsigned long arg7,		R10
-			 unsigned long *out1,		112(R1)
-			 unsigned long *out2,		110(R1)
-			 unsigned long *out3,		108(R1)
-			 unsigned long *out4,		106(R1)
-			 unsigned long *out5,		104(R1)
-			 unsigned long *out6,		102(R1)
-			 unsigned long *out7);		100(R1)
-*/
-_GLOBAL(plpar_hcall_7arg_7ret)
-	HMT_MEDIUM
-
-	mfcr	r0
-	stw	r0,8(r1)
-
-	HVSC				/* invoke the hypervisor */
+	HCALL_INST_POSTCALL
 
 	lwz	r0,8(r1)
-
-	ld	r11,STK_PARM(r11)(r1)	/* Fetch r4 ret arg */
-	std	r4,0(r11)
-	ld	r11,STK_PARM(r12)(r1)	/* Fetch r5 ret arg */
-	std	r5,0(r11)
-	ld	r11,STK_PARM(r13)(r1)	/* Fetch r6 ret arg */
-	std	r6,0(r11)
-	ld	r11,STK_PARM(r14)(r1)	/* Fetch r7 ret arg */
-	std	r7,0(r11)
-	ld	r11,STK_PARM(r15)(r1)	/* Fetch r8 ret arg */
-	std	r8,0(r11)
-	ld	r11,STK_PARM(r16)(r1)	/* Fetch r9 ret arg */
-	std	r9,0(r11)
-	ld	r11,STK_PARM(r17)(r1)	/* Fetch r10 ret arg */
-	std	r10,0(r11)
-
 	mtcrf	0xff,r0
 
 	blr				/* return r3 = status */
 
-/* plpar_hcall_9arg_9ret(unsigned long opcode,		R3
-			 unsigned long arg1,		R4
-			 unsigned long arg2,		R5
-			 unsigned long arg3,		R6
-			 unsigned long arg4,		R7
-			 unsigned long arg5,		R8
-			 unsigned long arg6,		R9
-			 unsigned long arg7,		R10
-			 unsigned long arg8,		112(R1)
-			 unsigned long arg9,		110(R1)
-			 unsigned long *out1,		108(R1)
-			 unsigned long *out2,		106(R1)
-			 unsigned long *out3,		104(R1)
-			 unsigned long *out4,		102(R1)
-			 unsigned long *out5,		100(R1)
-			 unsigned long *out6,		 98(R1)
-			 unsigned long *out7);		 96(R1)
-			 unsigned long *out8,		 94(R1)
-		         unsigned long *out9,            92(R1)
-*/
-_GLOBAL(plpar_hcall_9arg_9ret)
+_GLOBAL(plpar_hcall9)
 	HMT_MEDIUM
 
 	mfcr	r0
 	stw	r0,8(r1)
 
-	ld	r11,STK_PARM(r11)(r1)	 /* put arg8 in R11 */
-	ld	r12,STK_PARM(r12)(r1)    /* put arg9 in R12 */
+	HCALL_INST_PRECALL
+
+	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
+
+	mr	r4,r5
+	mr	r5,r6
+	mr	r6,r7
+	mr	r7,r8
+	mr	r8,r9
+	mr	r9,r10
+	ld	r10,STK_PARM(r11)(r1)	 /* put arg7 in R10 */
+	ld	r11,STK_PARM(r12)(r1)	 /* put arg8 in R11 */
+	ld	r12,STK_PARM(r13)(r1)    /* put arg9 in R12 */
 
 	HVSC				/* invoke the hypervisor */
 
-	ld	r0,STK_PARM(r13)(r1)	/* Fetch r4 ret arg */
-	stdx	r4,r0,r0
-	ld	r0,STK_PARM(r14)(r1)	/* Fetch r5 ret arg */
-	stdx	r5,r0,r0
-	ld	r0,STK_PARM(r15)(r1)	/* Fetch r6 ret arg */
-	stdx	r6,r0,r0
-	ld	r0,STK_PARM(r16)(r1)	/* Fetch r7 ret arg */
-	stdx	r7,r0,r0
-	ld	r0,STK_PARM(r17)(r1)	/* Fetch r8 ret arg */
-	stdx	r8,r0,r0
-	ld	r0,STK_PARM(r18)(r1)	/* Fetch r9 ret arg */
-	stdx	r9,r0,r0
-	ld	r0,STK_PARM(r19)(r1)	/* Fetch r10 ret arg */
-	stdx	r10,r0,r0
-	ld	r0,STK_PARM(r20)(r1)	/* Fetch r11 ret arg */
-	stdx	r11,r0,r0
-	ld	r0,STK_PARM(r21)(r1)	/* Fetch r12 ret arg */
-	stdx	r12,r0,r0
+	mr	r0,r12			/* r12 is reused as the buffer pointer */
+	ld	r12,STK_PARM(r4)(r1)
+	std	r4,  0(r12)
+	std	r5,  8(r12)
+	std	r6, 16(r12)
+	std	r7, 24(r12)
+	std	r8, 32(r12)
+	std	r9, 40(r12)
+	std	r10,48(r12)
+	std	r11,56(r12)
+	std	r0, 64(r12)		/* ninth return value, saved above */
+	HCALL_INST_POSTCALL
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
new file mode 100644
index 0000000..641e651
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2006 Mike Kravetz IBM Corporation
+ *
+ * Hypervisor Call Instrumentation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/cputable.h>
+
+DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
+
+/*
+ * Routines for displaying the statistics in debugfs
+ */
+static void *hc_start(struct seq_file *m, loff_t *pos)
+{
+	/* The cookie is *pos + 1 and is used as an array index: stop one short. */
+	if ((int)*pos < (HCALL_STAT_ARRAY_SIZE - 1))
+		return (void *)(unsigned long)(*pos + 1);
+	return NULL;
+}
+
+static void *hc_next(struct seq_file *m, void *p, loff_t * pos)
+{
+	/* Step to the following position, then let hc_start() validate it. */
+	*pos += 1;
+	return hc_start(m, pos);
+}
+
+static void hc_stop(struct seq_file *m, void *p)
+{	/* intentionally empty: hc_start()/hc_next() acquire nothing to release */
+}
+
+/* Print the statistics line for the slot whose index is carried in @p. */
+static int hc_show(struct seq_file *m, void *p)
+{
+	unsigned long h_num = (unsigned long)p;
+	struct hcall_stats *hs = (struct hcall_stats *)m->private;
+
+	/* Slots with a zero call count produce no output. */
+	if (hs[h_num].num_calls) {
+		/* The PURR column is printed only on CPUs that have a PURR. */
+		if (cpu_has_feature(CPU_FTR_PURR))
+			seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2,
+				   hs[h_num].num_calls, hs[h_num].tb_total,
+				   hs[h_num].purr_total);
+		else
+			seq_printf(m, "%lu %lu %lu\n", h_num<<2,
+				   hs[h_num].num_calls, hs[h_num].tb_total);
+	}
+	return 0;
+}
+
+static struct seq_operations hcall_inst_seq_ops = {
+	.start	= hc_start,
+	.next	= hc_next,
+	.stop	= hc_stop,
+	.show	= hc_show,
+};
+
+static int hcall_inst_seq_open(struct inode *inode, struct file *file)
+{
+	int rc = seq_open(file, &hcall_inst_seq_ops);
+
+	/* Only touch the seq_file once seq_open() has actually succeeded. */
+	if (rc)
+		return rc;
+	((struct seq_file *)file->private_data)->private =
+		file->f_dentry->d_inode->u.generic_ip;
+	return 0;
+}
+
+static struct file_operations hcall_inst_seq_fops = {
+	.open		= hcall_inst_seq_open,
+	.release	= seq_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+#define	HCALL_ROOT_DIR		"hcall_inst"
+#define CPU_NAME_BUF_SIZE	32
+
+/* Create debugfs "hcall_inst/cpuN" files, one per possible cpu. */
+static int __init hcall_inst_init(void)
+{
+	char name[CPU_NAME_BUF_SIZE];
+	struct dentry *dir, *entry;
+	int cpu;
+
+	/* Nothing is exported unless firmware reports FW_FEATURE_LPAR. */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	dir = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		snprintf(name, sizeof(name), "cpu%d", cpu);
+		/* Each file carries that cpu's hcall_stats array as its data. */
+		entry = debugfs_create_file(name, S_IRUGO, dir,
+					    per_cpu(hcall_stats, cpu),
+					    &hcall_inst_seq_fops);
+		if (!entry)
+			return -ENOMEM;
+	}
+	return 0;
+}
+__initcall(hcall_inst_init);
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
index a72a987..3f6a89b 100644
--- a/arch/powerpc/platforms/pseries/hvconsole.c
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <asm/hvcall.h>
 #include <asm/hvconsole.h>
+#include "plpar_wrappers.h"
 
 /**
  * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper
@@ -40,9 +41,9 @@
 {
 	unsigned long got;
 
-	if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got,
-		(unsigned long *)buf, (unsigned long *)buf+1) == H_SUCCESS)
+	if (plpar_get_term_char(vtermno, &got, buf) == H_SUCCESS)
 		return got;
+
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index d67af2c..bbf2e34 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -267,13 +267,12 @@
 				 struct iommu_table *tbl)
 {
 	struct device_node *node;
-	unsigned long *basep;
-	unsigned int *sizep;
+	const unsigned long *basep; const unsigned int *sizep;	/* widths unchanged */
 
 	node = (struct device_node *)phb->arch_data;
 
-	basep = (unsigned long *)get_property(node, "linux,tce-base", NULL);
-	sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL);
+	basep = get_property(node, "linux,tce-base", NULL);
+	sizep = get_property(node, "linux,tce-size", NULL);
 	if (basep == NULL || sizep == NULL) {
 		printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has "
 				"missing tce entries !\n", dn->full_name);
@@ -315,7 +314,7 @@
 static void iommu_table_setparms_lpar(struct pci_controller *phb,
 				      struct device_node *dn,
 				      struct iommu_table *tbl,
-				      unsigned char *dma_window)
+				      const void *dma_window)
 {
 	unsigned long offset, size;
 
@@ -415,7 +414,7 @@
 	struct iommu_table *tbl;
 	struct device_node *dn, *pdn;
 	struct pci_dn *ppci;
-	unsigned char *dma_window = NULL;
+	const void *dma_window = NULL;
 
 	DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self);
 
@@ -519,7 +518,7 @@
 {
 	struct device_node *pdn, *dn;
 	struct iommu_table *tbl;
-	unsigned char *dma_window = NULL;
+	const void *dma_window = NULL;
 	struct pci_dn *pci;
 
 	DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev));
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 3aeb406..1820a0b 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -48,13 +48,11 @@
 #define DBG_LOW(fmt...) do { } while(0)
 #endif
 
-/* in pSeries_hvCall.S */
+/* in hvCall.S */
 EXPORT_SYMBOL(plpar_hcall);
-EXPORT_SYMBOL(plpar_hcall_4out);
+EXPORT_SYMBOL(plpar_hcall9);
 EXPORT_SYMBOL(plpar_hcall_norets);
-EXPORT_SYMBOL(plpar_hcall_8arg_2ret);
-EXPORT_SYMBOL(plpar_hcall_7arg_7ret);
-EXPORT_SYMBOL(plpar_hcall_9arg_9ret);
+
 extern void pSeries_find_serial_port(void);
 
 
@@ -204,20 +202,20 @@
 void __init find_udbg_vterm(void)
 {
 	struct device_node *stdout_node;
-	u32 *termno;
-	char *name;
+	const u32 *termno;
+	const char *name;
 	int add_console;
 
 	/* find the boot console from /chosen/stdout */
 	if (!of_chosen)
 		return;
-	name = (char *)get_property(of_chosen, "linux,stdout-path", NULL);
+	name = get_property(of_chosen, "linux,stdout-path", NULL);
 	if (name == NULL)
 		return;
 	stdout_node = of_find_node_by_path(name);
 	if (!stdout_node)
 		return;
-	name = (char *)get_property(stdout_node, "name", NULL);
+	name = get_property(stdout_node, "name", NULL);
 	if (!name) {
 		printk(KERN_WARNING "stdout node missing 'name' property!\n");
 		goto out;
@@ -228,7 +226,7 @@
 	/* Check if it's a virtual terminal */
 	if (strncmp(name, "vty", 3) != 0)
 		goto out;
-	termno = (u32 *)get_property(stdout_node, "reg", NULL);
+	termno = get_property(stdout_node, "reg", NULL);
 	if (termno == NULL)
 		goto out;
 	vtermno = termno[0];
@@ -254,18 +252,34 @@
 void vpa_init(int cpu)
 {
 	int hwcpu = get_hard_smp_processor_id(cpu);
-	unsigned long vpa = __pa(&lppaca[cpu]);
+	unsigned long addr;
 	long ret;
 
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		lppaca[cpu].vmxregs_in_use = 1;
 
-	ret = register_vpa(hwcpu, vpa);
+	addr = __pa(&lppaca[cpu]);
+	ret = register_vpa(hwcpu, addr);
 
-	if (ret)
+	if (ret) {
 		printk(KERN_ERR "WARNING: vpa_init: VPA registration for "
 				"cpu %d (hw %d) of area %lx returns %ld\n",
-				cpu, hwcpu, vpa, ret);
+				cpu, hwcpu, addr, ret);
+		return;
+	}
+	/*
+	 * PAPR says this feature is SLB-Buffer but firmware never
+	 * reports that.  All SPLPAR support SLB shadow buffer.
+	 */
+	addr = __pa(&slb_shadow[cpu]);
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		ret = register_slb_shadow(hwcpu, addr);
+		if (ret)
+			printk(KERN_ERR
+			       "WARNING: vpa_init: SLB shadow buffer "
+			       "registration for cpu %d (hw %d) of area %lx "
+			       "returns %ld\n", cpu, hwcpu, addr, ret);
+	}
 }
 
 long pSeries_lpar_hpte_insert(unsigned long hpte_group,
@@ -277,7 +291,6 @@
 	unsigned long flags;
 	unsigned long slot;
 	unsigned long hpte_v, hpte_r;
-	unsigned long dummy0, dummy1;
 
 	if (!(vflags & HPTE_V_BOLTED))
 		DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
@@ -302,8 +315,7 @@
 	if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE))
 		hpte_r &= ~_PAGE_COHERENT;
 
-	lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v,
-			      hpte_r, &slot, &dummy0, &dummy1);
+	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
 	if (unlikely(lpar_rc == H_PTEG_FULL)) {
 		if (!(vflags & HPTE_V_BOLTED))
 			DBG_LOW(" full\n");
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 18abfb1..64163ce 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -123,13 +123,14 @@
 int __init pSeries_nvram_init(void)
 {
 	struct device_node *nvram;
-	unsigned int *nbytes_p, proplen;
+	const unsigned int *nbytes_p;
+	unsigned int proplen;
 
 	nvram = of_find_node_by_type(NULL, "nvram");
 	if (nvram == NULL)
 		return -ENODEV;
 
-	nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen);
+	nbytes_p = get_property(nvram, "#bytes", &proplen);
 	if (nbytes_p == NULL || proplen != sizeof(unsigned int))
 		return -EIO;
 
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index e97e67f..410a6bc 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -60,7 +60,7 @@
 static void __devinit check_s7a(void)
 {
 	struct device_node *root;
-	char *model;
+	const char *model;
 
 	s7a_workaround = 0;
 	root = of_find_node_by_path("/");
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index 3bd1b3e..3eb7b29 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -5,20 +5,17 @@
 
 static inline long poll_pending(void)
 {
-	unsigned long dummy;
-	return plpar_hcall(H_POLL_PENDING, 0, 0, 0, 0, &dummy, &dummy, &dummy);
+	return plpar_hcall_norets(H_POLL_PENDING);
 }
 
 static inline long prod_processor(void)
 {
-	plpar_hcall_norets(H_PROD);
-	return 0;
+	return plpar_hcall_norets(H_PROD);
 }
 
 static inline long cede_processor(void)
 {
-	plpar_hcall_norets(H_CEDE);
-	return 0;
+	return plpar_hcall_norets(H_CEDE);
 }
 
 static inline long vpa_call(unsigned long flags, unsigned long cpu,
@@ -40,23 +37,59 @@
 	return vpa_call(0x1, cpu, vpa);
 }
 
+static inline long unregister_slb_shadow(unsigned long cpu, unsigned long vpa)
+{
+	return vpa_call(0x7, cpu, vpa);	/* flags 0x7; register_slb_shadow() uses 0x3 */
+}
+
+static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
+{
+	return vpa_call(0x3, cpu, vpa);	/* flags 0x3; plain VPA registration uses 0x1 */
+}
+
 extern void vpa_init(int cpu);
 
+static inline long plpar_pte_enter(unsigned long flags,
+		unsigned long hpte_group, unsigned long hpte_v,
+		unsigned long hpte_r, unsigned long *slot)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc = plpar_hcall(H_ENTER, retbuf, flags, hpte_group, hpte_v,
+			      hpte_r);
+
+	/* The first word of the return buffer is handed back through @slot. */
+	*slot = retbuf[0];
+
+	return rc;
+}
+
 static inline long plpar_pte_remove(unsigned long flags, unsigned long ptex,
 		unsigned long avpn, unsigned long *old_pteh_ret,
 		unsigned long *old_ptel_ret)
 {
-	unsigned long dummy;
-	return plpar_hcall(H_REMOVE, flags, ptex, avpn, 0, old_pteh_ret,
-			old_ptel_ret, &dummy);
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_REMOVE, retbuf, flags, ptex, avpn);
+
+	*old_pteh_ret = retbuf[0];
+	*old_ptel_ret = retbuf[1];
+
+	return rc;
 }
 
 static inline long plpar_pte_read(unsigned long flags, unsigned long ptex,
 		unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
 {
-	unsigned long dummy;
-	return plpar_hcall(H_READ, flags, ptex, 0, 0, old_pteh_ret,
-			old_ptel_ret, &dummy);
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_READ, retbuf, flags, ptex);
+
+	*old_pteh_ret = retbuf[0];
+	*old_ptel_ret = retbuf[1];
+
+	return rc;
 }
 
 static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex,
@@ -68,9 +101,14 @@
 static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba,
 		unsigned long *tce_ret)
 {
-	unsigned long dummy;
-	return plpar_hcall(H_GET_TCE, liobn, ioba, 0, 0, tce_ret, &dummy,
-			&dummy);
+	unsigned long ret_words[PLPAR_HCALL_BUFSIZE];
+	long status = plpar_hcall(H_GET_TCE, ret_words, liobn, ioba);
+
+	/* Only the first result word is used here; it is stored
+	 * for the caller whatever the status. */
+	*tce_ret = ret_words[0];
+
+	return status;
 }
 
 static inline long plpar_tce_put(unsigned long liobn, unsigned long ioba,
@@ -94,9 +132,17 @@
 static inline long plpar_get_term_char(unsigned long termno,
 		unsigned long *len_ret, char *buf_ret)
 {
+	long status;
+	unsigned long ret_words[PLPAR_HCALL_BUFSIZE];
 	unsigned long *lbuf = (unsigned long *)buf_ret;	/* TODO: alignment? */
-	return plpar_hcall(H_GET_TERM_CHAR, termno, 0, 0, 0, len_ret,
-			lbuf + 0, lbuf + 1);
+
+	status = plpar_hcall(H_GET_TERM_CHAR, ret_words, termno);
+
+	/* Word 0 goes to *len_ret; words 1 and 2 fill buf_ret via lbuf. */
+	*len_ret = ret_words[0];
+	lbuf[0] = ret_words[1];
+	lbuf[1] = ret_words[2];
+	return status;
 }
 
 static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
@@ -107,4 +153,31 @@
 			lbuf[1]);
 }
 
+static inline long plpar_eoi(unsigned long xirr)
+{
+	return plpar_hcall_norets(H_EOI, xirr);	/* hcall status is the result */
+}
+
+static inline long plpar_cppr(unsigned long cppr)
+{
+	return plpar_hcall_norets(H_CPPR, cppr);	/* hcall status is the result */
+}
+
+static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
+{
+	return plpar_hcall_norets(H_IPI, servernum, mfrr);	/* hcall status is the result */
+}
+
+/* H_XIRR wrapper: hcall status is returned, first result word in *xirr_ret. */
+static inline long plpar_xirr(unsigned long *xirr_ret)
+{
+	unsigned long ret_words[PLPAR_HCALL_BUFSIZE];
+	long status = plpar_hcall(H_XIRR, ret_words);
+
+	/* Stored for the caller whatever the status was. */
+	*xirr_ret = ret_words[0];
+
+	return status;
+}
+
 #endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c7ffde1..903115d 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -79,7 +79,7 @@
 {
 	int i, index, count = 0;
 	struct of_irq oirq;
-	u32 *opicprop;
+	const u32 *opicprop;
 	unsigned int opicplen;
 	unsigned int virqs[16];
 
@@ -87,7 +87,7 @@
 	 * map those interrupts using the default interrupt host and default
 	 * trigger
 	 */
-	opicprop = (u32 *)get_property(np, "open-pic-interrupt", &opicplen);
+	opicprop = get_property(np, "open-pic-interrupt", &opicplen);
 	if (opicprop) {
 		opicplen /= sizeof(u32);
 		for (i = 0; i < opicplen; i++) {
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index 2e4e040..8ca2612 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -359,11 +359,11 @@
 static int get_eventscan_parms(void)
 {
 	struct device_node *node;
-	int *ip;
+	const int *ip;
 
 	node = of_find_node_by_path("/rtas");
 
-	ip = (int *)get_property(node, "rtas-event-scan-rate", NULL);
+	ip = get_property(node, "rtas-event-scan-rate", NULL);
 	if (ip == NULL) {
 		printk(KERN_ERR "rtasd: no rtas-event-scan-rate\n");
 		of_node_put(node);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 31867a7..a6398fb 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -133,9 +133,9 @@
 static void __init pseries_mpic_init_IRQ(void)
 {
 	struct device_node *np, *old, *cascade = NULL;
-        unsigned int *addrp;
+        const unsigned int *addrp;
 	unsigned long intack = 0;
-	unsigned int *opprop;
+	const unsigned int *opprop;
 	unsigned long openpic_addr = 0;
 	unsigned int cascade_irq;
 	int naddr, n, i, opplen;
@@ -143,7 +143,7 @@
 
 	np = of_find_node_by_path("/");
 	naddr = prom_n_addr_cells(np);
-	opprop = (unsigned int *) get_property(np, "platform-open-pic", &opplen);
+	opprop = get_property(np, "platform-open-pic", &opplen);
 	if (opprop != 0) {
 		openpic_addr = of_read_number(opprop, naddr);
 		printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
@@ -192,7 +192,7 @@
 			break;
 		if (strcmp(np->name, "pci") != 0)
 			continue;
-		addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge",
+		addrp = get_property(np, "8259-interrupt-acknowledge",
 					    NULL);
 		if (addrp == NULL)
 			continue;
@@ -223,23 +223,37 @@
 }
 
 #ifdef CONFIG_KEXEC
-static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary)
-{
-	mpic_teardown_this_cpu(secondary);
-}
-
-static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary)
+static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
 {
 	/* Don't risk a hypervisor call if we're crashing */
 	if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
-		unsigned long vpa = __pa(get_lppaca());
+		unsigned long addr;	/* __pa() of the area being deregistered */
 
-		if (unregister_vpa(hard_smp_processor_id(), vpa)) {
+		addr = __pa(get_slb_shadow());	/* SLB shadow first, VPA second */
+		if (unregister_slb_shadow(hard_smp_processor_id(), addr))
+			printk("SLB shadow buffer deregistration of "
+			       "cpu %u (hw_cpu_id %d) failed\n",
+			       smp_processor_id(),
+			       hard_smp_processor_id());
+
+		addr = __pa(get_lppaca());	/* a failure above does not skip this */
+		if (unregister_vpa(hard_smp_processor_id(), addr)) {
 			printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
 					"failed\n", smp_processor_id(),
 					hard_smp_processor_id());
 		}
 	}
+}
+
+static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary)
+{
+	pseries_kexec_cpu_down(crash_shutdown, secondary);	/* shared part runs first */
+	mpic_teardown_this_cpu(secondary);	/* then the MPIC-specific teardown */
+}
+
+static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary)
+{
+	pseries_kexec_cpu_down(crash_shutdown, secondary);	/* shared part runs before the XICS teardown */
 	xics_teardown_cpu(secondary);
 }
 #endif /* CONFIG_KEXEC */
@@ -247,11 +261,11 @@
 static void __init pseries_discover_pic(void)
 {
 	struct device_node *np;
-	char *typep;
+	const char *typep;
 
 	for (np = NULL; (np = of_find_node_by_name(np,
 						   "interrupt-controller"));) {
-		typep = (char *)get_property(np, "compatible", NULL);
+		typep = get_property(np, "compatible", NULL);
 		if (strstr(typep, "open-pic")) {
 			pSeries_mpic_node = of_node_get(np);
 			ppc_md.init_IRQ       = pseries_mpic_init_IRQ;
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index ac61098..c6624b8 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -62,7 +62,7 @@
  */
 static cpumask_t of_spin_map;
 
-extern void pSeries_secondary_smp_init(unsigned long);
+extern void generic_secondary_smp_init(unsigned long);
 
 #ifdef CONFIG_HOTPLUG_CPU
 
@@ -145,9 +145,9 @@
 	unsigned int cpu;
 	cpumask_t candidate_map, tmp = CPU_MASK_NONE;
 	int err = -ENOSPC, len, nthreads, i;
-	u32 *intserv;
+	const u32 *intserv;
 
-	intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
+	intserv = get_property(np, "ibm,ppc-interrupt-server#s", &len);
 	if (!intserv)
 		return 0;
 
@@ -205,9 +205,9 @@
 {
 	unsigned int cpu;
 	int len, nthreads, i;
-	u32 *intserv;
+	const u32 *intserv;
 
-	intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
+	intserv = get_property(np, "ibm,ppc-interrupt-server#s", &len);
 	if (!intserv)
 		return;
 
@@ -270,7 +270,7 @@
 {
 	int status;
 	unsigned long start_here = __pa((u32)*((unsigned long *)
-					       pSeries_secondary_smp_init));
+					       generic_secondary_smp_init));
 	unsigned int pcpu;
 	int start_cpu;
 
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index e988630..253972e 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -34,6 +34,7 @@
 #include <asm/i8259.h>
 
 #include "xics.h"
+#include "plpar_wrappers.h"
 
 #define XICS_IPI		2
 #define XICS_IRQ_SPURIOUS	0
@@ -110,27 +111,6 @@
 /* LPAR low level accessors */
 
 
-static inline long plpar_eoi(unsigned long xirr)
-{
-	return plpar_hcall_norets(H_EOI, xirr);
-}
-
-static inline long plpar_cppr(unsigned long cppr)
-{
-	return plpar_hcall_norets(H_CPPR, cppr);
-}
-
-static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
-{
-	return plpar_hcall_norets(H_IPI, servernum, mfrr);
-}
-
-static inline long plpar_xirr(unsigned long *xirr_ret)
-{
-	unsigned long dummy;
-	return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy);
-}
-
 static inline unsigned int lpar_xirr_info_get(int n_cpu)
 {
 	unsigned long lpar_rc;
@@ -590,14 +570,14 @@
 				      unsigned int *indx)
 {
 	unsigned int ilen;
-	u32 *ireg;
+	const u32 *ireg;
 
 	/* This code does the theorically broken assumption that the interrupt
 	 * server numbers are the same as the hard CPU numbers.
 	 * This happens to be the case so far but we are playing with fire...
 	 * should be fixed one of these days. -BenH.
 	 */
-	ireg = (u32 *)get_property(np, "ibm,interrupt-server-ranges", NULL);
+	ireg = get_property(np, "ibm,interrupt-server-ranges", NULL);
 
 	/* Do that ever happen ? we'll know soon enough... but even good'old
 	 * f80 does have that property ..
@@ -609,7 +589,7 @@
 		 */
 		*indx = *ireg;
 	}
-	ireg = (u32 *)get_property(np, "reg", &ilen);
+	ireg = get_property(np, "reg", &ilen);
 	if (!ireg)
 		panic("xics_init_IRQ: can't find interrupt reg property");
 
@@ -635,7 +615,7 @@
 {
 	struct device_node *np, *old, *found = NULL;
 	int cascade, naddr;
-	u32 *addrp;
+	const u32 *addrp;
 	unsigned long intack = 0;
 
 	for_each_node_by_type(np, "interrupt-controller")
@@ -661,7 +641,7 @@
 			break;
 		if (strcmp(np->name, "pci") != 0)
 			continue;
-		addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", NULL);
+		addrp = get_property(np, "8259-interrupt-acknowledge", NULL);
 		if (addrp == NULL)
 			continue;
 		naddr = prom_n_addr_cells(np);
@@ -680,7 +660,8 @@
 {
 	int i;
 	struct device_node *np;
-	u32 *ireg, ilen, indx = 0;
+	u32 ilen, indx = 0;
+	const u32 *ireg;
 	int found = 0;
 
 	ppc64_boot_msg(0x20, "XICS Init");
@@ -705,18 +686,17 @@
 	for (np = of_find_node_by_type(NULL, "cpu");
 	     np;
 	     np = of_find_node_by_type(np, "cpu")) {
-		ireg = (u32 *)get_property(np, "reg", &ilen);
+		ireg = get_property(np, "reg", &ilen);
 		if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) {
-			ireg = (u32 *)get_property(np,
-						  "ibm,ppc-interrupt-gserver#s",
-						   &ilen);
+			ireg = get_property(np,
+					"ibm,ppc-interrupt-gserver#s", &ilen);
 			i = ilen / sizeof(int);
 			if (ireg && i > 0) {
 				default_server = ireg[0];
 				/* take last element */
 				default_distrib_server = ireg[i-1];
 			}
-			ireg = (u32 *)get_property(np,
+			ireg = get_property(np,
 					"ibm,interrupt-server#-size", NULL);
 			if (ireg)
 				interrupt_server_size = *ireg;
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index ef10bcf..92ba378 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -41,7 +41,7 @@
 	soc = of_find_node_by_type(NULL, "soc");
 	if (soc) {
 		unsigned int size;
-		void *prop = get_property(soc, "reg", &size);
+		const void *prop = get_property(soc, "reg", &size);
 		immrbase = of_translate_address(soc, prop);
 		of_node_put(soc);
 	};
@@ -85,7 +85,7 @@
 			mdio_data.irq[k] = -1;
 
 		while ((child = of_get_next_child(np, child)) != NULL) {
-			u32 *id = get_property(child, "reg", NULL);
+			const u32 *id = get_property(child, "reg", NULL);
 			mdio_data.irq[*id] = irq_of_parse_and_map(child, 0);
 		}
 
@@ -124,10 +124,10 @@
 		struct resource r[4];
 		struct device_node *phy, *mdio;
 		struct gianfar_platform_data gfar_data;
-		unsigned int *id;
-		char *model;
-		void *mac_addr;
-		phandle *ph;
+		const unsigned int *id;
+		const char *model;
+		const void *mac_addr;
+		const phandle *ph;
 		int n_res = 1;
 
 		memset(r, 0, sizeof(r));
@@ -193,7 +193,7 @@
 			    FSL_GIANFAR_DEV_HAS_VLAN |
 			    FSL_GIANFAR_DEV_HAS_EXTENDED_HASH;
 
-		ph = (phandle *) get_property(np, "phy-handle", NULL);
+		ph = get_property(np, "phy-handle", NULL);
 		phy = of_find_node_by_phandle(*ph);
 
 		if (phy == NULL) {
@@ -203,7 +203,7 @@
 
 		mdio = of_get_parent(phy);
 
-		id = (u32 *) get_property(phy, "reg", NULL);
+		id = get_property(phy, "reg", NULL);
 		ret = of_address_to_resource(mdio, 0, &res);
 		if (ret) {
 			of_node_put(phy);
@@ -247,7 +247,7 @@
 	     i++) {
 		struct resource r[2];
 		struct fsl_i2c_platform_data i2c_data;
-		unsigned char *flags = NULL;
+		const unsigned char *flags = NULL;
 
 		memset(&r, 0, sizeof(r));
 		memset(&i2c_data, 0, sizeof(i2c_data));
@@ -298,7 +298,7 @@
 	struct resource r;
 	struct device_node *soc, *np;
 	struct platform_device *dev;
-	unsigned int *freq;
+	const unsigned int *freq;
 	int ret;
 
 	np = of_find_compatible_node(NULL, "watchdog", "mpc83xx_wdt");
@@ -315,7 +315,7 @@
 		goto nosoc;
 	}
 
-	freq = (unsigned int *)get_property(soc, "bus-frequency", NULL);
+	freq = get_property(soc, "bus-frequency", NULL);
 	if (!freq) {
 		ret = -ENODEV;
 		goto err;
@@ -355,7 +355,7 @@
 arch_initcall(mpc83xx_wdt_init);
 #endif
 
-static enum fsl_usb2_phy_modes determine_usb_phy(char * phy_type)
+static enum fsl_usb2_phy_modes determine_usb_phy(const char *phy_type)
 {
 	if (!phy_type)
 		return FSL_USB2_PHY_NONE;
@@ -383,7 +383,7 @@
 	     i++) {
 		struct resource r[2];
 		struct fsl_usb2_platform_data usb_data;
-		unsigned char *prop = NULL;
+		const unsigned char *prop = NULL;
 
 		memset(&r, 0, sizeof(r));
 		memset(&usb_data, 0, sizeof(usb_data));
@@ -431,7 +431,7 @@
 	     i++) {
 		struct resource r[2];
 		struct fsl_usb2_platform_data usb_data;
-		unsigned char *prop = NULL;
+		const unsigned char *prop = NULL;
 
 		memset(&r, 0, sizeof(r));
 		memset(&usb_data, 0, sizeof(usb_data));
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index c433d3f..5a3dd48 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -2,6 +2,8 @@
 #define __PPC_FSL_SOC_H
 #ifdef __KERNEL__
 
+#include <asm/mmu.h>
+
 extern phys_addr_t get_immrbase(void);
 
 #endif
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 9855820..26a6a3b 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -224,7 +224,7 @@
 	.xlate = i8259_host_xlate,
 };
 
-/****
+/**
  * i8259_init - Initialize the legacy controller
  * @node: device node of the legacy PIC (can be NULL, but then, it will match
  *        all interrupts, so beware)
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index 70e7077..0251b7c 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -210,7 +210,7 @@
 		.prio_mask = 4,
 	},
 	[64] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_A,
 		.force	= IPIC_SIFCR_L,
@@ -218,7 +218,7 @@
 		.prio_mask = 0,
 	},
 	[65] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_A,
 		.force	= IPIC_SIFCR_L,
@@ -226,7 +226,7 @@
 		.prio_mask = 1,
 	},
 	[66] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_A,
 		.force	= IPIC_SIFCR_L,
@@ -234,7 +234,7 @@
 		.prio_mask = 2,
 	},
 	[67] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_A,
 		.force	= IPIC_SIFCR_L,
@@ -242,7 +242,7 @@
 		.prio_mask = 3,
 	},
 	[68] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_B,
 		.force	= IPIC_SIFCR_L,
@@ -250,7 +250,7 @@
 		.prio_mask = 0,
 	},
 	[69] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_B,
 		.force	= IPIC_SIFCR_L,
@@ -258,7 +258,7 @@
 		.prio_mask = 1,
 	},
 	[70] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_B,
 		.force	= IPIC_SIFCR_L,
@@ -266,7 +266,7 @@
 		.prio_mask = 2,
 	},
 	[71] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= IPIC_SMPRR_B,
 		.force	= IPIC_SIFCR_L,
@@ -274,91 +274,91 @@
 		.prio_mask = 3,
 	},
 	[72] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 8,
 	},
 	[73] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 9,
 	},
 	[74] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 10,
 	},
 	[75] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 11,
 	},
 	[76] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 12,
 	},
 	[77] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 13,
 	},
 	[78] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 14,
 	},
 	[79] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 15,
 	},
 	[80] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 16,
 	},
 	[84] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 20,
 	},
 	[85] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 21,
 	},
 	[90] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
 		.bit	= 26,
 	},
 	[91] = {
-		.pend	= IPIC_SIPNR_H,
+		.pend	= IPIC_SIPNR_L,
 		.mask	= IPIC_SIMSR_L,
 		.prio	= 0,
 		.force	= IPIC_SIFCR_L,
diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c
index 615350d..ff23f5a 100644
--- a/arch/powerpc/sysdev/mmio_nvram.c
+++ b/arch/powerpc/sysdev/mmio_nvram.c
@@ -80,7 +80,7 @@
 int __init mmio_nvram_init(void)
 {
 	struct device_node *nvram_node;
-	unsigned long *buffer;
+	const unsigned long *buffer;
 	int proplen;
 	unsigned long nvram_addr;
 	int ret;
@@ -91,7 +91,7 @@
 		goto out;
 
 	ret = -EIO;
-	buffer = (unsigned long *)get_property(nvram_node, "reg", &proplen);
+	buffer = get_property(nvram_node, "reg", &proplen);
 	if (proplen != 2*sizeof(unsigned long))
 		goto out;
 
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 2ab06ed..c28f69b 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -194,7 +194,7 @@
 	int len;
 	struct pci_controller *hose;
 	struct resource rsrc;
-	int *bus_range;
+	const int *bus_range;
 	int primary = 0, has_address = 0;
 
 	/* PCI Config mapping */
@@ -207,7 +207,7 @@
 	has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
 
 	/* Get bus range if any */
-	bus_range = (int *)get_property(dev, "bus-range", &len);
+	bus_range = get_property(dev, "bus-range", &len);
 	if (bus_range == NULL || len < 2 * sizeof(int)) {
 		printk(KERN_WARNING "Can't get bus-range for %s, assume"
 		       " bus 0\n", dev->full_name);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 179b10c..8adad14 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -137,10 +137,14 @@
 static void proccall(void);
 void dump_segments(void);
 static void symbol_lookup(void);
+static void xmon_show_stack(unsigned long sp, unsigned long lr,
+			    unsigned long pc);
 static void xmon_print_symbol(unsigned long address, const char *mid,
 			      const char *after);
 static const char *getvecname(unsigned long vec);
 
+int xmon_no_auto_backtrace;
+
 extern int print_insn_powerpc(unsigned long, unsigned long, int);
 
 extern void xmon_enter(void);
@@ -736,6 +740,12 @@
 
 	last_cmd = NULL;
 	xmon_regs = excp;
+
+	if (!xmon_no_auto_backtrace) {
+		xmon_no_auto_backtrace = 1;
+		xmon_show_stack(excp->gpr[1], excp->link, excp->nip);
+	}
+
 	for(;;) {
 #ifdef CONFIG_SMP
 		printf("%x:", smp_processor_id());
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index a04cdf0..8fa10cf 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -1204,7 +1204,7 @@
 	default PCI
 
 config MPC83xx_PCI2
-	bool "  Supprt for 2nd PCI host controller"
+	bool "Support for 2nd PCI host controller"
 	depends on PCI && MPC834x
 	default y if MPC834x_SYS
 
@@ -1223,12 +1223,12 @@
 	default y
 
 config 8260_PCI9
-	bool "  Enable workaround for MPC826x erratum PCI 9"
+	bool "Enable workaround for MPC826x erratum PCI 9"
 	depends on PCI_8260 && !ADS8272
 	default y
 
 choice
-	prompt "  IDMA channel for PCI 9 workaround"
+	prompt "IDMA channel for PCI 9 workaround"
 	depends on 8260_PCI9
 
 config 8260_PCI9_IDMA1
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
index 2fa0075..50b4bbd 100644
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -768,91 +768,6 @@
 	bdnz	00b
 	blr
 
-_GLOBAL(_insw)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhbrx	r5,0,r3
-01:	eieio
-02:	sthu	r5,2(r4)
-	ISYNC_8xx
-	.section .fixup,"ax"
-03:	blr
-	.text
-	.section __ex_table, "a"
-		.align 2
-		.long 00b, 03b
-		.long 01b, 03b
-		.long 02b, 03b
-	.text
-	bdnz	00b
-	blr
-
-_GLOBAL(_outsw)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,2
-	blelr-
-00:	lhzu	r5,2(r4)
-01:	eieio
-02:	sthbrx	r5,0,r3
-	ISYNC_8xx
-	.section .fixup,"ax"
-03:	blr
-	.text
-	.section __ex_table, "a"
-		.align 2
-		.long 00b, 03b
-		.long 01b, 03b
-		.long 02b, 03b
-	.text
-	bdnz	00b
-	blr
-
-_GLOBAL(_insl)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwbrx	r5,0,r3
-01:	eieio
-02:	stwu	r5,4(r4)
-	ISYNC_8xx
-	.section .fixup,"ax"
-03:	blr
-	.text
-	.section __ex_table, "a"
-		.align 2
-		.long 00b, 03b
-		.long 01b, 03b
-		.long 02b, 03b
-	.text
-	bdnz	00b
-	blr
-
-_GLOBAL(_outsl)
-	cmpwi	0,r5,0
-	mtctr	r5
-	subi	r4,r4,4
-	blelr-
-00:	lwzu	r5,4(r4)
-01:	stwbrx	r5,0,r3
-02:	eieio
-	ISYNC_8xx
-	.section .fixup,"ax"
-03:	blr
-	.text
-	.section __ex_table, "a"
-		.align 2
-		.long 00b, 03b
-		.long 01b, 03b
-		.long 02b, 03b
-	.text
-	bdnz	00b
-	blr
-
-_GLOBAL(__ide_mm_insw)
 _GLOBAL(_insw_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
@@ -874,7 +789,6 @@
 	bdnz	00b
 	blr
 
-_GLOBAL(__ide_mm_outsw)
 _GLOBAL(_outsw_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
@@ -896,7 +810,6 @@
 	bdnz	00b
 	blr
 
-_GLOBAL(__ide_mm_insl)
 _GLOBAL(_insl_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
@@ -918,7 +831,6 @@
 	bdnz	00b
 	blr
 
-_GLOBAL(__ide_mm_outsl)
 _GLOBAL(_outsl_ns)
 	cmpwi	0,r5,0
 	mtctr	r5
diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c
index d173540..c8b65ca 100644
--- a/arch/ppc/kernel/ppc_ksyms.c
+++ b/arch/ppc/kernel/ppc_ksyms.c
@@ -115,17 +115,8 @@
 EXPORT_SYMBOL(outl);
 EXPORT_SYMBOL(outsl);*/
 
-EXPORT_SYMBOL(__ide_mm_insl);
-EXPORT_SYMBOL(__ide_mm_outsw);
-EXPORT_SYMBOL(__ide_mm_insw);
-EXPORT_SYMBOL(__ide_mm_outsl);
-
 EXPORT_SYMBOL(_insb);
 EXPORT_SYMBOL(_outsb);
-EXPORT_SYMBOL(_insw);
-EXPORT_SYMBOL(_outsw);
-EXPORT_SYMBOL(_insl);
-EXPORT_SYMBOL(_outsl);
 EXPORT_SYMBOL(_insw_ns);
 EXPORT_SYMBOL(_outsw_ns);
 EXPORT_SYMBOL(_insl_ns);
diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c
index a74f46d..5458ac5 100644
--- a/arch/ppc/kernel/setup.c
+++ b/arch/ppc/kernel/setup.c
@@ -127,11 +127,8 @@
 	ppc_md.restart(cmd);
 }
 
-void machine_power_off(void)
+static void ppc_generic_power_off(void)
 {
-#ifdef CONFIG_NVRAM
-	nvram_sync();
-#endif
 	ppc_md.power_off();
 }
 
@@ -143,7 +140,17 @@
 	ppc_md.halt();
 }
 
-void (*pm_power_off)(void) = machine_power_off;
+void (*pm_power_off)(void) = ppc_generic_power_off;
+
+void machine_power_off(void)
+{
+#ifdef CONFIG_NVRAM
+	nvram_sync();
+#endif /* CONFIG_NVRAM */
+	if (pm_power_off)	/* skipped when the hook pointer is NULL */
+		pm_power_off();
+	ppc_generic_power_off();	/* reached if the hook was NULL or returned */
+}
 
 #ifdef CONFIG_TAU
 extern u32 cpu_temp(unsigned long cpu);
diff --git a/arch/ppc/platforms/85xx/sbc8560.h b/arch/ppc/platforms/85xx/sbc8560.h
index c7d61cf..e5e156f 100644
--- a/arch/ppc/platforms/85xx/sbc8560.h
+++ b/arch/ppc/platforms/85xx/sbc8560.h
@@ -14,6 +14,7 @@
 #define __MACH_SBC8560_H__
  
 #include <platforms/85xx/sbc85xx.h>
+#include <asm/irq.h>
 
 #define CPM_MAP_ADDR    (CCSRBAR + MPC85xx_CPM_OFFSET)
  
diff --git a/arch/ppc/platforms/85xx/sbc85xx.h b/arch/ppc/platforms/85xx/sbc85xx.h
index 21ea7a5..51df4dc 100644
--- a/arch/ppc/platforms/85xx/sbc85xx.h
+++ b/arch/ppc/platforms/85xx/sbc85xx.h
@@ -49,4 +49,22 @@
 
 #define MPC85XX_PCI1_IO_SIZE	0x01000000
 
+/* FCC1 Clock Source Configuration.  These can be
+ * redefined in the board specific file.
+ *    Can only choose from CLK9-12 */
+#define F1_RXCLK       12
+#define F1_TXCLK       11
+
+/* FCC2 Clock Source Configuration.  These can be
+ * redefined in the board specific file.
+ *    Can only choose from CLK13-16 */
+#define F2_RXCLK       13
+#define F2_TXCLK       14
+
+/* FCC3 Clock Source Configuration.  These can be
+ * redefined in the board specific file.
+ *    Can only choose from CLK13-16 */
+#define F3_RXCLK       15
+#define F3_TXCLK       16
+
 #endif /* __PLATFORMS_85XX_SBC85XX_H__ */
diff --git a/arch/ppc/syslib/m8260_pci_erratum9.c b/arch/ppc/syslib/m8260_pci_erratum9.c
index 974581e..5475709 100644
--- a/arch/ppc/syslib/m8260_pci_erratum9.c
+++ b/arch/ppc/syslib/m8260_pci_erratum9.c
@@ -339,20 +339,6 @@
 	idma_pci9_read((u8 *)buf, (u8 *)addr, nl*sizeof(u32), sizeof(u32), 0);
 }
 
-void insw_ns(unsigned port, void *buf, int ns)
-{
-	u8 *addr = (u8 *)(port + _IO_BASE);
-
-	idma_pci9_read((u8 *)buf, (u8 *)addr, ns*sizeof(u16), sizeof(u16), 0);
-}
-
-void insl_ns(unsigned port, void *buf, int nl)
-{
-	u8 *addr = (u8 *)(port + _IO_BASE);
-
-	idma_pci9_read((u8 *)buf, (u8 *)addr, nl*sizeof(u32), sizeof(u32), 0);
-}
-
 void *memcpy_fromio(void *dest, unsigned long src, size_t count)
 {
 	unsigned long pa = iopa((unsigned long) src);
@@ -373,8 +359,6 @@
 EXPORT_SYMBOL(insb);
 EXPORT_SYMBOL(insw);
 EXPORT_SYMBOL(insl);
-EXPORT_SYMBOL(insw_ns);
-EXPORT_SYMBOL(insl_ns);
 EXPORT_SYMBOL(memcpy_fromio);
 
 #endif	/* ifdef CONFIG_8260_PCI9 */
diff --git a/arch/ppc/xmon/start.c b/arch/ppc/xmon/start.c
index f7e9298..d74a883 100644
--- a/arch/ppc/xmon/start.c
+++ b/arch/ppc/xmon/start.c
@@ -15,6 +15,7 @@
 #include <asm/processor.h>
 #include <asm/delay.h>
 #include <asm/btext.h>
+#include <asm/ibm4xx.h>
 
 static volatile unsigned char *sccc, *sccd;
 unsigned int TXRDY, RXRDY, DLAB;
@@ -57,23 +58,30 @@
 void
 xmon_map_scc(void)
 {
-#ifdef CONFIG_PPC_PREP
-	volatile unsigned char *base;
-
-#elif defined(CONFIG_GEMINI)
+#if defined(CONFIG_GEMINI)
 	/* should already be mapped by the kernel boot */
-	sccc = (volatile unsigned char *) 0xffeffb0d;
 	sccd = (volatile unsigned char *) 0xffeffb08;
-	TXRDY = 0x20;
-	RXRDY = 1;
-	DLAB = 0x80;
 #elif defined(CONFIG_405GP)
-	sccc = (volatile unsigned char *)0xef600305;
 	sccd = (volatile unsigned char *)0xef600300;
+#elif defined(CONFIG_440EP)
+	sccd = (volatile unsigned char *) ioremap(PPC440EP_UART0_ADDR, 8);
+#elif defined(CONFIG_440SP)
+	sccd = (volatile unsigned char *) ioremap64(PPC440SP_UART0_ADDR, 8);
+#elif defined(CONFIG_440SPE)
+	sccd = (volatile unsigned char *) ioremap64(PPC440SPE_UART0_ADDR, 8);
+#elif defined(CONFIG_44x)
+	/* Default for 44x platforms.  A board whose UART lives at a
+	   different address must be handled by a case above this one,
+	   otherwise the wrong port gets mapped. */
+	sccd = (volatile unsigned char *) ioremap64(PPC440GP_UART0_ADDR, 8);
+#endif /* platform */
+
+#ifndef CONFIG_PPC_PREP
+	sccc = sccd + 5;	/* sccc always sits 5 bytes above sccd */
 	TXRDY = 0x20;
 	RXRDY = 1;
 	DLAB = 0x80;
-#endif /* platform */
+#endif /* !CONFIG_PPC_PREP */
 
 	register_sysrq_key('x', &sysrq_xmon_op);
 }
diff --git a/arch/ppc/xmon/xmon.c b/arch/ppc/xmon/xmon.c
index 37d234f..b1a9174 100644
--- a/arch/ppc/xmon/xmon.c
+++ b/arch/ppc/xmon/xmon.c
@@ -153,6 +153,12 @@
 #define SSTEP	1		/* stepping because of 's' command */
 #define BRSTEP	2		/* stepping over breakpoint */
 
+#ifdef CONFIG_4xx
+#define MSR_SSTEP_ENABLE 0x200	/* NOTE(review): presumably MSR[DE] - confirm */
+#else
+#define MSR_SSTEP_ENABLE 0x400	/* NOTE(review): presumably MSR[SE] - confirm */
+#endif /* CONFIG_4xx */
+
 static struct pt_regs *xmon_regs[NR_CPUS];
 
 extern inline void sync(void)
@@ -211,6 +217,14 @@
 	p[1] = lo;
 }
 
+static inline void xmon_enable_sstep(struct pt_regs *regs)
+{
+	regs->msr |= MSR_SSTEP_ENABLE;	/* bit value depends on CONFIG_4xx */
+#ifdef CONFIG_4xx
+	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);	/* read-modify-write: other DBCR0 bits are kept */
+#endif /* CONFIG_4xx */
+}
+
 int xmon(struct pt_regs *excp)
 {
 	struct pt_regs regs;
@@ -254,10 +268,10 @@
 	cmd = cmds(excp);
 	if (cmd == 's') {
 		xmon_trace[smp_processor_id()] = SSTEP;
-		excp->msr |= 0x400;
+		xmon_enable_sstep(excp);
 	} else if (at_breakpoint(excp->nip)) {
 		xmon_trace[smp_processor_id()] = BRSTEP;
-		excp->msr |= 0x400;
+		xmon_enable_sstep(excp);
 	} else {
 		xmon_trace[smp_processor_id()] = 0;
 		insert_bpts();
@@ -298,7 +312,7 @@
 		remove_bpts();
 		excprint(regs);
 		xmon_trace[smp_processor_id()] = BRSTEP;
-		regs->msr |= 0x400;
+		xmon_enable_sstep(regs);
 	} else {
 		xmon(regs);
 	}
@@ -385,7 +399,7 @@
 		}
 		store_inst((void *) bp->address);
 	}
-#if !defined(CONFIG_8xx)
+#if ! (defined(CONFIG_8xx) || defined(CONFIG_4xx))
 	if (dabr.enabled)
 		set_dabr(dabr.address);
 	if (iabr.enabled)
@@ -400,7 +414,7 @@
 	struct bpt *bp;
 	unsigned instr;
 
-#if !defined(CONFIG_8xx)
+#if ! (defined(CONFIG_8xx) || defined(CONFIG_4xx))
 	set_dabr(0);
 	set_iabr(0);
 #endif
@@ -677,7 +691,7 @@
 
 	cmd = inchar();
 	switch (cmd) {
-#if !defined(CONFIG_8xx)
+#if ! (defined(CONFIG_8xx) || defined(CONFIG_4xx))
 	case 'd':
 		mode = 7;
 		cmd = inchar();
@@ -792,7 +806,7 @@
 	for (; sp != 0; sp = stack[0]) {
 		if (mread(sp, stack, sizeof(stack)) != sizeof(stack))
 			break;
-		printf("[%.8lx] ", stack);
+		printf("[%.8lx] ", stack[0]);
 		xmon_print_symbol(stack[1], " ", "\n");
 		if (stack[1] == (unsigned) &ret_from_except
 		    || stack[1] == (unsigned) &ret_from_except_full
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2f4f70c..b216ca6 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -460,8 +460,7 @@
 	  information in an s390 hypervisor environment.
 
 config KEXEC
-	bool "kexec system call (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "kexec system call"
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
@@ -487,8 +486,22 @@
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+
 source "arch/s390/oprofile/Kconfig"
 
+config KPROBES
+	bool "Kprobes (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && MODULES
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function.  register_kprobe() establishes
+	  a probepoint and specifies the callback.  Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+
+endmenu
+
 source "arch/s390/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h
index 71d65eb..0429481 100644
--- a/arch/s390/appldata/appldata.h
+++ b/arch/s390/appldata/appldata.h
@@ -29,22 +29,6 @@
 #define CTL_APPLDATA_NET_SUM	2125
 #define CTL_APPLDATA_PROC	2126
 
-#ifndef CONFIG_64BIT
-
-#define APPLDATA_START_INTERVAL_REC 0x00	/* Function codes for */
-#define APPLDATA_STOP_REC	    0x01	/* DIAG 0xDC	  */
-#define APPLDATA_GEN_EVENT_RECORD   0x02
-#define APPLDATA_START_CONFIG_REC   0x03
-
-#else
-
-#define APPLDATA_START_INTERVAL_REC 0x80
-#define APPLDATA_STOP_REC	    0x81
-#define APPLDATA_GEN_EVENT_RECORD   0x82
-#define APPLDATA_START_CONFIG_REC   0x83
-
-#endif /* CONFIG_64BIT */
-
 #define P_INFO(x...)	printk(KERN_INFO MY_PRINT_NAME " info: " x)
 #define P_ERROR(x...)	printk(KERN_ERR MY_PRINT_NAME " error: " x)
 #define P_WARNING(x...)	printk(KERN_WARNING MY_PRINT_NAME " status: " x)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index a0a94e0..b69ed74 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -14,20 +14,20 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/smp.h>
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/page-flags.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/sysctl.h>
-#include <asm/timer.h>
-//#include <linux/kernel_stat.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/workqueue.h>
+#include <asm/appldata.h>
+#include <asm/timer.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/smp.h>
 
 #include "appldata.h"
 
@@ -39,34 +39,6 @@
 
 #define TOD_MICRO	0x01000			/* nr. of TOD clock units
 						   for 1 microsecond */
-
-/*
- * Parameter list for DIAGNOSE X'DC'
- */
-#ifndef CONFIG_64BIT
-struct appldata_parameter_list {
-	u16 diag;		/* The DIAGNOSE code X'00DC'          */
-	u8  function;		/* The function code for the DIAGNOSE */
-	u8  parlist_length;	/* Length of the parameter list       */
-	u32 product_id_addr;	/* Address of the 16-byte product ID  */
-	u16 reserved;
-	u16 buffer_length;	/* Length of the application data buffer  */
-	u32 buffer_addr;	/* Address of the application data buffer */
-};
-#else
-struct appldata_parameter_list {
-	u16 diag;
-	u8  function;
-	u8  parlist_length;
-	u32 unused01;
-	u16 reserved;
-	u16 buffer_length;
-	u32 unused02;
-	u64 product_id_addr;
-	u64 buffer_addr;
-};
-#endif /* CONFIG_64BIT */
-
 /*
  * /proc entries (sysctl)
  */
@@ -181,46 +153,17 @@
 int appldata_diag(char record_nr, u16 function, unsigned long buffer,
 			u16 length, char *mod_lvl)
 {
-	unsigned long ry;
-	struct appldata_product_id {
-		char prod_nr[7];			/* product nr. */
-		char prod_fn[2];			/* product function */
-		char record_nr;				/* record nr. */
-		char version_nr[2];			/* version */
-		char release_nr[2];			/* release */
-		char mod_lvl[2];			/* modification lvl. */
-	} appldata_product_id = {
-	/* all strings are EBCDIC, record_nr is byte */
+	struct appldata_product_id id = {
 		.prod_nr    = {0xD3, 0xC9, 0xD5, 0xE4,
-				0xE7, 0xD2, 0xD9},	/* "LINUXKR" */
-		.prod_fn    = {0xD5, 0xD3},		/* "NL" */
+			       0xE7, 0xD2, 0xD9},	/* "LINUXKR" */
+		.prod_fn    = 0xD5D3,			/* "NL" */
 		.record_nr  = record_nr,
-		.version_nr = {0xF2, 0xF6},		/* "26" */
-		.release_nr = {0xF0, 0xF1},		/* "01" */
-		.mod_lvl    = {mod_lvl[0], mod_lvl[1]},
-	};
-	struct appldata_parameter_list appldata_parameter_list = {
-				.diag = 0xDC,
-				.function = function,
-				.parlist_length =
-					sizeof(appldata_parameter_list),
-				.buffer_length = length,
-				.product_id_addr =
-					(unsigned long) &appldata_product_id,
-				.buffer_addr = virt_to_phys((void *) buffer)
+		.version_nr = 0xF2F6,			/* "26" */
+		.release_nr = 0xF0F1,			/* "01" */
+		.mod_lvl    = (mod_lvl[0]) << 8 | mod_lvl[1],
 	};
 
-	if (!MACHINE_IS_VM)
-		return -ENOSYS;
-	ry = -1;
-	asm volatile(
-			"diag %1,%0,0xDC\n\t"
-			: "=d" (ry)
-			: "d" (&appldata_parameter_list),
-			  "m" (appldata_parameter_list),
-			  "m" (appldata_product_id)
-			: "cc");
-	return (int) ry;
+	return appldata_asm(&id, function, (void *) buffer, length);
 }
 /************************ timer, work, DIAG <END> ****************************/
 
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 161acc5..76a1552 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -16,6 +16,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/netdevice.h>
 #include <linux/sched.h>
+#include <asm/appldata.h>
 #include <asm/smp.h>
 
 #include "appldata.h"
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 5713c7e..15c9eec 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -16,9 +16,9 @@
  *
  */
 
+#include <crypto/algapi.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/crypto.h>
 #include "crypt_s390.h"
 
 #define AES_MIN_KEY_SIZE	16
@@ -34,13 +34,16 @@
 struct s390_aes_ctx {
 	u8 iv[AES_BLOCK_SIZE];
 	u8 key[AES_MAX_KEY_SIZE];
+	long enc;
+	long dec;
 	int key_len;
 };
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
 
 	switch (key_len) {
 	case 16:
@@ -110,117 +113,11 @@
 	}
 }
 
-static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(AES_BLOCK_SIZE - 1);
-
-	switch (sctx->key_len) {
-	case 16:
-		ret = crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 24:
-		ret = crypt_s390_km(KM_AES_192_ENCRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 32:
-		ret = crypt_s390_km(KM_AES_256_ENCRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	}
-	return nbytes;
-}
-
-static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(AES_BLOCK_SIZE - 1);
-
-	switch (sctx->key_len) {
-	case 16:
-		ret = crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 24:
-		ret = crypt_s390_km(KM_AES_192_DECRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 32:
-		ret = crypt_s390_km(KM_AES_256_DECRYPT, &sctx->key, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	}
-	return nbytes;
-}
-
-static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(AES_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, AES_BLOCK_SIZE);
-	switch (sctx->key_len) {
-	case 16:
-		ret = crypt_s390_kmc(KMC_AES_128_ENCRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 24:
-		ret = crypt_s390_kmc(KMC_AES_192_ENCRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 32:
-		ret = crypt_s390_kmc(KMC_AES_256_ENCRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	}
-	memcpy(desc->info, &sctx->iv, AES_BLOCK_SIZE);
-
-	return nbytes;
-}
-
-static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(AES_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, AES_BLOCK_SIZE);
-	switch (sctx->key_len) {
-	case 16:
-		ret = crypt_s390_kmc(KMC_AES_128_DECRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 24:
-		ret = crypt_s390_kmc(KMC_AES_192_DECRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	case 32:
-		ret = crypt_s390_kmc(KMC_AES_256_DECRYPT, &sctx->iv, out, in, nbytes);
-		BUG_ON((ret < 0) || (ret != nbytes));
-		break;
-	}
-	return nbytes;
-}
-
 
 static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
+	.cra_driver_name	=	"aes-s390",
+	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
@@ -233,10 +130,189 @@
 			.cia_setkey		=	aes_set_key,
 			.cia_encrypt		=	aes_encrypt,
 			.cia_decrypt		=	aes_decrypt,
-			.cia_encrypt_ecb	=	aes_encrypt_ecb,
-			.cia_decrypt_ecb	=	aes_decrypt_ecb,
-			.cia_encrypt_cbc	=	aes_encrypt_cbc,
-			.cia_decrypt_cbc	=	aes_decrypt_cbc,
+		}
+	}
+};
+
+static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	switch (key_len) {
+	case 16:
+		sctx->enc = KM_AES_128_ENCRYPT;
+		sctx->dec = KM_AES_128_DECRYPT;
+		break;
+	case 24:
+		sctx->enc = KM_AES_192_ENCRYPT;
+		sctx->dec = KM_AES_192_DECRYPT;
+		break;
+	case 32:
+		sctx->enc = KM_AES_256_ENCRYPT;
+		sctx->dec = KM_AES_256_DECRYPT;
+		break;
+	}
+
+	return aes_set_key(tfm, in_key, key_len);
+}
+
+static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+			 struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes;
+
+	while ((nbytes = walk->nbytes)) {
+		/* only use complete blocks */
+		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		u8 *out = walk->dst.virt.addr;
+		u8 *in = walk->src.virt.addr;
+
+		ret = crypt_s390_km(func, param, out, in, n);
+		BUG_ON((ret < 0) || (ret != n));
+
+		nbytes &= AES_BLOCK_SIZE - 1;
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	return ret;
+}
+
+static int ecb_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk);
+}
+
+static int ecb_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk);
+}
+
+static struct crypto_alg ecb_aes_alg = {
+	.cra_name		=	"ecb(aes)",
+	.cra_driver_name	=	"ecb-aes-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ecb_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.setkey			=	ecb_aes_set_key,
+			.encrypt		=	ecb_aes_encrypt,
+			.decrypt		=	ecb_aes_decrypt,
+		}
+	}
+};
+
+static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	switch (key_len) {
+	case 16:
+		sctx->enc = KMC_AES_128_ENCRYPT;
+		sctx->dec = KMC_AES_128_DECRYPT;
+		break;
+	case 24:
+		sctx->enc = KMC_AES_192_ENCRYPT;
+		sctx->dec = KMC_AES_192_DECRYPT;
+		break;
+	case 32:
+		sctx->enc = KMC_AES_256_ENCRYPT;
+		sctx->dec = KMC_AES_256_DECRYPT;
+		break;
+	}
+
+	return aes_set_key(tfm, in_key, key_len);
+}
+
+static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+			 struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes = walk->nbytes;
+
+	if (!nbytes)
+		goto out;
+
+	memcpy(param, walk->iv, AES_BLOCK_SIZE);
+	do {
+		/* only use complete blocks */
+		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		u8 *out = walk->dst.virt.addr;
+		u8 *in = walk->src.virt.addr;
+
+		ret = crypt_s390_kmc(func, param, out, in, n);
+		BUG_ON((ret < 0) || (ret != n));
+
+		nbytes &= AES_BLOCK_SIZE - 1;
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	} while ((nbytes = walk->nbytes));
+	memcpy(walk->iv, param, AES_BLOCK_SIZE);
+
+out:
+	return ret;
+}
+
+static int cbc_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk);
+}
+
+static int cbc_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk);
+}
+
+static struct crypto_alg cbc_aes_alg = {
+	.cra_name		=	"cbc(aes)",
+	.cra_driver_name	=	"cbc-aes-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(cbc_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	cbc_aes_set_key,
+			.encrypt		=	cbc_aes_encrypt,
+			.decrypt		=	cbc_aes_decrypt,
 		}
 	}
 };
@@ -256,13 +332,40 @@
 		return -ENOSYS;
 
 	ret = crypto_register_alg(&aes_alg);
-	if (ret != 0)
-		printk(KERN_INFO "crypt_s390: aes_s390 couldn't be loaded.\n");
+	if (ret != 0) {
+		printk(KERN_INFO "crypt_s390: aes-s390 couldn't be loaded.\n");
+		goto aes_err;
+	}
+
+	ret = crypto_register_alg(&ecb_aes_alg);
+	if (ret != 0) {
+		printk(KERN_INFO
+		       "crypt_s390: ecb-aes-s390 couldn't be loaded.\n");
+		goto ecb_aes_err;
+	}
+
+	ret = crypto_register_alg(&cbc_aes_alg);
+	if (ret != 0) {
+		printk(KERN_INFO
+		       "crypt_s390: cbc-aes-s390 couldn't be loaded.\n");
+		goto cbc_aes_err;
+	}
+
+out:
 	return ret;
+
+cbc_aes_err:
+	crypto_unregister_alg(&ecb_aes_alg);
+ecb_aes_err:
+	crypto_unregister_alg(&aes_alg);
+aes_err:
+	goto out;
 }
 
 static void __exit aes_fini(void)
 {
+	crypto_unregister_alg(&cbc_aes_alg);
+	crypto_unregister_alg(&ecb_aes_alg);
 	crypto_unregister_alg(&aes_alg);
 }
 
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index d1c259a..efd836c 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -20,6 +20,9 @@
 #define CRYPT_S390_OP_MASK 0xFF00
 #define CRYPT_S390_FUNC_MASK 0x00FF
 
+#define CRYPT_S390_PRIORITY 300
+#define CRYPT_S390_COMPOSITE_PRIORITY 400
+
 /* s930 cryptographic operations */
 enum crypt_s390_operations {
 	CRYPT_S390_KM   = 0x0100,
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index b3f7496..2aba048 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -13,9 +13,10 @@
  * (at your option) any later version.
  *
  */
+
+#include <crypto/algapi.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/crypto.h>
 
 #include "crypt_s390.h"
 #include "crypto_des.h"
@@ -45,9 +46,10 @@
 };
 
 static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
-		      unsigned int keylen, u32 *flags)
+		      unsigned int keylen)
 {
 	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
 	int ret;
 
 	/* test if key is valid (not a weak key) */
@@ -71,69 +73,10 @@
 	crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
 }
 
-static unsigned int des_encrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_DEA_ENCRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des_decrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_DEA_DECRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des_encrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES_BLOCK_SIZE - 1);
-
-	memcpy(sctx->iv, desc->info, DES_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_DEA_ENCRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	memcpy(desc->info, sctx->iv, DES_BLOCK_SIZE);
-	return nbytes;
-}
-
-static unsigned int des_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct crypt_s390_des_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, DES_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_DEA_DECRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
 static struct crypto_alg des_alg = {
 	.cra_name		=	"des",
+	.cra_driver_name	=	"des-s390",
+	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
@@ -146,10 +89,143 @@
 			.cia_setkey		=	des_setkey,
 			.cia_encrypt		=	des_encrypt,
 			.cia_decrypt		=	des_decrypt,
-			.cia_encrypt_ecb	=	des_encrypt_ecb,
-			.cia_decrypt_ecb	=	des_decrypt_ecb,
-			.cia_encrypt_cbc	=	des_encrypt_cbc,
-			.cia_decrypt_cbc	=	des_decrypt_cbc,
+		}
+	}
+};
+
+static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
+			    void *param, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes;
+
+	while ((nbytes = walk->nbytes)) {
+		/* only use complete blocks */
+		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		u8 *out = walk->dst.virt.addr;
+		u8 *in = walk->src.virt.addr;
+
+		ret = crypt_s390_km(func, param, out, in, n);
+		BUG_ON((ret < 0) || (ret != n));
+
+		nbytes &= DES_BLOCK_SIZE - 1;
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	return ret;
+}
+
+static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
+			    void *param, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes = walk->nbytes;
+
+	if (!nbytes)
+		goto out;
+
+	memcpy(param, walk->iv, DES_BLOCK_SIZE);
+	do {
+		/* only use complete blocks */
+		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		u8 *out = walk->dst.virt.addr;
+		u8 *in = walk->src.virt.addr;
+
+		ret = crypt_s390_kmc(func, param, out, in, n);
+		BUG_ON((ret < 0) || (ret != n));
+
+		nbytes &= DES_BLOCK_SIZE - 1;
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	} while ((nbytes = walk->nbytes));
+	memcpy(walk->iv, param, DES_BLOCK_SIZE);
+
+out:
+	return ret;
+}
+
+static int ecb_des_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk);
+}
+
+static int ecb_des_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk);
+}
+
+static struct crypto_alg ecb_des_alg = {
+	.cra_name		=	"ecb(des)",
+	.cra_driver_name	=	"ecb-des-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ecb_des_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES_KEY_SIZE,
+			.max_keysize		=	DES_KEY_SIZE,
+			.setkey			=	des_setkey,
+			.encrypt		=	ecb_des_encrypt,
+			.decrypt		=	ecb_des_decrypt,
+		}
+	}
+};
+
+static int cbc_des_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk);
+}
+
+static int cbc_des_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk);
+}
+
+static struct crypto_alg cbc_des_alg = {
+	.cra_name		=	"cbc(des)",
+	.cra_driver_name	=	"cbc-des-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(cbc_des_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES_KEY_SIZE,
+			.max_keysize		=	DES_KEY_SIZE,
+			.ivsize			=	DES_BLOCK_SIZE,
+			.setkey			=	des_setkey,
+			.encrypt		=	cbc_des_encrypt,
+			.decrypt		=	cbc_des_decrypt,
 		}
 	}
 };
@@ -167,11 +243,12 @@
  *
  */
 static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen, u32 *flags)
+			   unsigned int keylen)
 {
 	int i, ret;
 	struct crypt_s390_des3_128_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u8* temp_key = key;
+	const u8 *temp_key = key;
+	u32 *flags = &tfm->crt_flags;
 
 	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE))) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED;
@@ -202,73 +279,10 @@
 		      DES3_128_BLOCK_SIZE);
 }
 
-static unsigned int des3_128_encrypt_ecb(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_128_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_TDEA_128_ENCRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des3_128_decrypt_ecb(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_128_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_TDEA_128_DECRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des3_128_encrypt_cbc(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_128_BLOCK_SIZE - 1);
-
-	memcpy(sctx->iv, desc->info, DES3_128_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_TDEA_128_ENCRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	memcpy(desc->info, sctx->iv, DES3_128_BLOCK_SIZE);
-	return nbytes;
-}
-
-static unsigned int des3_128_decrypt_cbc(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_128_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_128_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, DES3_128_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_TDEA_128_DECRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
 static struct crypto_alg des3_128_alg = {
 	.cra_name		=	"des3_ede128",
+	.cra_driver_name	=	"des3_ede128-s390",
+	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES3_128_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_128_ctx),
@@ -281,10 +295,95 @@
 			.cia_setkey		=	des3_128_setkey,
 			.cia_encrypt		=	des3_128_encrypt,
 			.cia_decrypt		=	des3_128_decrypt,
-			.cia_encrypt_ecb	=	des3_128_encrypt_ecb,
-			.cia_decrypt_ecb	=	des3_128_decrypt_ecb,
-			.cia_encrypt_cbc	=	des3_128_encrypt_cbc,
-			.cia_decrypt_cbc	=	des3_128_decrypt_cbc,
+		}
+	}
+};
+
+static int ecb_des3_128_encrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_TDEA_128_ENCRYPT, sctx->key, &walk);
+}
+
+static int ecb_des3_128_decrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_TDEA_128_DECRYPT, sctx->key, &walk);
+}
+
+static struct crypto_alg ecb_des3_128_alg = {
+	.cra_name		=	"ecb(des3_ede128)",
+	.cra_driver_name	=	"ecb-des3_ede128-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES3_128_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_128_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(
+						ecb_des3_128_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_128_KEY_SIZE,
+			.max_keysize		=	DES3_128_KEY_SIZE,
+			.setkey			=	des3_128_setkey,
+			.encrypt		=	ecb_des3_128_encrypt,
+			.decrypt		=	ecb_des3_128_decrypt,
+		}
+	}
+};
+
+static int cbc_des3_128_encrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_TDEA_128_ENCRYPT, sctx->iv, &walk);
+}
+
+static int cbc_des3_128_decrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_128_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_TDEA_128_DECRYPT, sctx->iv, &walk);
+}
+
+static struct crypto_alg cbc_des3_128_alg = {
+	.cra_name		=	"cbc(des3_ede128)",
+	.cra_driver_name	=	"cbc-des3_ede128-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES3_128_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_128_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(
+						cbc_des3_128_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_128_KEY_SIZE,
+			.max_keysize		=	DES3_128_KEY_SIZE,
+			.ivsize			=	DES3_128_BLOCK_SIZE,
+			.setkey			=	des3_128_setkey,
+			.encrypt		=	cbc_des3_128_encrypt,
+			.decrypt		=	cbc_des3_128_decrypt,
 		}
 	}
 };
@@ -303,11 +402,12 @@
  *
  */
 static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen, u32 *flags)
+			   unsigned int keylen)
 {
 	int i, ret;
 	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u8* temp_key = key;
+	const u8 *temp_key = key;
+	u32 *flags = &tfm->crt_flags;
 
 	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
 	    memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
@@ -341,73 +441,10 @@
 		      DES3_192_BLOCK_SIZE);
 }
 
-static unsigned int des3_192_encrypt_ecb(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_192_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_TDEA_192_ENCRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des3_192_decrypt_ecb(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_192_BLOCK_SIZE - 1);
-	ret = crypt_s390_km(KM_TDEA_192_DECRYPT, sctx->key, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
-static unsigned int des3_192_encrypt_cbc(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_192_BLOCK_SIZE - 1);
-
-	memcpy(sctx->iv, desc->info, DES3_192_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_TDEA_192_ENCRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	memcpy(desc->info, sctx->iv, DES3_192_BLOCK_SIZE);
-	return nbytes;
-}
-
-static unsigned int des3_192_decrypt_cbc(const struct cipher_desc *desc,
-					 u8 *out, const u8 *in,
-					 unsigned int nbytes)
-{
-	struct crypt_s390_des3_192_ctx *sctx = crypto_tfm_ctx(desc->tfm);
-	int ret;
-
-	/* only use complete blocks */
-	nbytes &= ~(DES3_192_BLOCK_SIZE - 1);
-
-	memcpy(&sctx->iv, desc->info, DES3_192_BLOCK_SIZE);
-	ret = crypt_s390_kmc(KMC_TDEA_192_DECRYPT, &sctx->iv, out, in, nbytes);
-	BUG_ON((ret < 0) || (ret != nbytes));
-
-	return nbytes;
-}
-
 static struct crypto_alg des3_192_alg = {
 	.cra_name		=	"des3_ede",
+	.cra_driver_name	=	"des3_ede-s390",
+	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES3_192_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
@@ -420,10 +457,95 @@
 			.cia_setkey		=	des3_192_setkey,
 			.cia_encrypt		=	des3_192_encrypt,
 			.cia_decrypt		=	des3_192_decrypt,
-			.cia_encrypt_ecb	=	des3_192_encrypt_ecb,
-			.cia_decrypt_ecb	=	des3_192_decrypt_ecb,
-			.cia_encrypt_cbc	=	des3_192_encrypt_cbc,
-			.cia_decrypt_cbc	=	des3_192_decrypt_cbc,
+		}
+	}
+};
+
+static int ecb_des3_192_encrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk);
+}
+
+static int ecb_des3_192_decrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk);
+}
+
+static struct crypto_alg ecb_des3_192_alg = {
+	.cra_name		=	"ecb(des3_ede)",
+	.cra_driver_name	=	"ecb-des3_ede-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES3_192_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(
+						ecb_des3_192_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_192_KEY_SIZE,
+			.max_keysize		=	DES3_192_KEY_SIZE,
+			.setkey			=	des3_192_setkey,
+			.encrypt		=	ecb_des3_192_encrypt,
+			.decrypt		=	ecb_des3_192_decrypt,
+		}
+	}
+};
+
+static int cbc_des3_192_encrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk);
+}
+
+static int cbc_des3_192_decrypt(struct blkcipher_desc *desc,
+				struct scatterlist *dst,
+				struct scatterlist *src, unsigned int nbytes)
+{
+	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk);
+}
+
+static struct crypto_alg cbc_des3_192_alg = {
+	.cra_name		=	"cbc(des3_ede)",
+	.cra_driver_name	=	"cbc-des3_ede-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	DES3_192_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(
+						cbc_des3_192_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_192_KEY_SIZE,
+			.max_keysize		=	DES3_192_KEY_SIZE,
+			.ivsize			=	DES3_192_BLOCK_SIZE,
+			.setkey			=	des3_192_setkey,
+			.encrypt		=	cbc_des3_192_encrypt,
+			.decrypt		=	cbc_des3_192_decrypt,
 		}
 	}
 };
@@ -437,22 +559,69 @@
 	    !crypt_s390_func_available(KM_TDEA_192_ENCRYPT))
 		return -ENOSYS;
 
-	ret |= (crypto_register_alg(&des_alg) == 0) ? 0:1;
-	ret |= (crypto_register_alg(&des3_128_alg) == 0) ? 0:2;
-	ret |= (crypto_register_alg(&des3_192_alg) == 0) ? 0:4;
-	if (ret) {
-		crypto_unregister_alg(&des3_192_alg);
-		crypto_unregister_alg(&des3_128_alg);
-		crypto_unregister_alg(&des_alg);
-		return -EEXIST;
-	}
-	return 0;
+	ret = crypto_register_alg(&des_alg);
+	if (ret)
+		goto des_err;
+	ret = crypto_register_alg(&ecb_des_alg);
+	if (ret)
+		goto ecb_des_err;
+	ret = crypto_register_alg(&cbc_des_alg);
+	if (ret)
+		goto cbc_des_err;
+
+	ret = crypto_register_alg(&des3_128_alg);
+	if (ret)
+		goto des3_128_err;
+	ret = crypto_register_alg(&ecb_des3_128_alg);
+	if (ret)
+		goto ecb_des3_128_err;
+	ret = crypto_register_alg(&cbc_des3_128_alg);
+	if (ret)
+		goto cbc_des3_128_err;
+
+	ret = crypto_register_alg(&des3_192_alg);
+	if (ret)
+		goto des3_192_err;
+	ret = crypto_register_alg(&ecb_des3_192_alg);
+	if (ret)
+		goto ecb_des3_192_err;
+	ret = crypto_register_alg(&cbc_des3_192_alg);
+	if (ret)
+		goto cbc_des3_192_err;
+
+out:
+	return ret;
+
+cbc_des3_192_err:
+	crypto_unregister_alg(&ecb_des3_192_alg);
+ecb_des3_192_err:
+	crypto_unregister_alg(&des3_192_alg);
+des3_192_err:
+	crypto_unregister_alg(&cbc_des3_128_alg);
+cbc_des3_128_err:
+	crypto_unregister_alg(&ecb_des3_128_alg);
+ecb_des3_128_err:
+	crypto_unregister_alg(&des3_128_alg);
+des3_128_err:
+	crypto_unregister_alg(&cbc_des_alg);
+cbc_des_err:
+	crypto_unregister_alg(&ecb_des_alg);
+ecb_des_err:
+	crypto_unregister_alg(&des_alg);
+des_err:
+	goto out;
 }
 
 static void __exit fini(void)
 {
+	crypto_unregister_alg(&cbc_des3_192_alg);
+	crypto_unregister_alg(&ecb_des3_192_alg);
 	crypto_unregister_alg(&des3_192_alg);
+	crypto_unregister_alg(&cbc_des3_128_alg);
+	crypto_unregister_alg(&ecb_des3_128_alg);
 	crypto_unregister_alg(&des3_128_alg);
+	crypto_unregister_alg(&cbc_des_alg);
+	crypto_unregister_alg(&ecb_des_alg);
 	crypto_unregister_alg(&des_alg);
 }
 
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 9d34a35..49ca869 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -126,6 +126,8 @@
 
 static struct crypto_alg alg = {
 	.cra_name	=	"sha1",
+	.cra_driver_name =	"sha1-s390",
+	.cra_priority	=	CRYPT_S390_PRIORITY,
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA1_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct crypt_s390_sha1_ctx),
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index f573df3..8e4e675 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -127,6 +127,8 @@
 
 static struct crypto_alg alg = {
 	.cra_name	=	"sha256",
+	.cra_driver_name =	"sha256-s390",
+	.cra_priority	=	CRYPT_S390_PRIORITY,
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA256_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct s390_sha256_ctx),
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index f1d4591..35da539 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -428,6 +428,7 @@
 # CONFIG_VMLOGRDR is not set
 # CONFIG_VMCP is not set
 # CONFIG_MONREADER is not set
+CONFIG_MONWRITER=m
 
 #
 # Cryptographic devices
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index ea5567b..f3dbd91 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -1,5 +1,5 @@
 /*
- *  fs/hypfs/hypfs.h
+ *  arch/s390/hypfs/hypfs.h
  *    Hypervisor filesystem for Linux on s390.
  *
  *    Copyright (C) IBM Corp. 2006
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 1785bce..75144ef 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -1,5 +1,5 @@
 /*
- *  fs/hypfs/hypfs_diag.c
+ *  arch/s390/hypfs/hypfs_diag.c
  *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
  *    implementation.
  *
@@ -432,12 +432,14 @@
 
 	buf = diag204_get_buffer(INFO_EXT, &pages);
 	if (!IS_ERR(buf)) {
-		if (diag204(SUBC_STIB7 | INFO_EXT, pages, buf) >= 0) {
+		if (diag204((unsigned long)SUBC_STIB7 |
+			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
 			diag204_store_sc = SUBC_STIB7;
 			diag204_info_type = INFO_EXT;
 			goto out;
 		}
-		if (diag204(SUBC_STIB6 | INFO_EXT, pages, buf) >= 0) {
+		if (diag204((unsigned long)SUBC_STIB6 |
+			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
-			diag204_store_sc = SUBC_STIB7;
+			diag204_store_sc = SUBC_STIB6;	/* record the subcode that was just probed successfully */
 			diag204_info_type = INFO_EXT;
 			goto out;
@@ -452,7 +454,8 @@
 		rc = PTR_ERR(buf);
 		goto fail_alloc;
 	}
-	if (diag204(SUBC_STIB4 | INFO_SIMPLE, pages, buf) >= 0) {
+	if (diag204((unsigned long)SUBC_STIB4 |
+		    (unsigned long)INFO_SIMPLE, pages, buf) >= 0) {
 		diag204_store_sc = SUBC_STIB4;
 		diag204_info_type = INFO_SIMPLE;
 		goto out;
@@ -476,7 +479,8 @@
 	buf = diag204_get_buffer(diag204_info_type, &pages);
 	if (IS_ERR(buf))
 		goto out;
-	if (diag204(diag204_store_sc | diag204_info_type, pages, buf) < 0)
+	if (diag204((unsigned long)diag204_store_sc |
+		    (unsigned long)diag204_info_type, pages, buf) < 0)
 		return ERR_PTR(-ENOSYS);
 out:
 	return buf;
@@ -531,7 +535,7 @@
 	return rc;
 }
 
-__exit void hypfs_diag_exit(void)
+void hypfs_diag_exit(void)
 {
 	diag224_delete_name_table();
 	diag204_free_buffer();
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
index 793dea6..256b384 100644
--- a/arch/s390/hypfs/hypfs_diag.h
+++ b/arch/s390/hypfs/hypfs_diag.h
@@ -1,5 +1,5 @@
 /*
- *  fs/hypfs/hypfs_diag.h
+ *  arch/s390/hypfs/hypfs_diag.h
  *    Hypervisor filesystem for Linux on s390.
  *
  *    Copyright (C) IBM Corp. 2006
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 18c0919..bdade5f 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -1,5 +1,5 @@
 /*
- *  fs/hypfs/inode.c
+ *  arch/s390/hypfs/inode.c
  *    Hypervisor filesystem for Linux on s390.
  *
  *    Copyright (C) IBM Corp. 2006
@@ -312,10 +312,12 @@
 {
 	struct hypfs_sb_info *sb_info = sb->s_fs_info;
 
-	hypfs_delete_tree(sb->s_root);
-	hypfs_remove(sb_info->update_file);
-	kfree(sb->s_fs_info);
-	sb->s_fs_info = NULL;
+	if (sb->s_root) {
+		hypfs_delete_tree(sb->s_root);
+		hypfs_remove(sb_info->update_file);
+		kfree(sb->s_fs_info);
+		sb->s_fs_info = NULL;
+	}
 	kill_litter_super(sb);
 }
 
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 9a33ed6..aa97897 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -6,7 +6,7 @@
 
 obj-y	:=  bitmap.o traps.o time.o process.o \
             setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
-            semaphore.o s390_ext.o debug.o profile.o irq.o reipl_diag.o
+	    semaphore.o s390_ext.o debug.o profile.o irq.o ipl.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
@@ -24,6 +24,7 @@
 
 obj-$(CONFIG_VIRT_TIMER)	+= vtime.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
 
 # Kexec part
 S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 5b5799a..0c712b7 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -505,6 +505,8 @@
 	mvc	__THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
 	oi	__TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
+	bz	BASED(kernel_per)
 	l	%r3,__LC_PGM_ILC	 # load program interruption code
 	la	%r8,0x7f
 	nr	%r8,%r3                  # clear per-event-bit and ilc
@@ -536,6 +538,16 @@
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	b	BASED(sysc_do_svc)
 
+#
+# per was called from kernel, must be kprobes
+#
+kernel_per:
+	mvi	SP_TRAP+1(%r15),0x28	# set trap indication to pgm check
+	la	%r2,SP_PTREGS(%r15)	# address of register-save area
+	l	%r1,BASED(.Lhandle_per)	# load adr. of per handler
+	la	%r14,BASED(sysc_leave)	# load adr. of system return
+	br	%r1			# branch to do_single_step
+
 /*
  * IO interrupt handler routine
  */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 56f5f61..29bbfba 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -518,6 +518,8 @@
 #endif
 	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	lg	%r1,__TI_task(%r9)
+	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
+	jz	kernel_per
 	mvc	__THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
 	mvc	__THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
@@ -553,6 +555,16 @@
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	j	sysc_do_svc
 
+#
+# per was called from kernel, must be kprobes
+#
+kernel_per:
+	lhi	%r0,__LC_PGM_OLD_PSW
+	sth	%r0,SP_TRAP(%r15)	# set trap indication to pgm check
+	la	%r2,SP_PTREGS(%r15)	# address of register-save area
+	larl	%r14,sysc_leave		# load adr. of system ret, no work
+	jg	do_single_step		# branch to do_single_step
+
 /*
  * IO interrupt handler routine
  */
@@ -815,7 +827,7 @@
  */
 stack_overflow:
 	lg	%r15,__LC_PANIC_STACK	# change to panic stack
-	aghi	%r1,-SP_SIZE
+	aghi	%r15,-SP_SIZE
 	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
 	stmg	%r0,%r11,SP_R0(%r15)	# store gprs %r0-%r11 to kernel stack
 	la	%r1,__LC_SAVE_AREA
@@ -823,7 +835,7 @@
 	je	0f
 	chi	%r12,__LC_PGM_OLD_PSW
 	je	0f
-	la	%r1,__LC_SAVE_AREA+16
+	la	%r1,__LC_SAVE_AREA+32
 0:	mvc	SP_R12(32,%r15),0(%r1)  # move %r12-%r15 to stack
         xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain
         la      %r2,SP_PTREGS(%r15)	# load pt_regs
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index adad886..0f1db26 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -272,7 +272,7 @@
 # load parameter file from ipl device
 #
 .Lagain1:
- 	l     %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # ramdisk loc. is temp
+	l     %r2,.Linitrd		       # ramdisk loc. is temp
         bas   %r14,.Lloader                    # load parameter file
         ltr   %r2,%r2                          # got anything ?
         bz    .Lnopf
@@ -280,7 +280,7 @@
 	bnh   .Lnotrunc
 	la    %r2,895
 .Lnotrunc:
-	l     %r4,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
+	l     %r4,.Linitrd
 	clc   0(3,%r4),.L_hdr		       # if it is HDRx
 	bz    .Lagain1			       # skip dataset header
 	clc   0(3,%r4),.L_eof		       # if it is EOFx
@@ -323,14 +323,15 @@
 # load ramdisk from ipl device
 #	
 .Lagain2:
- 	l     %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # addr of ramdisk
+	l     %r2,.Linitrd		       # addr of ramdisk
+	st    %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
         bas   %r14,.Lloader                    # load ramdisk
  	st    %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of ramdisk
         ltr   %r2,%r2
         bnz   .Lrdcont
         st    %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found
 .Lrdcont:
-	l     %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
+	l     %r2,.Linitrd
 
 	clc   0(3,%r2),.L_hdr		       # skip HDRx and EOFx 
 	bz    .Lagain2
@@ -379,6 +380,7 @@
         l     %r1,.Lstartup
         br    %r1
 
+.Linitrd:.long _end + 0x400000		       # default address of initrd
 .Lparm:	.long  PARMAREA
 .Lstartup: .long startup
 .Lcvtab:.long  _ebcasc                         # ebcdic to ascii table
@@ -479,65 +481,6 @@
 	.byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 
 	.byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
 
-.macro GET_IPL_DEVICE
-.Lget_ipl_device:
-	l     %r1,0xb8			# get sid
-	sll   %r1,15			# test if subchannel is enabled
-	srl   %r1,31
-	ltr   %r1,%r1
-	bz    2f-.LPG1(%r13)		# subchannel disabled
-	l     %r1,0xb8
-	la    %r5,.Lipl_schib-.LPG1(%r13)
-	stsch 0(%r5)		        # get schib of subchannel
-	bnz   2f-.LPG1(%r13)		# schib not available
-	tm    5(%r5),0x01		# devno valid?
-	bno   2f-.LPG1(%r13)
-	la    %r6,ipl_parameter_flags-.LPG1(%r13)
-	oi    3(%r6),0x01		# set flag
-	la    %r2,ipl_devno-.LPG1(%r13)
-	mvc   0(2,%r2),6(%r5)		# store devno
-	tm    4(%r5),0x80		# qdio capable device?
-	bno   2f-.LPG1(%r13)
-	oi    3(%r6),0x02		# set flag
-
-	# copy ipl parameters
-
-	lhi   %r0,4096
-	l     %r2,20(%r0)		# get address of parameter list
-	lhi   %r3,IPL_PARMBLOCK_ORIGIN
-	st    %r3,20(%r0)
-	lhi   %r4,1
-	cr    %r2,%r3			# start parameters < destination ?
-	jl    0f
-	lhi   %r1,1			# copy direction is upwards
-	j     1f
-0:	lhi   %r1,-1			# copy direction is downwards
-	ar    %r2,%r0
-	ar    %r3,%r0
-	ar    %r2,%r1
-	ar    %r3,%r1
-1:	mvc   0(1,%r3),0(%r2)		# finally copy ipl parameters
-	ar    %r3,%r1
-	ar    %r2,%r1
-	sr    %r0,%r4
-	jne   1b
-	b     2f-.LPG1(%r13)
-
-	.align 4
-.Lipl_schib:
-	.rept 13
-	.long 0
-	.endr
-
-	.globl ipl_parameter_flags
-ipl_parameter_flags:
-	.long 0
-	.globl ipl_devno
-ipl_devno:
-	.word 0
-2:
-.endm
-
 #ifdef CONFIG_64BIT
 #include "head64.S"
 #else
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index a4dc61f..1fa9fa1c 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -26,8 +26,8 @@
 #
 	.org	PARMAREA
 	.long	0,0			# IPL_DEVICE
-	.long	0,RAMDISK_ORIGIN	# INITRD_START
-	.long	0,RAMDISK_SIZE		# INITRD_SIZE
+	.long	0,0			# INITRD_START
+	.long	0,0			# INITRD_SIZE
 
 	.org	COMMAND_LINE
 	.byte	"root=/dev/ram0 ro"
@@ -37,12 +37,23 @@
 
 startup_continue:
 	basr	%r13,0			# get base
-.LPG1:	GET_IPL_DEVICE
+.LPG1:	mvi	__LC_AR_MODE_ID,0	# set ESA flag (mode 0)
 	lctl	%c0,%c15,.Lctl-.LPG1(%r13) # load control registers
 	l	%r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
 					# move IPL device to lowcore
 	mvc	__LC_IPLDEV(4),IPL_DEVICE-PARMAREA(%r12)
+#
+# Setup stack
+#
+	l	%r15,.Linittu-.LPG1(%r13)
+	mvc	__LC_CURRENT(4),__TI_task(%r15)
+	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
+	st	%r15,__LC_KERNEL_STACK	# set end of kernel stack
+	ahi	%r15,-96
+	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain
 
+	l	%r14,.Lipl_save_parameters-.LPG1(%r13)
+	basr	%r14,%r14
 #
 # clear bss memory
 #
@@ -114,6 +125,10 @@
 	b	.Lfchunk-.LPG1(%r13)
 
 	.align 4
+.Lipl_save_parameters:
+	.long	ipl_save_parameters
+.Linittu:
+	.long	init_thread_union
 .Lpmask:
 	.byte	0
 .align 8
@@ -273,7 +288,23 @@
 .Lbss_end:  .long _end
 .Lparmaddr: .long PARMAREA
 .Lsccbaddr: .long .Lsccb
+
+	.globl ipl_schib
+ipl_schib:
+	.rept 13
+	.long 0
+	.endr
+
+	.globl ipl_flags
+ipl_flags:
+	.long 0
+	.globl ipl_devno
+ipl_devno:
+	.word 0
+
 	.org	0x12000
+.globl s390_readinfo_sccb
+s390_readinfo_sccb:
 .Lsccb:
 	.hword	0x1000			# length, one page
 	.byte	0x00,0x00,0x00
@@ -302,16 +333,6 @@
 	.globl	_stext
 _stext:	basr	%r13,0			# get base
 .LPG3:
-#
-# Setup stack
-#
-	l	%r15,.Linittu-.LPG3(%r13)
-	mvc	__LC_CURRENT(4),__TI_task(%r15)
-	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
-	st	%r15,__LC_KERNEL_STACK	# set end of kernel stack
-	ahi	%r15,-96
-	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain
-
 # check control registers
 	stctl	%c0,%c15,0(%r15)
 	oi	2(%r15),0x40		# enable sigp emergency signal
@@ -330,6 +351,5 @@
 #
 	.align	8
 .Ldw:	.long	0x000a0000,0x00000000
-.Linittu:.long	init_thread_union
 .Lstart:.long	start_kernel
 .Laregs:.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 9d80c5b..a8bdd96 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -26,8 +26,8 @@
 #
 	.org   PARMAREA
 	.quad  0			# IPL_DEVICE
-	.quad  RAMDISK_ORIGIN		# INITRD_START
-	.quad  RAMDISK_SIZE		# INITRD_SIZE
+	.quad  0			# INITRD_START
+	.quad  0			# INITRD_SIZE
 
 	.org   COMMAND_LINE
 	.byte  "root=/dev/ram0 ro"
@@ -39,8 +39,8 @@
 	basr  %r13,0			 # get base
 .LPG1:  sll   %r13,1                     # remove high order bit
         srl   %r13,1
-	GET_IPL_DEVICE
         lhi   %r1,1                      # mode 1 = esame
+	mvi   __LC_AR_MODE_ID,1		 # set esame flag
         slr   %r0,%r0                    # set cpuid to zero
         sigp  %r1,%r0,0x12               # switch to esame mode
 	sam64				 # switch to 64 bit mode
@@ -48,7 +48,18 @@
 	lg    %r12,.Lparmaddr-.LPG1(%r13)# pointer to parameter area
 					 # move IPL device to lowcore
         mvc   __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
+#
+# Setup stack
+#
+	larl  %r15,init_thread_union
+	lg    %r14,__TI_task(%r15)	# cache current in lowcore
+	stg   %r14,__LC_CURRENT
+	aghi  %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
+	stg   %r15,__LC_KERNEL_STACK	# set end of kernel stack
+	aghi  %r15,-160			# reserve 160 bytes below the stack end
+	xc    __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear all 8 bytes of the backchain
 
+	brasl %r14,ipl_save_parameters
 #
 # clear bss memory
 #
@@ -239,6 +250,19 @@
 	oi	7(%r12),0x80		# set IDTE flag
 0:
 
+#
+# find out if we have the MVCOS instruction
+#
+	la	%r1,0f-.LPG1(%r13)	# set program check address
+	stg	%r1,__LC_PGM_NEW_PSW+8
+	.short	0xc800			# mvcos 0(%r0),0(%r0),%r0
+	.short	0x0000
+	.short	0x0000
+0:	tm	0x8f,0x13		# special-operation exception?
+	bno	1f-.LPG1(%r13)		# if yes, MVCOS is present
+	oi	6(%r12),2		# set MVCOS flag
+1:
+
         lpswe .Lentry-.LPG1(13)         # jump to _stext in primary-space,
                                         # virtual and never return ...
         .align 16
@@ -268,7 +292,22 @@
 .Lparmaddr:
 	.quad	PARMAREA
 
+	.globl ipl_schib
+ipl_schib:
+	.rept 13
+	.long 0
+	.endr
+
+	.globl ipl_flags
+ipl_flags:
+	.long 0
+	.globl ipl_devno
+ipl_devno:
+	.word 0
+
 	.org	0x12000
+.globl s390_readinfo_sccb
+s390_readinfo_sccb:
 .Lsccb:
 	.hword 0x1000			# length, one page
 	.byte 0x00,0x00,0x00
@@ -297,24 +336,12 @@
         .globl _stext
 _stext:	basr  %r13,0                    # get base
 .LPG3:
-#
-# Setup stack
-#
-	larl  %r15,init_thread_union
-	lg    %r14,__TI_task(%r15)      # cache current in lowcore
-	stg   %r14,__LC_CURRENT
-        aghi  %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
-        stg   %r15,__LC_KERNEL_STACK    # set end of kernel stack
-        aghi  %r15,-160
-        xc    __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain
-
 # check control registers
         stctg  %c0,%c15,0(%r15)
 	oi     6(%r15),0x40             # enable sigp emergency signal
 	oi     4(%r15),0x10             # switch on low address proctection
         lctlg  %c0,%c15,0(%r15)
 
-#
         lam    0,15,.Laregs-.LPG3(%r13) # load access regs needed by uaccess
         brasl  %r14,start_kernel        # go to C code
 #
@@ -322,7 +349,7 @@
 #
         basr  %r13,0
 	lpswe .Ldw-.(%r13)           # load disabled wait psw
-#
+
             .align 8
 .Ldw:       .quad  0x0002000180000000,0x0000000000000000
 .Laregs:    .long  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
new file mode 100644
index 0000000..6555cc4
--- /dev/null
+++ b/arch/s390/kernel/ipl.c
@@ -0,0 +1,942 @@
+/*
+ *  arch/s390/kernel/ipl.c
+ *    ipl/reipl/dump support for Linux on s390.
+ *
+ *    Copyright (C) IBM Corp. 2005,2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>
+ *		 Volker Sameske <sameske@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
+#include <asm/cpcmd.h>
+#include <asm/cio.h>
+
+#define IPL_PARM_BLOCK_VERSION 0
+
+enum ipl_type {
+	IPL_TYPE_NONE	 = 1,
+	IPL_TYPE_UNKNOWN = 2,
+	IPL_TYPE_CCW	 = 4,
+	IPL_TYPE_FCP	 = 8,
+};
+
+#define IPL_NONE_STR	 "none"
+#define IPL_UNKNOWN_STR  "unknown"
+#define IPL_CCW_STR	 "ccw"
+#define IPL_FCP_STR	 "fcp"
+
+static char *ipl_type_str(enum ipl_type type)	/* string form of an ipl_type, as printed by the *_type_show handlers */
+{
+	switch (type) {
+	case IPL_TYPE_NONE:
+		return IPL_NONE_STR;
+	case IPL_TYPE_CCW:
+		return IPL_CCW_STR;
+	case IPL_TYPE_FCP:
+		return IPL_FCP_STR;
+	case IPL_TYPE_UNKNOWN:
+	default:	/* every value not listed above is reported as IPL_UNKNOWN_STR */
+		return IPL_UNKNOWN_STR;
+	}
+}
+
+enum ipl_method {
+	IPL_METHOD_NONE,
+	IPL_METHOD_CCW_CIO,
+	IPL_METHOD_CCW_DIAG,
+	IPL_METHOD_CCW_VM,
+	IPL_METHOD_FCP_RO_DIAG,
+	IPL_METHOD_FCP_RW_DIAG,
+	IPL_METHOD_FCP_RO_VM,
+};
+
+enum shutdown_action {
+	SHUTDOWN_REIPL,
+	SHUTDOWN_DUMP,
+	SHUTDOWN_STOP,
+};
+
+#define SHUTDOWN_REIPL_STR "reipl"
+#define SHUTDOWN_DUMP_STR  "dump"
+#define SHUTDOWN_STOP_STR  "stop"
+
+static char *shutdown_action_str(enum shutdown_action action)	/* string form of a shutdown_action */
+{
+	switch (action) {
+	case SHUTDOWN_REIPL:
+		return SHUTDOWN_REIPL_STR;
+	case SHUTDOWN_DUMP:
+		return SHUTDOWN_DUMP_STR;
+	case SHUTDOWN_STOP:
+		return SHUTDOWN_STOP_STR;
+	default:
+		BUG();	/* no string is defined for any other value */
+	}
+}
+
+enum diag308_subcode  {
+	DIAG308_IPL   = 3,
+	DIAG308_DUMP  = 4,
+	DIAG308_SET   = 5,
+	DIAG308_STORE = 6,
+};
+
+enum diag308_ipl_type {
+	DIAG308_IPL_TYPE_FCP = 0,
+	DIAG308_IPL_TYPE_CCW = 2,
+};
+
+enum diag308_opt {
+	DIAG308_IPL_OPT_IPL  = 0x10,
+	DIAG308_IPL_OPT_DUMP = 0x20,
+};
+
+enum diag308_rc {
+	DIAG308_RC_OK = 1,
+};
+
+static int diag308_set_works = 0;
+
+static int reipl_capabilities = IPL_TYPE_UNKNOWN;
+static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
+static enum ipl_method reipl_method = IPL_METHOD_NONE;
+static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_ccw;
+
+static int dump_capabilities = IPL_TYPE_NONE;
+static enum ipl_type dump_type = IPL_TYPE_NONE;
+static enum ipl_method dump_method = IPL_METHOD_NONE;
+static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_ccw;
+
+static enum shutdown_action on_panic_action = SHUTDOWN_STOP;
+
+static int diag308(unsigned long subcode, void *addr)	/* issue diag 0x308 with the given subcode; returns the value left in %r1 */
+{
+	register unsigned long _addr asm("0") = (unsigned long)addr;	/* pinned to %r0 */
+	register unsigned long _rc asm("1") = 0;	/* pinned to %r1; returned as 0 if the instruction leaves it untouched */
+
+	asm volatile (
+		"   diag %0,%2,0x308\n"
+		"0: \n"
+		".section __ex_table,\"a\"\n"
+#ifdef CONFIG_64BIT
+		"   .align 8\n"
+		"   .quad 0b, 0b\n"
+#else
+		"   .align 4\n"
+		"   .long 0b, 0b\n"
+#endif
+		".previous\n"	/* NOTE(review): the (0b, 0b) table entry presumably lets a faulting diag resume at label 0 - confirm */
+		: "+d" (_addr), "+d" (_rc)
+		: "d" (subcode) : "cc", "memory" );
+
+	return _rc;
+}
+
+/* SYSFS */
+
+#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value)		\
+static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys,	\
+		char *page)						\
+{									\
+	return sprintf(page, _format, _value);				\
+}									\
+static struct subsys_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL);
+
+#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)	\
+static ssize_t sys_##_prefix##_##_name##_show(struct subsystem *subsys,	\
+		char *page)						\
+{									\
+	return sprintf(page, _fmt_out,					\
+			(unsigned long long) _value);			\
+}									\
+static ssize_t sys_##_prefix##_##_name##_store(struct subsystem *subsys,\
+		const char *buf, size_t len)				\
+{									\
+	unsigned long long value;					\
+	if (sscanf(buf, _fmt_in, &value) != 1)				\
+		return -EINVAL;						\
+	_value = value;							\
+	return len;							\
+}									\
+static struct subsys_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name,(S_IRUGO | S_IWUSR),				\
+			sys_##_prefix##_##_name##_show,			\
+			sys_##_prefix##_##_name##_store);
+
+static void make_attrs_ro(struct attribute **attrs)	/* attrs: NULL-terminated array of attribute pointers */
+{
+	while (*attrs) {
+		(*attrs)->mode = S_IRUGO;	/* overwrite the mode so no write bit remains */
+		attrs++;
+	}
+}
+
+/*
+ * ipl section
+ */
+
+static enum ipl_type ipl_get_type(void)	/* derive the ipl type from ipl_flags and the parameter block header */
+{
+	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
+
+	if (!(ipl_flags & IPL_DEVNO_VALID))
+		return IPL_TYPE_UNKNOWN;
+	if (!(ipl_flags & IPL_PARMBLOCK_VALID))
+		return IPL_TYPE_CCW;	/* devno valid but no valid parameter block: reported as ccw */
+	if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION)
+		return IPL_TYPE_UNKNOWN;	/* block version newer than this code supports */
+	if (ipl->hdr.pbt != DIAG308_IPL_TYPE_FCP)
+		return IPL_TYPE_UNKNOWN;	/* fcp is the only block type accepted here */
+	return IPL_TYPE_FCP;
+}
+
+static ssize_t ipl_type_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(ipl_get_type()));
+}
+
+static struct subsys_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
+
+static ssize_t sys_ipl_device_show(struct subsystem *subsys, char *page)
+{
+	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
+
+	switch (ipl_get_type()) {
+	case IPL_TYPE_CCW:
+		return sprintf(page, "0.0.%04x\n", ipl_devno);
+	case IPL_TYPE_FCP:
+		return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
+	default:
+		return 0;
+	}
+}
+
+static struct subsys_attribute sys_ipl_device_attr =
+	__ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
+
+static ssize_t ipl_parameter_read(struct kobject *kobj, char *buf, loff_t off,
+				  size_t count)	/* copy up to count bytes of the ipl parameter block, starting at off, into buf */
+{
+	unsigned int size = IPL_PARMBLOCK_SIZE;
+
+	if (off > size)	/* offset beyond the block: nothing to return */
+		return 0;
+	if (off + count > size)
+		count = size - off;	/* clamp the request to the end of the block */
+	memcpy(buf, (void *)IPL_PARMBLOCK_START + off, count);
+	return count;
+}
+
+static struct bin_attribute ipl_parameter_attr = {
+	.attr = {
+		.name = "binary_parameter",
+		.mode = S_IRUGO,
+		.owner = THIS_MODULE,
+	},
+	.size = PAGE_SIZE,
+	.read = &ipl_parameter_read,
+};
+
+static ssize_t ipl_scp_data_read(struct kobject *kobj, char *buf, loff_t off,
+	size_t count)	/* copy up to count bytes of the fcp scp_data area, starting at off, into buf */
+{
+	unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;	/* length is taken from the parameter block itself */
+	void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
+
+	if (off > size)	/* offset beyond the data: nothing to return */
+		return 0;
+	if (off + count > size)
+		count = size - off;	/* clamp the request to scp_data_len */
+	memcpy(buf, scp_data + off, count);
+	return count;
+}
+
+static struct bin_attribute ipl_scp_data_attr = {
+	.attr = {
+		.name = "scp_data",
+		.mode = S_IRUGO,
+		.owner = THIS_MODULE,
+	},
+	.size = PAGE_SIZE,
+	.read = &ipl_scp_data_read,
+};
+
+/* FCP ipl device attributes */
+
+DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.br_lba);
+
+static struct attribute *ipl_fcp_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_device_attr.attr,
+	&sys_ipl_fcp_wwpn_attr.attr,
+	&sys_ipl_fcp_lun_attr.attr,
+	&sys_ipl_fcp_bootprog_attr.attr,
+	&sys_ipl_fcp_br_lba_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_fcp_attr_group = {
+	.attrs = ipl_fcp_attrs,
+};
+
+/* CCW ipl device attributes */
+
+static struct attribute *ipl_ccw_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_device_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_ccw_attr_group = {
+	.attrs = ipl_ccw_attrs,
+};
+
+/* UNKNOWN ipl device attributes */
+
+static struct attribute *ipl_unknown_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_unknown_attr_group = {
+	.attrs = ipl_unknown_attrs,
+};
+
+static decl_subsys(ipl, NULL, NULL);
+
+/*
+ * reipl section
+ */
+
+/* FCP reipl device attributes */
+
+DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+		   reipl_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n",
+		   reipl_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+		   reipl_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
+		   reipl_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   reipl_block_fcp->ipl_info.fcp.devno);
+
+static struct attribute *reipl_fcp_attrs[] = {
+	&sys_reipl_fcp_device_attr.attr,
+	&sys_reipl_fcp_wwpn_attr.attr,
+	&sys_reipl_fcp_lun_attr.attr,
+	&sys_reipl_fcp_bootprog_attr.attr,
+	&sys_reipl_fcp_br_lba_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_fcp_attr_group = {
+	.name  = IPL_FCP_STR,
+	.attrs = reipl_fcp_attrs,
+};
+
+/* CCW reipl device attributes */
+
+DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+	reipl_block_ccw->ipl_info.ccw.devno);
+
+static struct attribute *reipl_ccw_attrs[] = {
+	&sys_reipl_ccw_device_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_ccw_attr_group = {
+	.name  = IPL_CCW_STR,
+	.attrs = reipl_ccw_attrs,
+};
+
+/* reipl type */
+
+static int reipl_set_type(enum ipl_type type)	/* select reipl_type and the matching reipl_method; 0 or -EINVAL */
+{
+	if (!(reipl_capabilities & type))	/* type is tested as a bit against the capability mask */
+		return -EINVAL;
+
+	switch(type) {
+	case IPL_TYPE_CCW:
+		if (MACHINE_IS_VM)
+			reipl_method = IPL_METHOD_CCW_VM;
+		else
+			reipl_method = IPL_METHOD_CCW_CIO;
+		break;
+	case IPL_TYPE_FCP:
+		if (diag308_set_works)	/* checked first, so it takes precedence over the VM test */
+			reipl_method = IPL_METHOD_FCP_RW_DIAG;
+		else if (MACHINE_IS_VM)
+			reipl_method = IPL_METHOD_FCP_RO_VM;
+		else
+			reipl_method = IPL_METHOD_FCP_RO_DIAG;
+		break;
+	default:
+		reipl_method = IPL_METHOD_NONE;
+	}
+	reipl_type = type;
+	return 0;
+}
+
+static ssize_t reipl_type_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+}
+
+static ssize_t reipl_type_store(struct subsystem *subsys, const char *buf,
+				size_t len)	/* parse "ccw" or "fcp" from buf and apply it via reipl_set_type() */
+{
+	int rc = -EINVAL;	/* returned unchanged when buf matches neither keyword */
+
+	if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)	/* prefix match: text after the keyword is ignored */
+		rc = reipl_set_type(IPL_TYPE_CCW);
+	else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_FCP);
+	return (rc != 0) ? rc : len;	/* on success report the whole input as consumed */
+}
+
+static struct subsys_attribute reipl_type_attr =
+		__ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
+
+static decl_subsys(reipl, NULL, NULL);
+
+/*
+ * dump section
+ */
+
+/* FCP dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+		   dump_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n",
+		   dump_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+		   dump_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
+		   dump_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   dump_block_fcp->ipl_info.fcp.devno);
+
+static struct attribute *dump_fcp_attrs[] = {
+	&sys_dump_fcp_device_attr.attr,
+	&sys_dump_fcp_wwpn_attr.attr,
+	&sys_dump_fcp_lun_attr.attr,
+	&sys_dump_fcp_bootprog_attr.attr,
+	&sys_dump_fcp_br_lba_attr.attr,
+	NULL,
+};
+
+static struct attribute_group dump_fcp_attr_group = {
+	.name  = IPL_FCP_STR,
+	.attrs = dump_fcp_attrs,
+};
+
+/* CCW dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   dump_block_ccw->ipl_info.ccw.devno);
+
+static struct attribute *dump_ccw_attrs[] = {
+	&sys_dump_ccw_device_attr.attr,
+	NULL,
+};
+
+static struct attribute_group dump_ccw_attr_group = {
+	.name  = IPL_CCW_STR,
+	.attrs = dump_ccw_attrs,
+};
+
+/* dump type */
+
+static int dump_set_type(enum ipl_type type)	/* select dump_type and the matching dump_method; 0 or -EINVAL */
+{
+	if (!(dump_capabilities & type))	/* type is tested as a bit against the capability mask */
+		return -EINVAL;
+	switch(type) {
+	case IPL_TYPE_CCW:
+		if (MACHINE_IS_VM)
+			dump_method = IPL_METHOD_CCW_VM;
+		else
+			dump_method = IPL_METHOD_CCW_CIO;
+		break;
+	case IPL_TYPE_FCP:
+		dump_method = IPL_METHOD_FCP_RW_DIAG;	/* unlike reipl_set_type(), no read-only fallback is chosen here */
+		break;
+	default:
+		dump_method = IPL_METHOD_NONE;
+	}
+	dump_type = type;
+	return 0;
+}
+
+static ssize_t dump_type_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(dump_type));
+}
+
+static ssize_t dump_type_store(struct subsystem *subsys, const char *buf,
+			       size_t len)	/* parse "none", "ccw" or "fcp" from buf and apply it via dump_set_type() */
+{
+	int rc = -EINVAL;	/* returned unchanged when buf matches no keyword */
+
+	if (strncmp(buf, IPL_NONE_STR, strlen(IPL_NONE_STR)) == 0)	/* prefix match: text after the keyword is ignored */
+		rc = dump_set_type(IPL_TYPE_NONE);
+	else if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)
+		rc = dump_set_type(IPL_TYPE_CCW);
+	else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
+		rc = dump_set_type(IPL_TYPE_FCP);
+	return (rc != 0) ? rc : len;	/* on success report the whole input as consumed */
+}
+
+static struct subsys_attribute dump_type_attr =
+		__ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+
+static decl_subsys(dump, NULL, NULL);
+
+#ifdef CONFIG_SMP
+static void dump_smp_stop_all(void)	/* send sigp_stop to every online CPU except the calling one */
+{
+	int cpu;
+	preempt_disable();	/* presumably keeps smp_processor_id() stable across the loop - confirm */
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;	/* skip the CPU executing this loop */
+		while (signal_processor(cpu, sigp_stop) == sigp_busy)
+			udelay(10);	/* target reported busy: wait, then send the order again */
+	}
+	preempt_enable();
+}
+#else
+#define dump_smp_stop_all() do { } while (0)	/* no-op when CONFIG_SMP is not set */
+#endif
+
+/*
+ * Shutdown actions section
+ */
+
+static decl_subsys(shutdown_actions, NULL, NULL);
+
+/* on panic */
+
+static ssize_t on_panic_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%s\n", shutdown_action_str(on_panic_action));
+}
+
+static ssize_t on_panic_store(struct subsystem *subsys, const char *buf,
+			      size_t len)
+{
+	if (strncmp(buf, SHUTDOWN_REIPL_STR, strlen(SHUTDOWN_REIPL_STR)) == 0)
+		on_panic_action = SHUTDOWN_REIPL;
+	else if (strncmp(buf, SHUTDOWN_DUMP_STR,
+			 strlen(SHUTDOWN_DUMP_STR)) == 0)
+		on_panic_action = SHUTDOWN_DUMP;
+	else if (strncmp(buf, SHUTDOWN_STOP_STR,
+			 strlen(SHUTDOWN_STOP_STR)) == 0)
+		on_panic_action = SHUTDOWN_STOP;
+	else
+		return -EINVAL;
+
+	return len;
+}
+
+static struct subsys_attribute on_panic_attr =
+		__ATTR(on_panic, 0644, on_panic_show, on_panic_store);
+
+static void print_fcp_block(struct ipl_parameter_block *fcp_block)
+{
+	printk(KERN_EMERG "wwpn:      %016llx\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.wwpn);
+	printk(KERN_EMERG "lun:       %016llx\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.lun);
+	printk(KERN_EMERG "bootprog:  %llu\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.bootprog);
+	printk(KERN_EMERG "br_lba:    %llu\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.br_lba);
+	printk(KERN_EMERG "device:    %llx\n",
+		(unsigned long long)fcp_block->ipl_info.fcp.devno);
+	printk(KERN_EMERG "opt:       %x\n", fcp_block->ipl_info.fcp.opt);
+}
+
+void do_reipl(void)
+{
+	struct ccw_dev_id devid;
+	static char buf[100];
+
+	switch (reipl_type) {
+	case IPL_TYPE_CCW:
+		printk(KERN_EMERG "reboot on ccw device: 0.0.%04x\n",
+			reipl_block_ccw->ipl_info.ccw.devno);
+		break;
+	case IPL_TYPE_FCP:
+		printk(KERN_EMERG "reboot on fcp device:\n");
+		print_fcp_block(reipl_block_fcp);
+		break;
+	default:
+		break;
+	}
+
+	switch (reipl_method) {
+	case IPL_METHOD_CCW_CIO:
+		devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
+		devid.ssid  = 0;
+		reipl_ccw_dev(&devid);
+		break;
+	case IPL_METHOD_CCW_VM:
+		sprintf(buf, "IPL %X", reipl_block_ccw->ipl_info.ccw.devno);
+		cpcmd(buf, NULL, 0, NULL);
+		break;
+	case IPL_METHOD_CCW_DIAG:
+		diag308(DIAG308_SET, reipl_block_ccw);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case IPL_METHOD_FCP_RW_DIAG:
+		diag308(DIAG308_SET, reipl_block_fcp);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case IPL_METHOD_FCP_RO_DIAG:
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case IPL_METHOD_FCP_RO_VM:
+		cpcmd("IPL", NULL, 0, NULL);
+		break;
+	case IPL_METHOD_NONE:
+	default:
+		if (MACHINE_IS_VM)
+			cpcmd("IPL", NULL, 0, NULL);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	}
+	panic("reipl failed!\n");
+}
+
+static void do_dump(void)
+{
+	struct ccw_dev_id devid;
+	static char buf[100];
+
+	switch (dump_type) {
+	case IPL_TYPE_CCW:
+		printk(KERN_EMERG "Automatic dump on ccw device: 0.0.%04x\n",
+		       dump_block_ccw->ipl_info.ccw.devno);
+		break;
+	case IPL_TYPE_FCP:
+		printk(KERN_EMERG "Automatic dump on fcp device:\n");
+		print_fcp_block(dump_block_fcp);
+		break;
+	default:
+		return;
+	}
+
+	switch (dump_method) {
+	case IPL_METHOD_CCW_CIO:
+		dump_smp_stop_all();
+		devid.devno = dump_block_ccw->ipl_info.ccw.devno;
+		devid.ssid  = 0;
+		reipl_ccw_dev(&devid);
+		break;
+	case IPL_METHOD_CCW_VM:
+		dump_smp_stop_all();
+		sprintf(buf, "STORE STATUS");
+		cpcmd(buf, NULL, 0, NULL);
+		sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
+		cpcmd(buf, NULL, 0, NULL);
+		break;
+	case IPL_METHOD_CCW_DIAG:
+		diag308(DIAG308_SET, dump_block_ccw);
+		diag308(DIAG308_DUMP, NULL);
+		break;
+	case IPL_METHOD_FCP_RW_DIAG:
+		diag308(DIAG308_SET, dump_block_fcp);
+		diag308(DIAG308_DUMP, NULL);
+		break;
+	case IPL_METHOD_NONE:
+	default:
+		return;
+	}
+	printk(KERN_EMERG "Dump failed!\n");
+}
+
+/* init functions */
+
+static int __init ipl_register_fcp_files(void)
+{
+	int rc;
+
+	rc = sysfs_create_group(&ipl_subsys.kset.kobj,
+				&ipl_fcp_attr_group);
+	if (rc)
+		goto out;
+	rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj,
+				   &ipl_parameter_attr);
+	if (rc)
+		goto out_ipl_parm;
+	rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj,
+				   &ipl_scp_data_attr);
+	if (!rc)
+		goto out;
+
+	sysfs_remove_bin_file(&ipl_subsys.kset.kobj, &ipl_parameter_attr);
+
+out_ipl_parm:
+	sysfs_remove_group(&ipl_subsys.kset.kobj, &ipl_fcp_attr_group);
+out:
+	return rc;
+}
+
+static int __init ipl_init(void)
+{
+	int rc;
+
+	rc = firmware_register(&ipl_subsys);
+	if (rc)
+		return rc;
+	switch (ipl_get_type()) {
+	case IPL_TYPE_CCW:
+		rc = sysfs_create_group(&ipl_subsys.kset.kobj,
+					&ipl_ccw_attr_group);
+		break;
+	case IPL_TYPE_FCP:
+		rc = ipl_register_fcp_files();
+		break;
+	default:
+		rc = sysfs_create_group(&ipl_subsys.kset.kobj,
+					&ipl_unknown_attr_group);
+		break;
+	}
+	if (rc)
+		firmware_unregister(&ipl_subsys);
+	return rc;
+}
+
+static void __init reipl_probe(void)
+{
+	void *buffer;
+
+	buffer = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!buffer)
+		return;
+	if (diag308(DIAG308_STORE, buffer) == DIAG308_RC_OK)
+		diag308_set_works = 1;
+	free_page((unsigned long)buffer);
+}
+
+static int __init reipl_ccw_init(void)
+{
+	int rc;
+
+	reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_ccw)
+		return -ENOMEM;
+	rc = sysfs_create_group(&reipl_subsys.kset.kobj, &reipl_ccw_attr_group);
+	if (rc) {
+		free_page((unsigned long)reipl_block_ccw);
+		return rc;
+	}
+	reipl_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+	reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
+	reipl_block_ccw->hdr.blk0_len = sizeof(reipl_block_ccw->ipl_info.ccw);
+	reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+	if (ipl_get_type() == IPL_TYPE_CCW)
+		reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+	reipl_capabilities |= IPL_TYPE_CCW;
+	return 0;
+}
+
+static int __init reipl_fcp_init(void)
+{
+	int rc;
+
+	if ((!diag308_set_works) && (ipl_get_type() != IPL_TYPE_FCP))
+		return 0;
+	if ((!diag308_set_works) && (ipl_get_type() == IPL_TYPE_FCP))
+		make_attrs_ro(reipl_fcp_attrs);
+
+	reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_fcp)
+		return -ENOMEM;
+	rc = sysfs_create_group(&reipl_subsys.kset.kobj, &reipl_fcp_attr_group);
+	if (rc) {
+		free_page((unsigned long)reipl_block_fcp);
+		return rc;
+	}
+	if (ipl_get_type() == IPL_TYPE_FCP) {
+		memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
+	} else {
+		reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+		reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+		reipl_block_fcp->hdr.blk0_len =
+			sizeof(reipl_block_fcp->ipl_info.fcp);
+		reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+		reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL;
+	}
+	reipl_capabilities |= IPL_TYPE_FCP;
+	return 0;
+}
+
+static int __init reipl_init(void)
+{
+	int rc;
+
+	rc = firmware_register(&reipl_subsys);
+	if (rc)
+		return rc;
+	rc = subsys_create_file(&reipl_subsys, &reipl_type_attr);
+	if (rc) {
+		firmware_unregister(&reipl_subsys);
+		return rc;
+	}
+	rc = reipl_ccw_init();
+	if (rc)
+		return rc;
+	rc = reipl_fcp_init();
+	if (rc)
+		return rc;
+	rc = reipl_set_type(ipl_get_type());
+	if (rc)
+		return rc;
+	return 0;
+}
+
+static int __init dump_ccw_init(void)
+{
+	int rc;
+
+	dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_ccw)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_subsys.kset.kobj, &dump_ccw_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_ccw);
+		return rc;
+	}
+	dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+	dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_ccw->hdr.blk0_len = sizeof(dump_block_ccw->ipl_info.ccw);
+	dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+	dump_capabilities |= IPL_TYPE_CCW;
+	return 0;
+}
+
+extern char s390_readinfo_sccb[];
+
+static int __init dump_fcp_init(void)
+{
+	int rc;
+
+	if(!(s390_readinfo_sccb[91] & 0x2))
+		return 0; /* LDIPL DUMP is not installed */
+	if (!diag308_set_works)
+		return 0;
+	dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_fcp)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_subsys.kset.kobj, &dump_fcp_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_fcp);
+		return rc;
+	}
+	dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+	dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_fcp->hdr.blk0_len = sizeof(dump_block_fcp->ipl_info.fcp);
+	dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+	dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP;
+	dump_capabilities |= IPL_TYPE_FCP;
+	return 0;
+}
+
+#define SHUTDOWN_ON_PANIC_PRIO 0
+
+static int shutdown_on_panic_notify(struct notifier_block *self,
+				    unsigned long event, void *data)
+{
+	if (on_panic_action == SHUTDOWN_DUMP)
+		do_dump();
+	else if (on_panic_action == SHUTDOWN_REIPL)
+		do_reipl();
+	return NOTIFY_OK;
+}
+
+static struct notifier_block shutdown_on_panic_nb = {
+	.notifier_call = shutdown_on_panic_notify,
+	.priority = SHUTDOWN_ON_PANIC_PRIO
+};
+
+static int __init dump_init(void)
+{
+	int rc;
+
+	rc = firmware_register(&dump_subsys);
+	if (rc)
+		return rc;
+	rc = subsys_create_file(&dump_subsys, &dump_type_attr);
+	if (rc) {
+		firmware_unregister(&dump_subsys);
+		return rc;
+	}
+	rc = dump_ccw_init();
+	if (rc)
+		return rc;
+	rc = dump_fcp_init();
+	if (rc)
+		return rc;
+	dump_set_type(IPL_TYPE_NONE);
+	return 0;
+}
+
+static int __init shutdown_actions_init(void)
+{
+	int rc;
+
+	rc = firmware_register(&shutdown_actions_subsys);
+	if (rc)
+		return rc;
+	rc = subsys_create_file(&shutdown_actions_subsys, &on_panic_attr);
+	if (rc) {
+		firmware_unregister(&shutdown_actions_subsys);
+		return rc;
+	}
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &shutdown_on_panic_nb);
+	return 0;
+}
+
+static int __init s390_ipl_init(void)
+{
+	int rc;
+
+	reipl_probe();
+	rc = ipl_init();
+	if (rc)
+		return rc;
+	rc = reipl_init();
+	if (rc)
+		return rc;
+	rc = dump_init();
+	if (rc)
+		return rc;
+	rc = shutdown_actions_init();
+	if (rc)
+		return rc;
+	return 0;
+}
+
+__initcall(s390_ipl_init);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
new file mode 100644
index 0000000..ca28fb0
--- /dev/null
+++ b/arch/s390/kernel/kprobes.c
@@ -0,0 +1,657 @@
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2006
+ *
+ * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
+ */
+
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/stop_machine.h>
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/sections.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	/* Make sure the probe isn't going on a difficult instruction */
+	if (is_prohibited_opcode((kprobe_opcode_t *) p->addr))
+		return -EINVAL;
+
+	if ((unsigned long)p->addr & 0x01) {
+		printk("Attempt to register kprobe at an unaligned address\n");
+		return -EINVAL;
+		}
+
+	/* Use the get_insn_slot() facility for correctness */
+	if (!(p->ainsn.insn = get_insn_slot()))
+		return -ENOMEM;
+
+	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+
+	get_instruction_type(&p->ainsn);
+	p->opcode = *p->addr;
+	return 0;
+}
+
+int __kprobes is_prohibited_opcode(kprobe_opcode_t *instruction)
+{
+	switch (*(__u8 *) instruction) {
+	case 0x0c:	/* bassm */
+	case 0x0b:	/* bsm	 */
+	case 0x83:	/* diag  */
+	case 0x44:	/* ex	 */
+		return -EINVAL;
+	}
+	switch (*(__u16 *) instruction) {
+	case 0x0101:	/* pr	 */
+	case 0xb25a:	/* bsa	 */
+	case 0xb240:	/* bakr  */
+	case 0xb258:	/* bsg	 */
+	case 0xb218:	/* pc	 */
+	case 0xb228:	/* pt	 */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void __kprobes get_instruction_type(struct arch_specific_insn *ainsn)
+{
+	/* default fixup method */
+	ainsn->fixup = FIXUP_PSW_NORMAL;
+
+	/* save r1 operand */
+	ainsn->reg = (*ainsn->insn & 0xf0) >> 4;
+
+	/* instruction length in bytes, from opcode bits 0-1 (pop 5-5) */
+	switch (*(__u8 *) (ainsn->insn) >> 6) {
+	case 0:
+		ainsn->ilen = 2;
+		break;
+	case 1:
+	case 2:
+		ainsn->ilen = 4;
+		break;
+	case 3:
+		ainsn->ilen = 6;
+		break;
+	}
+
+	switch (*(__u8 *) ainsn->insn) {
+	case 0x05:	/* balr	*/
+	case 0x0d:	/* basr */
+		ainsn->fixup = FIXUP_RETURN_REGISTER;
+		/* if r2 = 0, no branch will be taken */
+		if ((*ainsn->insn & 0x0f) == 0)
+			ainsn->fixup |= FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x06:	/* bctr	*/
+	case 0x07:	/* bcr	*/
+		ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x45:	/* bal	*/
+	case 0x4d:	/* bas	*/
+		ainsn->fixup = FIXUP_RETURN_REGISTER;
+		break;
+	case 0x47:	/* bc	*/
+	case 0x46:	/* bct	*/
+	case 0x86:	/* bxh	*/
+	case 0x87:	/* bxle	*/
+		ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x82:	/* lpsw	*/
+		ainsn->fixup = FIXUP_NOT_REQUIRED;
+		break;
+	case 0xb2:	/* lpswe */
+		if (*(((__u8 *) ainsn->insn) + 1) == 0xb2) {
+			ainsn->fixup = FIXUP_NOT_REQUIRED;
+		}
+		break;
+	case 0xa7:	/* bras	*/
+		if ((*ainsn->insn & 0x0f) == 0x05) {
+			ainsn->fixup |= FIXUP_RETURN_REGISTER;
+		}
+		break;
+	case 0xc0:
+		if ((*ainsn->insn & 0x0f) == 0x00  /* larl  */
+			|| (*ainsn->insn & 0x0f) == 0x05) /* brasl */
+			ainsn->fixup |= FIXUP_RETURN_REGISTER;
+		break;
+	case 0xeb:
+		if (*(((__u8 *) ainsn->insn) + 5 ) == 0x44 ||	/* bxhg  */
+			*(((__u8 *) ainsn->insn) + 5) == 0x45) {/* bxleg */
+			ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+		}
+		break;
+	case 0xe3:	/* bctg	*/
+		if (*(((__u8 *) ainsn->insn) + 5) == 0x46) {
+			ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN;
+		}
+		break;
+	}
+}
+
+static int __kprobes swap_instruction(void *aref)
+{
+	struct ins_replace_args *args = aref;
+	int err = -EFAULT;
+
+	asm volatile(
+		"0: mvc  0(2,%2),0(%3)\n"
+		"1: la   %0,0\n"
+		"2:\n"
+		EX_TABLE(0b,2b)
+		: "+d" (err), "=m" (*args->ptr)
+		: "a" (args->ptr), "a" (&args->new), "m" (args->new));
+	return err;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long status = kcb->kprobe_status;
+	struct ins_replace_args args;
+
+	args.ptr = p->addr;
+	args.old = p->opcode;
+	args.new = BREAKPOINT_INSTRUCTION;
+
+	kcb->kprobe_status = KPROBE_SWAP_INST;
+	stop_machine_run(swap_instruction, &args, NR_CPUS);
+	kcb->kprobe_status = status;
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long status = kcb->kprobe_status;
+	struct ins_replace_args args;
+
+	args.ptr = p->addr;
+	args.old = BREAKPOINT_INSTRUCTION;
+	args.new = p->opcode;
+
+	kcb->kprobe_status = KPROBE_SWAP_INST;
+	stop_machine_run(swap_instruction, &args, NR_CPUS);
+	kcb->kprobe_status = status;
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	mutex_lock(&kprobe_mutex);
+	free_insn_slot(p->ainsn.insn);
+	mutex_unlock(&kprobe_mutex);
+}
+
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+	per_cr_bits kprobe_per_regs[1];
+
+	memset(kprobe_per_regs, 0, sizeof(per_cr_bits));
+	regs->psw.addr = (unsigned long)p->ainsn.insn | PSW_ADDR_AMODE;
+
+	/* Set up the per control reg info, will pass to lctl */
+	kprobe_per_regs[0].em_instruction_fetch = 1;
+	kprobe_per_regs[0].starting_addr = (unsigned long)p->ainsn.insn;
+	kprobe_per_regs[0].ending_addr = (unsigned long)p->ainsn.insn + 1;
+
+	/* Set the PER control regs, turns on single step for this address */
+	__ctl_load(kprobe_per_regs, 9, 11);
+	regs->psw.mask |= PSW_MASK_PER;
+	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK);
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+	kcb->prev_kprobe.kprobe_saved_imask = kcb->kprobe_saved_imask;
+	memcpy(kcb->prev_kprobe.kprobe_saved_ctl, kcb->kprobe_saved_ctl,
+					sizeof(kcb->kprobe_saved_ctl));
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+	kcb->kprobe_saved_imask = kcb->prev_kprobe.kprobe_saved_imask;
+	memcpy(kcb->kprobe_saved_ctl, kcb->prev_kprobe.kprobe_saved_ctl,
+					sizeof(kcb->kprobe_saved_ctl));
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+						struct kprobe_ctlblk *kcb)
+{
+	__get_cpu_var(current_kprobe) = p;
+	/* Save the interrupt and per flags */
+	kcb->kprobe_saved_imask = regs->psw.mask &
+	    (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK);
+	/* Save the control regs that govern PER */
+	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+}
+
+/* Called with kretprobe_lock held */
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+					struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
+
+		/* Replace the return addr with trampoline addr */
+		regs->gprs[14] = (unsigned long)&kretprobe_trampoline;
+
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	int ret = 0;
+	kprobe_opcode_t *addr = (kprobe_opcode_t *)
+		((regs->psw.addr & PSW_ADDR_INSN) - 2);
+	struct kprobe_ctlblk *kcb;
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+
+	/* Check we're not actually recursing */
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if (kcb->kprobe_status == KPROBE_HIT_SS &&
+			    *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
+				regs->psw.mask &= ~PSW_MASK_PER;
+				regs->psw.mask |= kcb->kprobe_saved_imask;
+				goto no_kprobe;
+			}
+			/* We have reentered the kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * We here save the original kprobes variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, regs, kcb);
+			kprobes_inc_nmissed_count(p);
+			prepare_singlestep(p, regs);
+			kcb->kprobe_status = KPROBE_REENTER;
+			return 1;
+		} else {
+			p = __get_cpu_var(current_kprobe);
+			if (p->break_handler && p->break_handler(p, regs)) {
+				goto ss_probe;
+			}
+		}
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p) {
+		if (*addr != BREAKPOINT_INSTRUCTION) {
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it.  Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address.  In either case, no further
+			 * handling of this interrupt is appropriate.
+			 * (addr has opcode type: only the opcode is compared.)
+			 */
+			ret = 1;
+		}
+		/* Not one of ours: let kernel handle it */
+		goto no_kprobe;
+	}
+
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+	set_current_kprobe(p, regs, kcb);
+	if (p->pre_handler && p->pre_handler(p, regs))
+		/* handler has already set things up, so skip ss setup */
+		return 1;
+
+ss_probe:
+	prepare_singlestep(p, regs);
+	kcb->kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/*
+ * Function return probe trampoline:
+ *	- init_kprobes() establishes a probepoint here
+ *	- When the probed function returns, this probe
+ *		causes the handlers to fire
+ */
+void __kprobes kretprobe_trampoline_holder(void)
+{
+	asm volatile(".global kretprobe_trampoline\n"
+		     "kretprobe_trampoline: bcr 0,0\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head;
+	struct hlist_node *node, *tmp;
+	unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+
+	spin_lock_irqsave(&kretprobe_lock, flags);
+	head = kretprobe_inst_table_head(current);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because multiple functions in the call path
+	 * have a return probe installed on them, and/or more than one
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *	 function, the first instance's ret_addr will point to the
+	 *	 real return address, and all the rest will point to
+	 *	 kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address) {
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+		}
+	}
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+
+	reset_current_kprobe();
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	preempt_enable_no_resched();
+
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we don't want the post_handler
+	 * to run (and have re-enabled preemption)
+	 */
+	return 1;
+}
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction whose first halfword has been replaced by the "breakpoint"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is p->ainsn.insn.
+ */
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	regs->psw.addr &= PSW_ADDR_INSN;
+
+	if (p->ainsn.fixup & FIXUP_PSW_NORMAL)
+		regs->psw.addr = (unsigned long)p->addr +
+				((unsigned long)regs->psw.addr -
+				 (unsigned long)p->ainsn.insn);
+
+	if (p->ainsn.fixup & FIXUP_BRANCH_NOT_TAKEN)
+		if ((unsigned long)regs->psw.addr -
+		    (unsigned long)p->ainsn.insn == p->ainsn.ilen)
+			regs->psw.addr = (unsigned long)p->addr + p->ainsn.ilen;
+
+	if (p->ainsn.fixup & FIXUP_RETURN_REGISTER)
+		regs->gprs[p->ainsn.reg] = ((unsigned long)p->addr +
+						(regs->gprs[p->ainsn.reg] -
+						(unsigned long)p->ainsn.insn))
+						| PSW_ADDR_AMODE;
+
+	regs->psw.addr |= PSW_ADDR_AMODE;
+	/* turn off PER mode */
+	regs->psw.mask &= ~PSW_MASK_PER;
+	/* Restore the original per control regs */
+	__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+	regs->psw.mask |= kcb->kprobe_saved_imask;
+}
+
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
+		return 0;
+
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	resume_execution(cur, regs);
+
+	/*Restore back the original saved kprobes variables and continue. */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	}
+	reset_current_kprobe();
+out:
+	preempt_enable_no_resched();
+
+	/*
+	 * if somebody else is singlestepping across a probe point, psw mask
+	 * will have PER set, in which case, continue the remaining processing
+	 * of do_single_step, as if this is not a probe hit.
+	 */
+	if (regs->psw.mask & PSW_MASK_PER) {
+		return 0;
+	}
+
+	return 1;
+}
+
+static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	const struct exception_table_entry *entry;
+
+	switch(kcb->kprobe_status) {
+	case KPROBE_SWAP_INST:
+		/* We are here because the instruction replacement failed */
+		return 0;
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe, point the psw address back to the probe address
+		 * and allow the page fault handler to continue as a
+		 * normal page fault.
+		 */
+		regs->psw.addr = (unsigned long)cur->addr | PSW_ADDR_AMODE;
+		regs->psw.mask &= ~PSW_MASK_PER;
+		regs->psw.mask |= kcb->kprobe_saved_imask;
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
+		preempt_enable_no_resched();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accounting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(cur);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+			return 1;
+
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+		if (entry) {
+			regs->psw.addr = entry->fixup | PSW_ADDR_AMODE;
+			return 1;
+		}
+
+		/*
+		 * fixup_exception() could not handle it,
+		 * Let do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Wrapper routine for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_BPT:
+		if (kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_SSTEP:
+		if (post_kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_TRAP:
+	case DIE_PAGE_FAULT:
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
+		if (kprobe_running() &&
+		    kprobe_fault_handler(args->regs, args->trapnr))
+			ret = NOTIFY_STOP;
+		preempt_enable();
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	unsigned long addr;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+	/* setup return addr to the jprobe handler routine */
+	regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE;
+
+	/* r14 is the function return address */
+	kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14];
+	/* r15 is the stack pointer */
+	kcb->jprobe_saved_r15 = (unsigned long)regs->gprs[15];
+	addr = (unsigned long)kcb->jprobe_saved_r15;
+
+	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *) addr,
+	       MIN_STACK_SIZE(addr));
+	return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+	asm volatile(".word 0x0002");
+}
+
+void __kprobes jprobe_return_end(void)
+{
+	asm volatile("bcr 0,0");
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_r15);
+
+	/* Put the regs back */
+	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* put the stack back */
+	memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
+	       MIN_STACK_SIZE(stack_addr));
+	preempt_enable_no_resched();
+	return 1;
+}
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) & kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	return register_kprobe(&trampoline_p);
+}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 658e5ac..4562cdb 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -8,13 +8,30 @@
 
 #include <asm/lowcore.h>
 
-		.globl	do_reipl
-do_reipl:	basr	%r13,0
+		.globl	do_reipl_asm
+do_reipl_asm:	basr	%r13,0
 .Lpg0:		lpsw	.Lnewpsw-.Lpg0(%r13)
-.Lpg1:		lctl	%c6,%c6,.Lall-.Lpg0(%r13)
-                stctl   %c0,%c0,.Lctlsave-.Lpg0(%r13)
-                ni      .Lctlsave-.Lpg0(%r13),0xef
-                lctl    %c0,%c0,.Lctlsave-.Lpg0(%r13)
+
+		# switch off lowcore protection
+
+.Lpg1:		stctl	%c0,%c0,.Lctlsave1-.Lpg0(%r13)
+		stctl	%c0,%c0,.Lctlsave2-.Lpg0(%r13)
+		ni	.Lctlsave1-.Lpg0(%r13),0xef
+		lctl	%c0,%c0,.Lctlsave1-.Lpg0(%r13)
+
+		# do store status of all registers
+
+		stm	%r0,%r15,__LC_GPREGS_SAVE_AREA
+		stctl	%c0,%c15,__LC_CREGS_SAVE_AREA
+		mvc	__LC_CREGS_SAVE_AREA(4),.Lctlsave2-.Lpg0(%r13)
+		stam	%a0,%a15,__LC_AREGS_SAVE_AREA
+		stpx	__LC_PREFIX_SAVE_AREA
+		stckc	.Lclkcmp-.Lpg0(%r13)
+		mvc	__LC_CLOCK_COMP_SAVE_AREA(8),.Lclkcmp-.Lpg0(%r13)
+		stpt	__LC_CPU_TIMER_SAVE_AREA
+		st	%r13, __LC_PSW_SAVE_AREA+4
+
+		lctl	%c6,%c6,.Lall-.Lpg0(%r13)
                 lr      %r1,%r2
         	mvc     __LC_PGM_NEW_PSW(8),.Lpcnew-.Lpg0(%r13)
                 stsch   .Lschib-.Lpg0(%r13)                                    
@@ -46,9 +63,11 @@
 .Ldisab:	st      %r14,.Ldispsw+4-.Lpg0(%r13)
 		lpsw	.Ldispsw-.Lpg0(%r13)
                 .align 	8
+.Lclkcmp:	.quad	0x0000000000000000
 .Lall:		.long	0xff000000
 .Lnull:		.long   0x00000000
-.Lctlsave:      .long   0x00000000
+.Lctlsave1:	.long	0x00000000
+.Lctlsave2:	.long	0x00000000
                 .align 	8
 .Lnewpsw:	.long   0x00080000,0x80000000+.Lpg1
 .Lpcnew:  	.long   0x00080000,0x80000000+.Lecs
diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S
index 4d090d6..95bd1e2 100644
--- a/arch/s390/kernel/reipl64.S
+++ b/arch/s390/kernel/reipl64.S
@@ -8,13 +8,30 @@
  */
 
 #include <asm/lowcore.h>
-		.globl	do_reipl
-do_reipl:	basr	%r13,0
-.Lpg0:		lpswe   .Lnewpsw-.Lpg0(%r13)
+		.globl	do_reipl_asm
+do_reipl_asm:	basr	%r13,0
+
+		# do store status of all registers
+
+.Lpg0:		stg	%r1,.Lregsave-.Lpg0(%r13)
+		lghi	%r1,0x1000
+		stmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1)
+		lg	%r0,.Lregsave-.Lpg0(%r13)
+		stg	%r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1)
+		stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1)
+		stam	%a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1)
+		stpx	__LC_PREFIX_SAVE_AREA-0x1000(%r1)
+		stfpc	__LC_FP_CREG_SAVE_AREA-0x1000(%r1)
+		stckc	.Lclkcmp-.Lpg0(%r13)
+		mvc	__LC_CLOCK_COMP_SAVE_AREA-0x1000(8,%r1),.Lclkcmp-.Lpg0(%r13)
+		stpt	__LC_CPU_TIMER_SAVE_AREA-0x1000(%r1)
+		stg	%r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1)
+
+		lpswe	.Lnewpsw-.Lpg0(%r13)
 .Lpg1:		lctlg	%c6,%c6,.Lall-.Lpg0(%r13)
-                stctg   %c0,%c0,.Lctlsave-.Lpg0(%r13)
-                ni      .Lctlsave+4-.Lpg0(%r13),0xef
-                lctlg   %c0,%c0,.Lctlsave-.Lpg0(%r13)
+		stctg	%c0,%c0,.Lregsave-.Lpg0(%r13)
+		ni	.Lregsave+4-.Lpg0(%r13),0xef
+		lctlg	%c0,%c0,.Lregsave-.Lpg0(%r13)
                 lgr     %r1,%r2
         	mvc     __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
                 stsch   .Lschib-.Lpg0(%r13)                                    
@@ -50,8 +67,9 @@
 		st     %r14,.Ldispsw+12-.Lpg0(%r13)
 		lpswe	.Ldispsw-.Lpg0(%r13)
                 .align 	8
+.Lclkcmp:	.quad	0x0000000000000000
 .Lall:		.quad	0x00000000ff000000
-.Lctlsave:      .quad   0x0000000000000000
+.Lregsave:	.quad	0x0000000000000000
 .Lnull:		.long   0x0000000000000000
                 .align 	16
 /*
@@ -92,5 +110,3 @@
 		.long	0x00000000,0x00000000
 		.long	0x00000000,0x00000000
 	
-
-	
diff --git a/arch/s390/kernel/reipl_diag.c b/arch/s390/kernel/reipl_diag.c
deleted file mode 100644
index 1f33951..0000000
--- a/arch/s390/kernel/reipl_diag.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * This file contains the implementation of the
- * Linux re-IPL support
- *
- * (C) Copyright IBM Corp. 2005
- *
- * Author(s): Volker Sameske (sameske@de.ibm.com)
- *
- */
-
-#include <linux/kernel.h>
-
-static unsigned int reipl_diag_rc1;
-static unsigned int reipl_diag_rc2;
-
-/*
- * re-IPL the system using the last used IPL parameters
- */
-void reipl_diag(void)
-{
-        asm volatile (
-		"   la   %%r4,0\n"
-		"   la   %%r5,0\n"
-                "   diag %%r4,%2,0x308\n"
-                "0:\n"
-		"   st   %%r4,%0\n"
-		"   st   %%r5,%1\n"
-                ".section __ex_table,\"a\"\n"
-#ifdef CONFIG_64BIT
-                "   .align 8\n"
-                "   .quad 0b, 0b\n"
-#else
-                "   .align 4\n"
-                "   .long 0b, 0b\n"
-#endif
-                ".previous\n"
-                : "=m" (reipl_diag_rc1), "=m" (reipl_diag_rc2)
-		: "d" (3) : "cc", "4", "5" );
-}
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index c73a454..9f19e83 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -25,12 +25,6 @@
 EXPORT_SYMBOL(_ni_bitmap);
 EXPORT_SYMBOL(_zb_findmap);
 EXPORT_SYMBOL(_sb_findmap);
-EXPORT_SYMBOL(__copy_from_user_asm);
-EXPORT_SYMBOL(__copy_to_user_asm);
-EXPORT_SYMBOL(__copy_in_user_asm);
-EXPORT_SYMBOL(__clear_user_asm);
-EXPORT_SYMBOL(__strncpy_from_user_asm);
-EXPORT_SYMBOL(__strnlen_user_asm);
 EXPORT_SYMBOL(diag10);
 
 /*
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c902f05..e3d9325 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -37,6 +37,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/device.h>
 #include <linux/notifier.h>
+#include <linux/pfn.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -50,6 +51,12 @@
 #include <asm/sections.h>
 
 /*
+ * User copy operations.
+ */
+struct uaccess_ops uaccess;
+EXPORT_SYMBOL_GPL(uaccess);
+
+/*
  * Machine setup..
  */
 unsigned int console_mode = 0;
@@ -284,16 +291,9 @@
 /*
  * Reboot, halt and power_off routines for non SMP.
  */
-extern void reipl(unsigned long devno);
-extern void reipl_diag(void);
 static void do_machine_restart_nonsmp(char * __unused)
 {
-	reipl_diag();
-
-	if (MACHINE_IS_VM)
-		cpcmd ("IPL", NULL, 0, NULL);
-	else
-		reipl (0x10000 | S390_lowcore.ipl_device);
+	do_reipl();
 }
 
 static void do_machine_halt_nonsmp(void)
@@ -501,13 +501,47 @@
 	 * partially used pages are not usable - thus
 	 * we are rounding upwards:
 	 */
-	start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	end_pfn = max_pfn = memory_end >> PAGE_SHIFT;
+	start_pfn = PFN_UP(__pa(&_end));
+	end_pfn = max_pfn = PFN_DOWN(memory_end);
 
 	/* Initialize storage key for kernel pages */
 	for (init_pfn = 0 ; init_pfn < start_pfn; init_pfn++)
 		page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY);
 
+#ifdef CONFIG_BLK_DEV_INITRD
+	/*
+	 * Move the initrd in case the bitmap of the bootmem allocator
+	 * would overwrite it.
+	 */
+
+	if (INITRD_START && INITRD_SIZE) {
+		unsigned long bmap_size;
+		unsigned long start;
+
+		bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
+		bmap_size = PFN_PHYS(bmap_size);	/* page count -> bytes */
+		/* Bitmap would reach the initrd: move it past the bitmap + 1 page. */
+		if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
+			start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;
+
+			if (start + INITRD_SIZE > memory_end) {
+				printk("initrd extends beyond end of memory "
+				       "(0x%08lx > 0x%08lx)\n"
+				       "disabling initrd\n",
+				       start + INITRD_SIZE, memory_end);
+				INITRD_START = INITRD_SIZE = 0;	/* no room: drop it */
+			} else {
+				printk("Moving initrd (0x%08lx -> 0x%08lx, "
+				       "size: %ld)\n",
+				       INITRD_START, start, INITRD_SIZE);
+				memmove((void *) start, (void *) INITRD_START,
+					INITRD_SIZE);	/* memmove: ranges may overlap */
+				INITRD_START = start;
+			}
+		}
+	}
+#endif
+
 	/*
 	 * Initialize the boot-time allocator (with low memory only):
 	 */
@@ -559,7 +593,7 @@
 	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size);
 
 #ifdef CONFIG_BLK_DEV_INITRD
-	if (INITRD_START) {
+	if (INITRD_START && INITRD_SIZE) {
 		if (INITRD_START + INITRD_SIZE <= memory_end) {
 			reserve_bootmem(INITRD_START, INITRD_SIZE);
 			initrd_start = INITRD_START;
@@ -613,6 +647,11 @@
 
 	memory_end = memory_size;
 
+	if (MACHINE_HAS_MVCOS)
+		memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess));
+	else
+		memcpy(&uaccess, &uaccess_std, sizeof(uaccess));
+
 	parse_early_param();
 
 #ifndef CONFIG_64BIT
@@ -720,214 +759,3 @@
 	.show	= show_cpuinfo,
 };
 
-#define DEFINE_IPL_ATTR(_name, _format, _value)			\
-static ssize_t ipl_##_name##_show(struct subsystem *subsys,	\
-		char *page)					\
-{								\
-	return sprintf(page, _format, _value);			\
-}								\
-static struct subsys_attribute ipl_##_name##_attr =		\
-	__ATTR(_name, S_IRUGO, ipl_##_name##_show, NULL);
-
-DEFINE_IPL_ATTR(wwpn, "0x%016llx\n", (unsigned long long)
-		IPL_PARMBLOCK_START->fcp.wwpn);
-DEFINE_IPL_ATTR(lun, "0x%016llx\n", (unsigned long long)
-		IPL_PARMBLOCK_START->fcp.lun);
-DEFINE_IPL_ATTR(bootprog, "%lld\n", (unsigned long long)
-		IPL_PARMBLOCK_START->fcp.bootprog);
-DEFINE_IPL_ATTR(br_lba, "%lld\n", (unsigned long long)
-		IPL_PARMBLOCK_START->fcp.br_lba);
-
-enum ipl_type_type {
-	ipl_type_unknown,
-	ipl_type_ccw,
-	ipl_type_fcp,
-};
-
-static enum ipl_type_type
-get_ipl_type(void)
-{
-	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
-	if (!IPL_DEVNO_VALID)
-		return ipl_type_unknown;
-	if (!IPL_PARMBLOCK_VALID)
-		return ipl_type_ccw;
-	if (ipl->hdr.header.version > IPL_MAX_SUPPORTED_VERSION)
-		return ipl_type_unknown;
-	if (ipl->fcp.pbt != IPL_TYPE_FCP)
-		return ipl_type_unknown;
-	return ipl_type_fcp;
-}
-
-static ssize_t
-ipl_type_show(struct subsystem *subsys, char *page)
-{
-	switch (get_ipl_type()) {
-	case ipl_type_ccw:
-		return sprintf(page, "ccw\n");
-	case ipl_type_fcp:
-		return sprintf(page, "fcp\n");
-	default:
-		return sprintf(page, "unknown\n");
-	}
-}
-
-static struct subsys_attribute ipl_type_attr = __ATTR_RO(ipl_type);
-
-static ssize_t
-ipl_device_show(struct subsystem *subsys, char *page)
-{
-	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
-	switch (get_ipl_type()) {
-	case ipl_type_ccw:
-		return sprintf(page, "0.0.%04x\n", ipl_devno);
-	case ipl_type_fcp:
-		return sprintf(page, "0.0.%04x\n", ipl->fcp.devno);
-	default:
-		return 0;
-	}
-}
-
-static struct subsys_attribute ipl_device_attr =
-	__ATTR(device, S_IRUGO, ipl_device_show, NULL);
-
-static struct attribute *ipl_fcp_attrs[] = {
-	&ipl_type_attr.attr,
-	&ipl_device_attr.attr,
-	&ipl_wwpn_attr.attr,
-	&ipl_lun_attr.attr,
-	&ipl_bootprog_attr.attr,
-	&ipl_br_lba_attr.attr,
-	NULL,
-};
-
-static struct attribute_group ipl_fcp_attr_group = {
-	.attrs = ipl_fcp_attrs,
-};
-
-static struct attribute *ipl_ccw_attrs[] = {
-	&ipl_type_attr.attr,
-	&ipl_device_attr.attr,
-	NULL,
-};
-
-static struct attribute_group ipl_ccw_attr_group = {
-	.attrs = ipl_ccw_attrs,
-};
-
-static struct attribute *ipl_unknown_attrs[] = {
-	&ipl_type_attr.attr,
-	NULL,
-};
-
-static struct attribute_group ipl_unknown_attr_group = {
-	.attrs = ipl_unknown_attrs,
-};
-
-static ssize_t
-ipl_parameter_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
-{
-	unsigned int size = IPL_PARMBLOCK_SIZE;
-
-	if (off > size)
-		return 0;
-	if (off + count > size)
-		count = size - off;
-
-	memcpy(buf, (void *) IPL_PARMBLOCK_START + off, count);
-	return count;
-}
-
-static struct bin_attribute ipl_parameter_attr = {
-	.attr = {
-		.name = "binary_parameter",
-		.mode = S_IRUGO,
-		.owner = THIS_MODULE,
-	},
-	.size = PAGE_SIZE,
-	.read = &ipl_parameter_read,
-};
-
-static ssize_t
-ipl_scp_data_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
-{
-	unsigned int size =  IPL_PARMBLOCK_START->fcp.scp_data_len;
-	void *scp_data = &IPL_PARMBLOCK_START->fcp.scp_data;
-
-	if (off > size)
-		return 0;
-	if (off + count > size)
-		count = size - off;
-
-	memcpy(buf, scp_data + off, count);
-	return count;
-}
-
-static struct bin_attribute ipl_scp_data_attr = {
-	.attr = {
-		.name = "scp_data",
-		.mode = S_IRUGO,
-		.owner = THIS_MODULE,
-	},
-	.size = PAGE_SIZE,
-	.read = &ipl_scp_data_read,
-};
-
-static decl_subsys(ipl, NULL, NULL);
-
-static int ipl_register_fcp_files(void)
-{
-	int rc;
-
-	rc = sysfs_create_group(&ipl_subsys.kset.kobj,
-				&ipl_fcp_attr_group);
-	if (rc)
-		goto out;
-	rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj,
-				   &ipl_parameter_attr);
-	if (rc)
-		goto out_ipl_parm;
-	rc = sysfs_create_bin_file(&ipl_subsys.kset.kobj,
-				   &ipl_scp_data_attr);
-	if (!rc)
-		goto out;
-
-	sysfs_remove_bin_file(&ipl_subsys.kset.kobj, &ipl_parameter_attr);
-
-out_ipl_parm:
-	sysfs_remove_group(&ipl_subsys.kset.kobj, &ipl_fcp_attr_group);
-out:
-	return rc;
-}
-
-static int __init
-ipl_device_sysfs_register(void) {
-	int rc;
-
-	rc = firmware_register(&ipl_subsys);
-	if (rc)
-		goto out;
-
-	switch (get_ipl_type()) {
-	case ipl_type_ccw:
-		rc = sysfs_create_group(&ipl_subsys.kset.kobj,
-					&ipl_ccw_attr_group);
-		break;
-	case ipl_type_fcp:
-		rc = ipl_register_fcp_files();
-		break;
-	default:
-		rc = sysfs_create_group(&ipl_subsys.kset.kobj,
-					&ipl_unknown_attr_group);
-		break;
-	}
-
-	if (rc)
-		firmware_unregister(&ipl_subsys);
-out:
-	return rc;
-}
-
-__initcall(ipl_device_sysfs_register);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index a887b68..642095e 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -114,29 +114,26 @@
 static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 {
 	unsigned long old_mask = regs->psw.mask;
-	int err;
-  
+	_sigregs user_sregs;	/* kernel-side staging copy of the frame */
+	/* NOTE(review): not zeroed; padding in _sigregs (if any) reaches user space. */
 	save_access_regs(current->thread.acrs);
 
 	/* Copy a 'clean' PSW mask to the user to avoid leaking
 	   information about whether PER is currently on.  */
 	regs->psw.mask = PSW_MASK_MERGE(PSW_USER_BITS, regs->psw.mask);
-	err = __copy_to_user(&sregs->regs.psw, &regs->psw,
-			     sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs));
+	memcpy(&user_sregs.regs.psw, &regs->psw, sizeof(sregs->regs.psw) +
+	       sizeof(sregs->regs.gprs));	/* psw and gprs in one copy */
 	regs->psw.mask = old_mask;
-	if (err != 0)
-		return err;
-	err = __copy_to_user(&sregs->regs.acrs, current->thread.acrs,
-			     sizeof(sregs->regs.acrs));
-	if (err != 0)
-		return err;
+	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+	       sizeof(sregs->regs.acrs));
 	/* 
 	 * We have to store the fp registers to current->thread.fp_regs
 	 * to merge them with the emulated registers.
 	 */
 	save_fp_regs(&current->thread.fp_regs);
-	return __copy_to_user(&sregs->fpregs, &current->thread.fp_regs,
-			      sizeof(s390_fp_regs));
+	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
+	       sizeof(s390_fp_regs));
+	return __copy_to_user(sregs, &user_sregs, sizeof(_sigregs)); /* one copy-out */
 }
 
 /* Returns positive number on error */
@@ -144,27 +141,25 @@
 {
 	unsigned long old_mask = regs->psw.mask;
 	int err;
+	_sigregs user_sregs;
 
 	/* Alwys make any pending restarted system call return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-	err = __copy_from_user(&regs->psw, &sregs->regs.psw,
-			       sizeof(sregs->regs.psw)+sizeof(sregs->regs.gprs));
-	regs->psw.mask = PSW_MASK_MERGE(old_mask, regs->psw.mask);
-	regs->psw.addr |= PSW_ADDR_AMODE;
+	err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs));
 	if (err)
 		return err;
-	err = __copy_from_user(&current->thread.acrs, &sregs->regs.acrs,
-			       sizeof(sregs->regs.acrs));
-	if (err)
-		return err;
+	memcpy(&regs->psw, &user_sregs.regs.psw, sizeof(sregs->regs.psw) +
+	       sizeof(sregs->regs.gprs));	/* raw user PSW, sanitised next */
+	regs->psw.mask = PSW_MASK_MERGE(old_mask, regs->psw.mask);
+	regs->psw.addr |= PSW_ADDR_AMODE;
+	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+	       sizeof(sregs->regs.acrs));
 	restore_access_regs(current->thread.acrs);
 
-	err = __copy_from_user(&current->thread.fp_regs, &sregs->fpregs,
-			       sizeof(s390_fp_regs));
+	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
+	       sizeof(s390_fp_regs));
 	current->thread.fp_regs.fpc &= FPC_VALID_MASK;
-	if (err)
-		return err;
 
 	restore_fp_regs(&current->thread.fp_regs);
 	regs->trap = -1;	/* disable syscall checks */
@@ -457,6 +452,7 @@
 		case -ERESTART_RESTARTBLOCK:
 			regs->gprs[2] = -EINTR;
 		}
+		regs->trap = -1;	/* Don't deal with this again. */
 	}
 
 	/* Get signal to deliver.  When running under ptrace, at this point
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8e03219..b2e6f4c 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,9 +59,6 @@
 extern char vmhalt_cmd[];
 extern char vmpoff_cmd[];
 
-extern void reipl(unsigned long devno);
-extern void reipl_diag(void);
-
 static void smp_ext_bitcall(int, ec_bit_sig);
 static void smp_ext_bitcall_others(ec_bit_sig);
 
@@ -279,12 +276,7 @@
 	 * interrupted by an external interrupt and s390irq
 	 * locks are always held disabled).
 	 */
-	reipl_diag();
-
-	if (MACHINE_IS_VM)
-		cpcmd ("IPL", NULL, 0, NULL);
-	else
-		reipl (0x10000 | S390_lowcore.ipl_device);
+	do_reipl();
 }
 
 void machine_restart_smp(char * __unused) 
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index bde1d1d..c4982c9 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/reboot.h>
+#include <linux/kprobes.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -39,6 +40,7 @@
 #include <asm/s390_ext.h>
 #include <asm/lowcore.h>
 #include <asm/debug.h>
+#include <asm/kdebug.h>
 
 /* Called from entry.S only */
 extern void handle_per_exception(struct pt_regs *regs);
@@ -74,6 +76,20 @@
 static int kstack_depth_to_print = 20;
 #endif /* CONFIG_64BIT */
 
+ATOMIC_NOTIFIER_HEAD(s390die_chain);
+/* Add nb to s390die_chain; returns atomic_notifier_chain_register()'s result. */
+int register_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&s390die_chain, nb);
+}
+EXPORT_SYMBOL(register_die_notifier);
+/* Drop nb from s390die_chain; returns the chain-unregister call's result. */
+int unregister_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&s390die_chain, nb);
+}
+EXPORT_SYMBOL(unregister_die_notifier);
+
 /*
  * For show_trace we have tree different stack to consider:
  *   - the panic stack which is used if the kernel stack has overflown
@@ -305,8 +321,9 @@
 #endif
 }
 
-static void inline do_trap(long interruption_code, int signr, char *str,
-                           struct pt_regs *regs, siginfo_t *info)
+static void __kprobes inline do_trap(long interruption_code, int signr,
+					char *str, struct pt_regs *regs,
+					siginfo_t *info)
 {
 	/*
 	 * We got all needed information from the lowcore and can
@@ -315,6 +332,10 @@
         if (regs->psw.mask & PSW_MASK_PSTATE)
 		local_irq_enable();
 
+	if (notify_die(DIE_TRAP, str, regs, interruption_code,
+				interruption_code, signr) == NOTIFY_STOP)
+		return;
+
         if (regs->psw.mask & PSW_MASK_PSTATE) {
                 struct task_struct *tsk = current;
 
@@ -336,8 +357,12 @@
 	return (void __user *)((regs->psw.addr-S390_lowcore.pgm_ilc) & PSW_ADDR_INSN);
 }
 
-void do_single_step(struct pt_regs *regs)
+void __kprobes do_single_step(struct pt_regs *regs)
 {
+	if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0,
+					SIGTRAP) == NOTIFY_STOP){
+		return;	/* a die notifier returned NOTIFY_STOP: no SIGTRAP */
+	}
 	if ((current->ptrace & PT_PTRACED) != 0)
 		force_sig(SIGTRAP, current);
 }
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index ff5f7bb..af9e69a 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -24,6 +24,7 @@
 	*(.text)
 	SCHED_TEXT
 	LOCK_TEXT
+	KPROBES_TEXT
 	*(.fixup)
 	*(.gnu.warning)
 	} = 0x0700
@@ -117,7 +118,7 @@
 
   /* Sections to be discarded */
   /DISCARD/ : {
-	*(.exitcall.exit)
+	*(.exit.text) *(.exit.data) *(.exitcall.exit)
 	}
 
   /* Stabs debugging sections.  */
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index e05d087..c42ffed 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -4,6 +4,6 @@
 
 EXTRA_AFLAGS := -traditional
 
-lib-y += delay.o string.o
-lib-y += $(if $(CONFIG_64BIT),uaccess64.o,uaccess.o)
+lib-y += delay.o string.o uaccess_std.o
+lib-$(CONFIG_64BIT) += uaccess_mvcos.o
 lib-$(CONFIG_SMP) += spinlock.o
diff --git a/arch/s390/lib/uaccess.S b/arch/s390/lib/uaccess.S
deleted file mode 100644
index 8372752..0000000
--- a/arch/s390/lib/uaccess.S
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- *  arch/s390/lib/uaccess.S
- *    __copy_{from|to}_user functions.
- *
- *  s390
- *    Copyright (C) 2000,2002 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Authors(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- *  These functions have standard call interface
- */
-
-#include <linux/errno.h>
-#include <asm/lowcore.h>
-#include <asm/asm-offsets.h>
-
-        .text
-        .align 4
-        .globl __copy_from_user_asm
-	# %r2 = to, %r3 = n, %r4 = from
-__copy_from_user_asm:
-	slr	%r0,%r0
-0:	mvcp	0(%r3,%r2),0(%r4),%r0
-	jnz	1f
-	slr	%r2,%r2
-	br	%r14
-1:	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-	ahi	%r3,-256
-2:	mvcp	0(%r3,%r2),0(%r4),%r0
-	jnz	1b
-3:	slr	%r2,%r2
-	br	%r14
-4:	lhi	%r0,-4096
-	lr	%r5,%r4
-	slr	%r5,%r0
-	nr	%r5,%r0		# %r5 = (%r4 + 4096) & -4096
-	slr	%r5,%r4		# %r5 = #bytes to next user page boundary
-	clr	%r3,%r5		# copy crosses next page boundary ?
-	jnh	6f		# no, the current page faulted
-	# move with the reduced length which is < 256
-5:	mvcp	0(%r5,%r2),0(%r4),%r0
-	slr	%r3,%r5
-6:	lr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.long	0b,4b
-	.long	2b,4b
-	.long	5b,6b
-        .previous
-
-        .align 4
-        .text
-        .globl __copy_to_user_asm
-	# %r2 = from, %r3 = n, %r4 = to
-__copy_to_user_asm:
-	slr	%r0,%r0
-0:	mvcs	0(%r3,%r4),0(%r2),%r0
-	jnz	1f
-	slr	%r2,%r2
-	br	%r14
-1:	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-	ahi	%r3,-256
-2:	mvcs	0(%r3,%r4),0(%r2),%r0
-	jnz	1b
-3:	slr	%r2,%r2
-	br	%r14
-4:	lhi	%r0,-4096
-	lr	%r5,%r4
-	slr	%r5,%r0
-	nr	%r5,%r0		# %r5 = (%r4 + 4096) & -4096
-	slr	%r5,%r4		# %r5 = #bytes to next user page boundary
-	clr	%r3,%r5		# copy crosses next page boundary ?
-	jnh	6f		# no, the current page faulted
-	# move with the reduced length which is < 256
-5:	mvcs	0(%r5,%r4),0(%r2),%r0
-	slr	%r3,%r5
-6:	lr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.long	0b,4b
-	.long	2b,4b
-	.long	5b,6b
-        .previous
-
-        .align 4
-        .text
-        .globl __copy_in_user_asm
-	# %r2 = from, %r3 = n, %r4 = to
-__copy_in_user_asm:
-	ahi	%r3,-1
-	jo	6f
-	sacf	256
-	bras	%r1,4f
-0:	ahi	%r3,257
-1:	mvc	0(1,%r4),0(%r2)
-	la	%r2,1(%r2)
-	la	%r4,1(%r4)
-	ahi	%r3,-1
-	jnz	1b
-2:	lr	%r2,%r3
-	br	%r14
-3:	mvc	0(256,%r4),0(%r2)
-	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-4:	ahi	%r3,-256
-	jnm	3b
-5:	ex	%r3,4(%r1)
-	sacf	0
-6:	slr	%r2,%r2
-	br	%r14
-        .section __ex_table,"a"
-	.long	1b,2b
-	.long	3b,0b
-	.long	5b,0b
-        .previous
-
-        .align 4
-        .text
-        .globl __clear_user_asm
-	# %r2 = to, %r3 = n
-__clear_user_asm:
-	bras	%r5,0f
-	.long	empty_zero_page
-0:	l	%r5,0(%r5)
-	slr	%r0,%r0
-1:	mvcs	0(%r3,%r2),0(%r5),%r0
-	jnz	2f
-	slr	%r2,%r2
-	br	%r14
-2:	la	%r2,256(%r2)
-	ahi	%r3,-256
-3:	mvcs	0(%r3,%r2),0(%r5),%r0
-	jnz	2b
-4:	slr	%r2,%r2
-	br	%r14
-5:	lhi	%r0,-4096
-	lr	%r4,%r2
-	slr	%r4,%r0
-	nr	%r4,%r0		# %r4 = (%r2 + 4096) & -4096
-	slr	%r4,%r2		# %r4 = #bytes to next user page boundary
-	clr	%r3,%r4		# clear crosses next page boundary ?
-	jnh	7f		# no, the current page faulted
-	# clear with the reduced length which is < 256
-6:	mvcs	0(%r4,%r2),0(%r5),%r0
-	slr	%r3,%r4
-7:	lr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.long	1b,5b
-	.long	3b,5b
-	.long	6b,7b
-        .previous
-
-        .align 4
-        .text
-        .globl __strncpy_from_user_asm
-	# %r2 = count, %r3 = dst, %r4 = src
-__strncpy_from_user_asm:
-	lhi	%r0,0
-	lr	%r1,%r4
-	la	%r4,0(%r4)	# clear high order bit from %r4
-	la	%r2,0(%r2,%r4)	# %r2 points to first byte after string
-	sacf	256
-0:	srst	%r2,%r1
-	jo	0b
-	sacf	0
-	lr	%r1,%r2
-	jh	1f		# \0 found in string ?
-	ahi	%r1,1		# include \0 in copy
-1:	slr	%r1,%r4		# %r1 = copy length (without \0)
-	slr	%r2,%r4		# %r2 = return length (including \0)
-2:	mvcp	0(%r1,%r3),0(%r4),%r0
-	jnz	3f
-	br	%r14
-3:	la	%r3,256(%r3)
-	la	%r4,256(%r4)
-	ahi	%r1,-256
-	mvcp	0(%r1,%r3),0(%r4),%r0
-	jnz	3b
-	br	%r14
-4:	sacf	0
-	lhi	%r2,-EFAULT
-	br	%r14
-	.section __ex_table,"a"
-	.long	0b,4b
-	.previous
-
-        .align 4
-        .text
-        .globl __strnlen_user_asm
-	# %r2 = count, %r3 = src
-__strnlen_user_asm:
-	lhi	%r0,0
-	lr	%r1,%r3
-	la	%r3,0(%r3)	# clear high order bit from %r4
-	la	%r2,0(%r2,%r3)	# %r2 points to first byte after string
-	sacf	256
-0:	srst	%r2,%r1
-	jo	0b
-	sacf	0
-	ahi	%r2,1		# strnlen_user result includes the \0
-				# or return count+1 if \0 not found
-	slr	%r2,%r3
-	br	%r14
-2:	sacf	0
-	slr	%r2,%r2		# return 0 on exception
-	br	%r14
-	.section __ex_table,"a"
-	.long	0b,2b
-	.previous
diff --git a/arch/s390/lib/uaccess64.S b/arch/s390/lib/uaccess64.S
deleted file mode 100644
index 1f755be..0000000
--- a/arch/s390/lib/uaccess64.S
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- *  arch/s390x/lib/uaccess.S
- *    __copy_{from|to}_user functions.
- *
- *  s390
- *    Copyright (C) 2000,2002 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Authors(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- *  These functions have standard call interface
- */
-
-#include <linux/errno.h>
-#include <asm/lowcore.h>
-#include <asm/asm-offsets.h>
-
-        .text
-        .align 4
-        .globl __copy_from_user_asm
-	# %r2 = to, %r3 = n, %r4 = from
-__copy_from_user_asm:
-	slgr	%r0,%r0
-0:	mvcp	0(%r3,%r2),0(%r4),%r0
-	jnz	1f
-	slgr	%r2,%r2
-	br	%r14
-1:	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-	aghi	%r3,-256
-2:	mvcp	0(%r3,%r2),0(%r4),%r0
-	jnz	1b
-3:	slgr	%r2,%r2
-	br	%r14
-4:	lghi	%r0,-4096
-	lgr	%r5,%r4
-	slgr	%r5,%r0
-	ngr	%r5,%r0		# %r5 = (%r4 + 4096) & -4096
-	slgr	%r5,%r4		# %r5 = #bytes to next user page boundary
-	clgr	%r3,%r5		# copy crosses next page boundary ?
-	jnh	6f		# no, the current page faulted
-	# move with the reduced length which is < 256
-5:	mvcp	0(%r5,%r2),0(%r4),%r0
-	slgr	%r3,%r5
-6:	lgr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.quad	0b,4b
-	.quad	2b,4b
-	.quad	5b,6b
-        .previous
-
-        .align 4
-        .text
-        .globl __copy_to_user_asm
-	# %r2 = from, %r3 = n, %r4 = to
-__copy_to_user_asm:
-	slgr	%r0,%r0
-0:	mvcs	0(%r3,%r4),0(%r2),%r0
-	jnz	1f
-	slgr	%r2,%r2
-	br	%r14
-1:	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-	aghi	%r3,-256
-2:	mvcs	0(%r3,%r4),0(%r2),%r0
-	jnz	1b
-3:	slgr	%r2,%r2
-	br	%r14
-4:	lghi	%r0,-4096
-	lgr	%r5,%r4
-	slgr	%r5,%r0
-	ngr	%r5,%r0		# %r5 = (%r4 + 4096) & -4096
-	slgr	%r5,%r4		# %r5 = #bytes to next user page boundary
-	clgr	%r3,%r5		# copy crosses next page boundary ?
-	jnh	6f		# no, the current page faulted
-	# move with the reduced length which is < 256
-5:	mvcs	0(%r5,%r4),0(%r2),%r0
-	slgr	%r3,%r5
-6:	lgr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.quad	0b,4b
-	.quad	2b,4b
-	.quad	5b,6b
-        .previous
-
-        .align 4
-        .text
-        .globl __copy_in_user_asm
-	# %r2 = from, %r3 = n, %r4 = to
-__copy_in_user_asm:
-	aghi	%r3,-1
-	jo	6f
-	sacf	256
-	bras	%r1,4f
-0:	aghi	%r3,257
-1:	mvc	0(1,%r4),0(%r2)
-	la	%r2,1(%r2)
-	la	%r4,1(%r4)
-	aghi	%r3,-1
-	jnz	1b
-2:	lgr	%r2,%r3
-	br	%r14
-3:	mvc	0(256,%r4),0(%r2)
-	la	%r2,256(%r2)
-	la	%r4,256(%r4)
-4:	aghi	%r3,-256
-	jnm	3b
-5:	ex	%r3,4(%r1)
-	sacf	0
-6:	slgr	%r2,%r2
-	br	14
-        .section __ex_table,"a"
-	.quad	1b,2b
-	.quad	3b,0b
-	.quad	5b,0b
-        .previous
-
-        .align 4
-        .text
-        .globl __clear_user_asm
-	# %r2 = to, %r3 = n
-__clear_user_asm:
-	slgr	%r0,%r0
-	larl	%r5,empty_zero_page
-1:	mvcs	0(%r3,%r2),0(%r5),%r0
-	jnz	2f
-	slgr	%r2,%r2
-	br	%r14
-2:	la	%r2,256(%r2)
-	aghi	%r3,-256
-3:	mvcs	0(%r3,%r2),0(%r5),%r0
-	jnz	2b
-4:	slgr	%r2,%r2
-	br	%r14
-5:	lghi	%r0,-4096
-	lgr	%r4,%r2
-	slgr	%r4,%r0
-	ngr	%r4,%r0		# %r4 = (%r2 + 4096) & -4096
-	slgr	%r4,%r2		# %r4 = #bytes to next user page boundary
-	clgr	%r3,%r4		# clear crosses next page boundary ?
-	jnh	7f		# no, the current page faulted
-	# clear with the reduced length which is < 256
-6:	mvcs	0(%r4,%r2),0(%r5),%r0
-	slgr	%r3,%r4
-7:	lgr	%r2,%r3
-	br	%r14
-        .section __ex_table,"a"
-	.quad	1b,5b
-	.quad	3b,5b
-	.quad	6b,7b
-        .previous
-
-        .align 4
-        .text
-        .globl __strncpy_from_user_asm
-	# %r2 = count, %r3 = dst, %r4 = src
-__strncpy_from_user_asm:
-	lghi	%r0,0
-	lgr	%r1,%r4
-	la	%r2,0(%r2,%r4)	# %r2 points to first byte after string
-	sacf	256
-0:	srst	%r2,%r1
-	jo	0b
-	sacf	0
-	lgr	%r1,%r2
-	jh	1f		# \0 found in string ?
-	aghi	%r1,1		# include \0 in copy
-1:	slgr	%r1,%r4		# %r1 = copy length (without \0)
-	slgr	%r2,%r4		# %r2 = return length (including \0)
-2:	mvcp	0(%r1,%r3),0(%r4),%r0
-	jnz	3f
-	br	%r14
-3:	la	%r3,256(%r3)
-	la	%r4,256(%r4)
-	aghi	%r1,-256
-	mvcp	0(%r1,%r3),0(%r4),%r0
-	jnz	3b
-	br	%r14
-4:	sacf	0
-	lghi	%r2,-EFAULT
-	br	%r14
-	.section __ex_table,"a"
-	.quad	0b,4b
-	.previous
-
-        .align 4
-        .text
-        .globl __strnlen_user_asm
-	# %r2 = count, %r3 = src
-__strnlen_user_asm:
-	lghi	%r0,0
-	lgr	%r1,%r3
-	la	%r2,0(%r2,%r3)	# %r2 points to first byte after string
-	sacf	256
-0:	srst	%r2,%r1
-	jo	0b
-	sacf	0
-	aghi	%r2,1		# strnlen_user result includes the \0
-				# or return count+1 if \0 not found
-	slgr	%r2,%r3
-	br	%r14
-2:	sacf	0
-	slgr	%r2,%r2		# return 0 on exception
-	br	%r14
-	.section __ex_table,"a"
-	.quad	0b,2b
-	.previous
diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c
new file mode 100644
index 0000000..86c96d6
--- /dev/null
+++ b/arch/s390/lib/uaccess_mvcos.c
@@ -0,0 +1,156 @@
+/*
+ *  arch/s390/lib/uaccess_mvcos.c
+ *
+ *  Optimized user space access functions based on mvcos.
+ *
+ *    Copyright (C) IBM Corp. 2006
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *		 Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <asm/uaccess.h>
+#include <asm/futex.h>
+
+#ifndef __s390x__
+#define AHI	"ahi"
+#define ALR	"alr"
+#define CLR	"clr"
+#define LHI	"lhi"
+#define SLR	"slr"
+#else
+#define AHI	"aghi"
+#define ALR	"algr"
+#define CLR	"clgr"
+#define LHI	"lghi"
+#define SLR	"slgr"
+#endif
+
+size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x)
+{
+	register unsigned long reg0 asm("0") = 0x81UL;	/* r0 for mvcos; 0x81 presumably = user-space source (confirm) */
+	unsigned long tmp1, tmp2;
+	/* Copy size bytes from user ptr to x; returns the bytes left uncopied. */
+	tmp1 = -4096UL;		/* per-pass step, also the page mask at label 2 */
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"	/* x <- ptr, len size */
+		"   jz    4f\n"		/* cc 0: handled as "all copied" */
+		"1:"ALR"  %0,%3\n"	/* size -= 4096 */
+		"  "SLR"  %1,%3\n"	/* ptr += 4096 */
+		"  "SLR"  %2,%3\n"	/* x += 4096 */
+		"   j     0b\n"
+		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
+		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
+		"  "SLR"  %4,%1\n"	/* %4 = bytes up to the next page boundary */
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   5f\n"		/* no: give up, size is the residue */
+		"3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"	/* retry, len %4 */
+		"  "SLR"  %0,%4\n"	/* count the retried bytes as copied */
+		"   j     5f\n"
+		"4:"SLR"  %0,%0\n"	/* success: nothing left */
+		"5: \n"
+		EX_TABLE(0b,2b) EX_TABLE(3b,5b)	/* fixups: 0 -> 2, 3 -> 5 */
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x)
+{
+	register unsigned long reg0 asm("0") = 0x810000UL;	/* r0 for mvcos; presumably = user-space destination (confirm) */
+	unsigned long tmp1, tmp2;
+	/* Copy size bytes from x to user ptr; returns the bytes left uncopied. */
+	tmp1 = -4096UL;		/* per-pass step, also the page mask at label 2 */
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"	/* ptr <- x, len size */
+		"   jz    4f\n"		/* cc 0: handled as "all copied" */
+		"1:"ALR"  %0,%3\n"	/* size -= 4096 */
+		"  "SLR"  %1,%3\n"	/* ptr += 4096 */
+		"  "SLR"  %2,%3\n"	/* x += 4096 */
+		"   j     0b\n"
+		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
+		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
+		"  "SLR"  %4,%1\n"	/* %4 = bytes up to the next page boundary */
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   5f\n"		/* no: give up, size is the residue */
+		"3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"	/* retry, len %4 */
+		"  "SLR"  %0,%4\n"	/* count the retried bytes as copied */
+		"   j     5f\n"
+		"4:"SLR"  %0,%0\n"	/* success: nothing left */
+		"5: \n"
+		EX_TABLE(0b,2b) EX_TABLE(3b,5b)	/* fixups: 0 -> 2, 3 -> 5 */
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+size_t copy_in_user_mvcos(size_t size, void __user *to, const void __user *from)
+{
+	register unsigned long reg0 asm("0") = 0x810081UL;	/* r0 for mvcos; presumably both operands in user space (confirm) */
+	unsigned long tmp1, tmp2;	/* tmp2 (%4) is not referenced by the asm */
+	/* Copy size bytes from user from to user to; returns bytes left uncopied. */
+	tmp1 = -4096UL;		/* per-pass step */
+	/* FIXME: copy with reduced length. */
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"	/* to <- from */
+		"   jz    2f\n"		/* cc 0: handled as "all copied" */
+		"1:"ALR"  %0,%3\n"	/* size -= 4096 */
+		"  "SLR"  %1,%3\n"	/* to += 4096 */
+		"  "SLR"  %2,%3\n"	/* from += 4096 */
+		"   j     0b\n"
+		"2:"SLR"  %0,%0\n"	/* success: nothing left */
+		"3: \n"
+		EX_TABLE(0b,3b)		/* fault: return size as it stands, no retry */
+		: "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+size_t clear_user_mvcos(size_t size, void __user *to)
+{
+	register unsigned long reg0 asm("0") = 0x810000UL;	/* r0 for mvcos; presumably = user-space destination (confirm) */
+	unsigned long tmp1, tmp2;
+	/* Zero size bytes at user to from empty_zero_page; returns bytes left. */
+	tmp1 = -4096UL;		/* per-pass step, also the page mask at label 2 */
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"	/* to <- zero page */
+		"   jz    4f\n"		/* cc 0: handled as "all cleared" */
+		"1:"ALR"  %0,%2\n"	/* size -= 4096 */
+		"  "SLR"  %1,%2\n"	/* to += 4096; the source stays put */
+		"   j     0b\n"
+		"2: la    %3,4095(%1)\n"/* %3 = to + 4095 */
+		"   nr    %3,%2\n"	/* %3 = (to + 4095) & -4096 */
+		"  "SLR"  %3,%1\n"	/* %3 = bytes up to the next page boundary */
+		"  "CLR"  %0,%3\n"	/* copy crosses next page boundary? */
+		"   jnh   5f\n"		/* no: give up, size is the residue */
+		"3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"	/* retry, len %3 */
+		"  "SLR"  %0,%3\n"	/* count the retried bytes as cleared */
+		"   j     5f\n"
+		"4:"SLR"  %0,%0\n"	/* success: nothing left */
+		"5: \n"
+		EX_TABLE(0b,2b) EX_TABLE(3b,5b)	/* fixups: 0 -> 2, 3 -> 5 */
+		: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
+		: "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+extern size_t copy_from_user_std_small(size_t, const void __user *, void *);
+extern size_t copy_to_user_std_small(size_t, void __user *, const void *);
+extern size_t strnlen_user_std(size_t, const char __user *);
+extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
+extern int futex_atomic_op(int, int __user *, int, int *);
+extern int futex_atomic_cmpxchg(int __user *, int, int);
+
+struct uaccess_ops uaccess_mvcos = {	/* op table using the mvcos copy routines */
+	.copy_from_user = copy_from_user_mvcos,
+	.copy_from_user_small = copy_from_user_std_small,	/* std routine reused */
+	.copy_to_user = copy_to_user_mvcos,
+	.copy_to_user_small = copy_to_user_std_small,	/* std routine reused */
+	.copy_in_user = copy_in_user_mvcos,
+	.clear_user = clear_user_mvcos,
+	.strnlen_user = strnlen_user_std,		/* std routine reused */
+	.strncpy_from_user = strncpy_from_user_std,	/* std routine reused */
+	.futex_atomic_op = futex_atomic_op,
+	.futex_atomic_cmpxchg = futex_atomic_cmpxchg,
+};
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
new file mode 100644
index 0000000..9a4d4a2
--- /dev/null
+++ b/arch/s390/lib/uaccess_std.c
@@ -0,0 +1,340 @@
+/*
+ *  arch/s390/lib/uaccess_std.c
+ *
+ *  Standard user space access functions based on mvcp/mvcs and doing
+ *  interesting things in the secondary space mode.
+ *
+ *    Copyright (C) IBM Corp. 2006
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *		 Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <asm/uaccess.h>
+#include <asm/futex.h>
+
+#ifndef __s390x__	/* not a 64-bit build: 32-bit register instructions */
+#define AHI	"ahi"
+#define ALR	"alr"
+#define CLR	"clr"
+#define LHI	"lhi"
+#define SLR	"slr"
+#else			/* 64-bit build: the corresponding 64-bit forms */
+#define AHI	"aghi"
+#define ALR	"algr"
+#define CLR	"clgr"
+#define LHI	"lghi"
+#define SLR	"slgr"
+#endif
+
+size_t copy_from_user_std(size_t size, const void __user *ptr, void *x)
+{
+	unsigned long tmp1, tmp2;
+	/* Copy size bytes from user ptr to x; returns bytes not copied (0 = all). */
+	tmp1 = -256UL;		/* also added to size as the per-pass step */
+	asm volatile(
+		"0: mvcp  0(%0,%2),0(%1),%3\n"
+		"   jz    5f\n"		/* cc 0: whole request moved */
+		"1:"ALR"  %0,%3\n"	/* size -= 256 */
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"2: mvcp  0(%0,%2),0(%1),%3\n"
+		"   jnz   1b\n"
+		"   j     5f\n"
+		"3: la    %4,255(%1)\n"	/* fixup: %4 = ptr + 255 */
+		"  "LHI"  %3,-4096\n"
+		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
+		"  "SLR"  %4,%1\n"	/* %4 = bytes from ptr up to that boundary */
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   6f\n"		/* no: return size unchanged */
+		"4: mvcp  0(%4,%2),0(%1),%3\n"
+		"  "SLR"  %0,%4\n"	/* subtract the bytes moved at 4: */
+		"   j     6f\n"
+		"5:"SLR"  %0,%0\n"	/* success: return 0 */
+		"6: \n"
+		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t copy_from_user_std_small(size_t size, const void __user *ptr, void *x)
+{
+	unsigned long tmp1, tmp2;
+	/* One mvcp, no chunk loop. NOTE(review): presumably for size <= 256 - confirm. */
+	tmp1 = 0UL;
+	asm volatile(
+		"0: mvcp  0(%0,%2),0(%1),%3\n"
+		"  "SLR"  %0,%0\n"	/* no fault: return 0 */
+		"   j     3f\n"
+		"1: la    %4,255(%1)\n" /* fixup: %4 = ptr + 255 */
+		"  "LHI"  %3,-4096\n"
+		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
+		"  "SLR"  %4,%1\n"	/* %4 = bytes from ptr up to that boundary */
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   3f\n"		/* no: return size unchanged */
+		"2: mvcp  0(%4,%2),0(%1),%3\n"
+		"  "SLR"  %0,%4\n"	/* subtract the bytes moved at 2: */
+		"3:\n"
+		EX_TABLE(0b,1b) EX_TABLE(2b,3b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t copy_to_user_std(size_t size, void __user *ptr, const void *x)
+{
+	unsigned long tmp1, tmp2;
+	/* Copy size bytes from x to user ptr; returns bytes not copied (0 = all). */
+	tmp1 = -256UL;		/* also added to size as the per-pass step */
+	asm volatile(
+		"0: mvcs  0(%0,%1),0(%2),%3\n"
+		"   jz    5f\n"		/* cc 0: whole request moved */
+		"1:"ALR"  %0,%3\n"	/* size -= 256 */
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"2: mvcs  0(%0,%1),0(%2),%3\n"
+		"   jnz   1b\n"
+		"   j     5f\n"
+		"3: la    %4,255(%1)\n" /* fixup: %4 = ptr + 255 */
+		"  "LHI"  %3,-4096\n"
+		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
+		"  "SLR"  %4,%1\n"	/* %4 = bytes from ptr up to that boundary */
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   6f\n"		/* no: return size unchanged */
+		"4: mvcs  0(%4,%1),0(%2),%3\n"
+		"  "SLR"  %0,%4\n"	/* subtract the bytes moved at 4: */
+		"   j     6f\n"
+		"5:"SLR"  %0,%0\n"	/* success: return 0 */
+		"6: \n"
+		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t copy_to_user_std_small(size_t size, void __user *ptr, const void *x)
+{
+	unsigned long tmp1, tmp2;
+	/* One mvcs, no chunk loop. NOTE(review): presumably for size <= 256 - confirm. */
+	tmp1 = 0UL;
+	asm volatile(
+		"0: mvcs  0(%0,%1),0(%2),%3\n"
+		"  "SLR"  %0,%0\n"	/* no fault: return 0 */
+		"   j     3f\n"
+		"1: la    %4,255(%1)\n" /* fixup: %4 = ptr + 255 */
+		"  "LHI"  %3,-4096\n"
+		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
+		"  "SLR"  %4,%1\n"	/* %4 = bytes from ptr up to that boundary */
+		"  "CLR"  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   3f\n"		/* no: return size unchanged */
+		"2: mvcs  0(%4,%1),0(%2),%3\n"
+		"  "SLR"  %0,%4\n"	/* subtract the bytes moved at 2: */
+		"3:\n"
+		EX_TABLE(0b,1b) EX_TABLE(2b,3b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t copy_in_user_std(size_t size, void __user *to, const void __user *from)
+{
+	unsigned long tmp1;
+	/* Copy user->user; returns the byte count left in size (0 if all copied). */
+	asm volatile(
+		"   sacf  256\n"	/* switched first so every exit undoes it at 6: */
+		"  "AHI"  %0,-1\n"
+		"   jo    5f\n"
+		"   bras  %3,3f\n"	/* %3 = address of 0:, base for the ex at 4: */
+		"0:"AHI"  %0,257\n"	/* fixup entry for 2b/4b: restore the count */
+		"1: mvc   0(1,%1),0(%2)\n"	/* byte-wise retry */
+		"   la    %1,1(%1)\n"
+		"   la    %2,1(%2)\n"
+		"  "AHI"  %0,-1\n"
+		"   jnz   1b\n"
+		"   j     5f\n"
+		"2: mvc   0(256,%1),0(%2)\n"	/* bulk loop, 256 bytes per pass */
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"3:"AHI"  %0,-256\n"
+		"   jnm   2b\n"
+		"4: ex    %0,1b-0b(%3)\n"	/* tail: the mvc at 1: with length from %0 */
+		/* sacf 0 sits at 6: so that 'j 5f' and the 1b fixup also pass through it */
+		"5: "SLR"  %0,%0\n"
+		"6: sacf  0\n"
+		EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+		: "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t clear_user_std(size_t size, void __user *to)
+{
+	unsigned long tmp1, tmp2;
+	/* Zero size bytes of user memory at to with xc; result is left in size. */
+	asm volatile(
+		"   sacf  256\n"	/* switched first so every exit undoes it at 6: */
+		"  "AHI"  %0,-1\n"
+		"   jo    5f\n"
+		"   bras  %3,3f\n"	/* %3 = address of the xc template below */
+		"   xc    0(1,%1),0(%1)\n"
+		"0:"AHI"  %0,257\n"	/* fixup entry for 2b/4b: restore the count */
+		"   la    %2,255(%1)\n" /* %2 = ptr + 255 */
+		"   srl   %2,12\n"
+		"   sll   %2,12\n"	/* %2 = (ptr + 255) & -4096 */
+		"  "SLR"  %2,%1\n"
+		"  "CLR"  %0,%2\n"	/* clear crosses next page boundary? */
+		"   jnh   5f\n"		/* NOTE(review): yields 0 after a fault - confirm */
+		"  "AHI"  %2,-1\n"
+		"1: ex    %2,0(%3)\n"	/* clear up to the page boundary */
+		"  "AHI"  %2,1\n"
+		"  "SLR"  %0,%2\n"
+		"   j     5f\n"		/* NOTE(review): discards the count just computed */
+		"2: xc    0(256,%1),0(%1)\n"	/* bulk loop, 256 bytes per pass */
+		"   la    %1,256(%1)\n"
+		"3:"AHI"  %0,-256\n"
+		"   jnm   2b\n"
+		"4: ex    %0,0(%3)\n"	/* tail: xc template with length from %0 */
+		/* sacf 0 sits at 6: so that the jumps to 5: and the 1b fixup pass through it */
+		"5: "SLR"  %0,%0\n"
+		"6: sacf  0\n"
+		EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+		: "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+size_t strnlen_user_std(size_t size, const char __user *src)
+{
+	register unsigned long reg0 asm("0") = 0UL;
+	unsigned long tmp1, tmp2;
+	/* Scan at most size bytes of user string src for \0; yields 0 if the scan faults. */
+	asm volatile(
+		"   la    %2,0(%1)\n"		/* %2 = start of the scan */
+		"   la    %3,0(%0,%1)\n"	/* %3 = src + size, end of the scan */
+		"  "SLR"  %0,%0\n"		/* result stays 0 if 0: faults */
+		"   sacf  256\n"
+		"0: srst  %3,%2\n"
+		"   jo    0b\n"		/* cc 3: scan not finished yet, continue */
+		"   la    %0,1(%3)\n"	/* strnlen_user results includes \0 */
+		"  "SLR"  %0,%1\n"
+		"1: sacf  0\n"
+		EX_TABLE(0b,1b)
+		: "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
+{
+	register unsigned long reg0 asm("0") = 0UL;
+	unsigned long tmp1, tmp2;
+	/* Copy a user string of at most size bytes to dst; -EFAULT on a fault. */
+	asm volatile(
+		"   la    %3,0(%1)\n"		/* %3 = start of the scan */
+		"   la    %4,0(%0,%1)\n"	/* %4 = src + size, end of the scan */
+		"   sacf  256\n"
+		"0: srst  %4,%3\n"
+		"   jo    0b\n"		/* cc 3: scan not finished yet, continue */
+		"   sacf  0\n"
+		"   la    %0,0(%4)\n"	/* %0 = where the scan stopped */
+		"   jh    1f\n"		/* found \0 in string ? */
+		"  "AHI"  %4,1\n"	/* include \0 in copy */
+		"1:"SLR"  %0,%1\n"	/* %0 = return length (without \0) */
+		"  "SLR"  %4,%1\n"	/* %4 = copy length (including \0) */
+		"2: mvcp  0(%4,%2),0(%1),%5\n"
+		"   jz    9f\n"		/* cc 0: everything copied */
+		"3:"AHI"  %4,-256\n"	/* otherwise advance by 256 and repeat */
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"4: mvcp  0(%4,%2),0(%1),%5\n"
+		"   jnz   3b\n"
+		"   j     9f\n"
+		"7: sacf  0\n"		/* fixup for the srst at 0: */
+		"8:"LHI"  %0,%6\n"	/* any fault: return -EFAULT */
+		"9:\n"
+		EX_TABLE(0b,7b) EX_TABLE(2b,8b) EX_TABLE(4b,8b)
+		: "+a" (size), "+a" (src), "+d" (dst), "=a" (tmp1), "=a" (tmp2)
+		: "d" (reg0), "K" (-EFAULT) : "cc", "memory");
+	return size;
+}
+
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg)	\
+	asm volatile(							\
+		"   sacf  256\n"					\
+		"0: l     %1,0(%6)\n"		/* oldval = *uaddr */	\
+		"1:"insn			/* compute newval */	\
+		"2: cs    %1,%2,0(%6)\n"	/* swap in newval */	\
+		"3: jl    1b\n"			/* cs failed: redo */	\
+		"   lhi   %0,0\n"		/* done: ret = 0 */	\
+		"4: sacf  0\n"			/* faults land here */	\
+		EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b)		\
+		: "=d" (ret), "=&d" (oldval), "=&d" (newval),		\
+		  "=m" (*uaddr)						\
+		: "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
+		  "m" (*uaddr) : "cc");
+
+int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old)
+{
+	int oldval = 0, newval, ret;
+	/* Apply op to *uaddr; *old receives oldval; returns 0, -EFAULT or -ENOSYS. */
+	inc_preempt_count();
+
+	switch (op) {
+	case FUTEX_OP_SET:	/* newval = oparg */
+		__futex_atomic_op("lr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:	/* newval = oldval + oparg */
+		__futex_atomic_op("lr %2,%1\nar %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:	/* newval = oldval | oparg */
+		__futex_atomic_op("lr %2,%1\nor %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:	/* NOTE(review): plain AND; presumably caller passes ~mask - verify */
+		__futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:	/* newval = oldval ^ oparg */
+		__futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;	/* unknown op: *old gets the initial 0 */
+	}
+	dec_preempt_count();
+	*old = oldval;
+	return ret;
+}
+
+int futex_atomic_cmpxchg(int __user *uaddr, int oldval, int newval)
+{
+	int ret;
+	/* Compare-and-swap on *uaddr; returns the value left in oldval, or -EFAULT. */
+	asm volatile(
+		"   sacf 256\n"
+		"0: cs   %1,%4,0(%5)\n"	/* a fault may be reported at the cs itself ... */
+		"1: lr   %0,%1\n"	/* ... or at the instruction following it */
+		"2: sacf 0\n"		/* fixup target: ret still holds -EFAULT */
+		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+		: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+		: "cc", "memory" );
+	return ret;
+}
+
+struct uaccess_ops uaccess_std = {
+	.clear_user = clear_user_std,			/* bulk routines */
+	.copy_in_user = copy_in_user_std,
+	.copy_to_user = copy_to_user_std,
+	.copy_from_user = copy_from_user_std,
+	.copy_to_user_small = copy_to_user_std_small,	/* single-move variants */
+	.copy_from_user_small = copy_from_user_std_small,
+	.strncpy_from_user = strncpy_from_user_std,	/* string helpers */
+	.strnlen_user = strnlen_user_std,
+	.futex_atomic_cmpxchg = futex_atomic_cmpxchg,	/* futex helpers */
+	.futex_atomic_op = futex_atomic_op,
+};
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index ceea51c..786a44d 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -53,22 +53,6 @@
 static void cmm_set_timer(void);
 
 static long
-cmm_strtoul(const char *cp, char **endp)
-{
-	unsigned int base = 10;
-
-	if (*cp == '0') {
-		base = 8;
-		cp++;
-		if ((*cp == 'x' || *cp == 'X') && isxdigit(cp[1])) {
-			base = 16;
-			cp++;
-		}
-	}
-	return simple_strtoul(cp, endp, base);
-}
-
-static long
 cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list)
 {
 	struct cmm_page_array *pa;
@@ -276,7 +260,7 @@
 			return -EFAULT;
 		buf[sizeof(buf) - 1] = '\0';
 		cmm_skip_blanks(buf, &p);
-		pages = cmm_strtoul(p, &p);
+		pages = simple_strtoul(p, &p, 0);
 		if (ctl == &cmm_table[0])
 			cmm_set_pages(pages);
 		else
@@ -317,9 +301,9 @@
 			return -EFAULT;
 		buf[sizeof(buf) - 1] = '\0';
 		cmm_skip_blanks(buf, &p);
-		pages = cmm_strtoul(p, &p);
+		pages = simple_strtoul(p, &p, 0);
 		cmm_skip_blanks(p, &p);
-		seconds = cmm_strtoul(p, &p);
+		seconds = simple_strtoul(p, &p, 0);
 		cmm_set_timeout(pages, seconds);
 	} else {
 		len = sprintf(buf, "%ld %ld\n",
@@ -382,24 +366,24 @@
 	if (strncmp(msg, "SHRINK", 6) == 0) {
 		if (!cmm_skip_blanks(msg + 6, &msg))
 			return;
-		pages = cmm_strtoul(msg, &msg);
+		pages = simple_strtoul(msg, &msg, 0);
 		cmm_skip_blanks(msg, &msg);
 		if (*msg == '\0')
 			cmm_set_pages(pages);
 	} else if (strncmp(msg, "RELEASE", 7) == 0) {
 		if (!cmm_skip_blanks(msg + 7, &msg))
 			return;
-		pages = cmm_strtoul(msg, &msg);
+		pages = simple_strtoul(msg, &msg, 0);
 		cmm_skip_blanks(msg, &msg);
 		if (*msg == '\0')
 			cmm_add_timed_pages(pages);
 	} else if (strncmp(msg, "REUSE", 5) == 0) {
 		if (!cmm_skip_blanks(msg + 5, &msg))
 			return;
-		pages = cmm_strtoul(msg, &msg);
+		pages = simple_strtoul(msg, &msg, 0);
 		if (!cmm_skip_blanks(msg, &msg))
 			return;
-		seconds = cmm_strtoul(msg, &msg);
+		seconds = simple_strtoul(msg, &msg, 0);
 		cmm_skip_blanks(msg, &msg);
 		if (*msg == '\0')
 			cmm_set_timeout(pages, seconds);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 7cd8257..44f0cda 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -25,10 +25,12 @@
 #include <linux/console.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/kprobes.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/kdebug.h>
 
 #ifndef CONFIG_64BIT
 #define __FAIL_ADDR_MASK 0x7ffff000
@@ -48,6 +50,38 @@
 
 extern void die(const char *,struct pt_regs *,long);
 
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);	/* chain of page fault listeners */
+int register_page_fault_notifier(struct notifier_block *nb)
+{	/* add nb to the chain; the helper's result is passed through */
+	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{	/* remove nb from the chain; the helper's result is passed through */
+	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+			struct pt_regs *regs, long err, int trap, int sig)
+{
+	/* Bundle the fault details and offer them to every listener. */
+	struct die_args info = { .regs = regs, .str = str, .err = err,
+				 .trapnr = trap, .signr = sig };
+	int verdict;
+
+	verdict = atomic_notifier_call_chain(&notify_page_fault_chain,
+					     val, &info);
+	return verdict;
+}
+#else	/* !CONFIG_KPROBES: no chain, nothing ever claims the fault */
+static inline int notify_page_fault(enum die_val val, const char *str,
+			struct pt_regs *regs, long err, int trap, int sig)
+{
+	return NOTIFY_DONE;
+}
+#endif
+
 extern spinlock_t timerlist_lock;
 
 /*
@@ -159,7 +193,7 @@
  *   11       Page translation     ->  Not present       (nullification)
  *   3b       Region third trans.  ->  Not present       (nullification)
  */
-static inline void
+static inline void __kprobes
 do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
 {
         struct task_struct *tsk;
@@ -173,6 +207,10 @@
         tsk = current;
         mm = tsk->mm;
 	
+	if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+					SIGSEGV) == NOTIFY_STOP)
+		return;
+
 	/* 
          * Check for low-address protection.  This needs to be treated
 	 * as a special case because the translation exception code 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 6e6b6de..cfd9b8f 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -108,16 +108,23 @@
         unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
         static const int ssm_mask = 0x04000000L;
 	unsigned long ro_start_pfn, ro_end_pfn;
+	unsigned long zones_size[MAX_NR_ZONES];
 
 	ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
 	ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
 
+	memset(zones_size, 0, sizeof(zones_size));
+	zones_size[ZONE_DMA] = max_low_pfn;
+	free_area_init_node(0, &contig_page_data, zones_size,
+			    __pa(PAGE_OFFSET) >> PAGE_SHIFT,
+			    zholes_size);
+
 	/* unmap whole virtual address space */
 	
         pg_dir = swapper_pg_dir;
 
-	for (i=0;i<KERNEL_PGD_PTRS;i++) 
-	        pmd_clear((pmd_t*)pg_dir++);
+	for (i = 0; i < PTRS_PER_PGD; i++)
+		pmd_clear((pmd_t *) pg_dir++);
 		
 	/*
 	 * map whole physical memory to virtual memory (identity mapping) 
@@ -131,10 +138,7 @@
                  */
 		pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
 
-                pg_dir->pgd0 =  (_PAGE_TABLE | __pa(pg_table));
-                pg_dir->pgd1 =  (_PAGE_TABLE | (__pa(pg_table)+1024));
-                pg_dir->pgd2 =  (_PAGE_TABLE | (__pa(pg_table)+2048));
-                pg_dir->pgd3 =  (_PAGE_TABLE | (__pa(pg_table)+3072));
+		pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table);
                 pg_dir++;
 
                 for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
@@ -143,8 +147,8 @@
 			else
 				pte = pfn_pte(pfn, PAGE_KERNEL);
                         if (pfn >= max_low_pfn)
-                                pte_clear(&init_mm, 0, &pte);
-                        set_pte(pg_table, pte);
+				pte_val(pte) = _PAGE_TYPE_EMPTY;
+			set_pte(pg_table, pte);
                         pfn++;
                 }
         }
@@ -159,16 +163,6 @@
 			     : : "m" (pgdir_k), "m" (ssm_mask));
 
         local_flush_tlb();
-
-	{
-		unsigned long zones_size[MAX_NR_ZONES];
-
-		memset(zones_size, 0, sizeof(zones_size));
-		zones_size[ZONE_DMA] = max_low_pfn;
-		free_area_init_node(0, &contig_page_data, zones_size,
-				    __pa(PAGE_OFFSET) >> PAGE_SHIFT,
-				    zholes_size);
-	}
         return;
 }
 
@@ -236,10 +230,8 @@
 					pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
 				else
 					pte = pfn_pte(pfn, PAGE_KERNEL);
-                                if (pfn >= max_low_pfn) {
-                                        pte_clear(&init_mm, 0, &pte); 
-                                        continue;
-                                }
+				if (pfn >= max_low_pfn)
+					pte_val(pte) = _PAGE_TYPE_EMPTY;
                                 set_pte(pt_dir, pte);
                                 pfn++;
                         }
diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c
index 81c0cbd..75ac24d 100644
--- a/arch/sparc/kernel/ebus.c
+++ b/arch/sparc/kernel/ebus.c
@@ -277,7 +277,7 @@
 	struct pci_dev *pdev;
 	struct pcidev_cookie *cookie;
 	struct device_node *dp;
-	unsigned long addr, *base;
+	struct resource *p;
 	unsigned short pci_command;
 	int len, reg, nreg;
 	int num_ebus = 0;
@@ -321,13 +321,12 @@
 		}
 		nreg = len / sizeof(struct linux_prom_pci_registers);
 
-		base = &ebus->self->resource[0].start;
+		p = &ebus->self->resource[0];
 		for (reg = 0; reg < nreg; reg++) {
 			if (!(regs[reg].which_io & 0x03000000))
 				continue;
 
-			addr = regs[reg].phys_lo;
-			*base++ = addr;
+			(p++)->start = regs[reg].phys_lo;
 		}
 
 		ebus->ofdev.node = dp;
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 8654b44..d33f8a0 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -508,6 +508,7 @@
 
 void __init sbus_setup_iommu(struct sbus_bus *sbus, struct device_node *dp)
 {
+#ifndef CONFIG_SUN4
 	struct device_node *parent = dp->parent;
 
 	if (sparc_cpu_model != sun4d &&
@@ -524,6 +525,7 @@
 
 		iounit_init(dp->node, parent->node, sbus);
 	}
+#endif
 }
 
 void __init sbus_setup_arch_props(struct sbus_bus *sbus, struct device_node *dp)
diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig
index 43d9229..51cf602 100644
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.18-rc2
-# Fri Jul 21 14:19:24 2006
+# Linux kernel version: 2.6.18
+# Sat Sep 23 18:32:19 2006
 #
 CONFIG_SPARC=y
 CONFIG_SPARC64=y
@@ -9,6 +9,7 @@
 CONFIG_MMU=y
 CONFIG_TIME_INTERPOLATION=y
 CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_AUDIT_ARCH=y
 CONFIG_SPARC64_PAGE_SIZE_8KB=y
 # CONFIG_SPARC64_PAGE_SIZE_64KB is not set
 # CONFIG_SPARC64_PAGE_SIZE_512KB is not set
@@ -37,14 +38,14 @@
 CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
-CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_RELAY=y
 CONFIG_INITRAMFS_SOURCE=""
-CONFIG_UID16=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 # CONFIG_EMBEDDED is not set
+CONFIG_UID16=y
+CONFIG_SYSCTL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -53,12 +54,12 @@
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
-CONFIG_RT_MUTEXES=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
 CONFIG_SLAB=y
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 # CONFIG_SLOB is not set
@@ -169,6 +170,7 @@
 CONFIG_UNIX=y
 CONFIG_XFRM=y
 CONFIG_XFRM_USER=m
+# CONFIG_XFRM_SUB_POLICY is not set
 CONFIG_NET_KEY=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
@@ -214,11 +216,15 @@
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+# CONFIG_IPV6_MIP6 is not set
 CONFIG_INET6_XFRM_TUNNEL=m
 CONFIG_INET6_TUNNEL=m
 CONFIG_INET6_XFRM_MODE_TRANSPORT=m
 CONFIG_INET6_XFRM_MODE_TUNNEL=m
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
 CONFIG_IPV6_TUNNEL=m
+# CONFIG_IPV6_SUBTREES is not set
+# CONFIG_IPV6_MULTIPLE_TABLES is not set
 # CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
 
@@ -233,6 +239,7 @@
 # DCCP CCIDs Configuration (EXPERIMENTAL)
 #
 CONFIG_IP_DCCP_CCID2=m
+# CONFIG_IP_DCCP_CCID2_DEBUG is not set
 CONFIG_IP_DCCP_CCID3=m
 CONFIG_IP_DCCP_TFRC_LIB=m
 
@@ -259,7 +266,6 @@
 # CONFIG_ATALK is not set
 # CONFIG_X25 is not set
 # CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
 
@@ -1386,6 +1392,10 @@
 # Cryptographic options
 #
 CONFIG_CRYPTO=y
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_MANAGER=m
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_NULL=m
 CONFIG_CRYPTO_MD4=y
@@ -1395,9 +1405,12 @@
 CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_DES=y
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_TWOFISH_COMMON=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_AES=m
 CONFIG_CRYPTO_CAST5=m
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index 094d3e3..b0b4fee 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -983,7 +983,7 @@
 };
 
 /* The quotient formula is taken from the IA64 port. */
-#define SPARC64_NSEC_PER_CYC_SHIFT	30UL
+#define SPARC64_NSEC_PER_CYC_SHIFT	10UL
 void __init time_init(void)
 {
 	unsigned long clock = sparc64_init_timers();
diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile
index 426d20f..15b538a 100644
--- a/arch/x86_64/crypto/Makefile
+++ b/arch/x86_64/crypto/Makefile
@@ -5,5 +5,8 @@
 # 
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 
 aes-x86_64-y := aes-x86_64-asm.o aes.o
+twofish-x86_64-y := twofish-x86_64-asm.o twofish.o
+
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c
index 68866fa..5cdb13e 100644
--- a/arch/x86_64/crypto/aes.c
+++ b/arch/x86_64/crypto/aes.c
@@ -228,13 +228,14 @@
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 	u32 i, j, t, u, v, w;
 
-	if (key_len != 16 && key_len != 24 && key_len != 32) {
+	if (key_len % 8) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
diff --git a/arch/x86_64/crypto/twofish-x86_64-asm.S b/arch/x86_64/crypto/twofish-x86_64-asm.S
new file mode 100644
index 0000000..35974a5
--- /dev/null
+++ b/arch/x86_64/crypto/twofish-x86_64-asm.S
@@ -0,0 +1,324 @@
+/***************************************************************************
+*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+***************************************************************************/
+
+.file "twofish-x86_64-asm.S"
+.text
+
+#include <asm/asm-offsets.h>
+
+#define a_offset	0
+#define b_offset	4
+#define c_offset	8
+#define d_offset	12
+
+/* Structure of the crypto context struct*/
+
+#define s0	0	/* S0 Array 256 Words each */
+#define s1	1024	/* S1 Array */
+#define s2	2048	/* S2 Array */
+#define s3	3072	/* S3 Array */
+#define w	4096	/* 8 whitening keys (word) */
+#define k	4128	/* key 1-32 ( word ) */
+
+/* define a few register aliases to allow macro substitution */
+
+#define R0     %rax
+#define R0D    %eax
+#define R0B    %al
+#define R0H    %ah
+
+#define R1     %rbx
+#define R1D    %ebx
+#define R1B    %bl
+#define R1H    %bh
+
+#define R2     %rcx
+#define R2D    %ecx
+#define R2B    %cl
+#define R2H    %ch
+
+#define R3     %rdx
+#define R3D    %edx
+#define R3B    %dl
+#define R3H    %dh
+
+
+/* performs input whitening: xor src with the whitening key at w+offset */
+#define input_whitening(src,context,offset)\
+	xor	w+offset(context),	src;
+
+/* performs output whitening: same, using the keys 16 bytes further into w */
+#define output_whitening(src,context,offset)\
+	xor	w+16+offset(context),	src;
+
+
+/*
+ * a, b: input registers containing a (rotated 16) and b
+ * c, d: input registers containing c and d (d already rol $1)
+ * round: byte offset of this round's two key words within k
+ * %r11 is the context base; %edi, %r8d and %r9d are used as scratch
+ * operations on a and b are interleaved to increase performance
+ */
+#define encrypt_round(a,b,c,d,round)\
+	movzx	b ## B,		%edi;\
+	mov	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	(%r11,%rdi,4),	%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$15,		b ## D;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	rol	$15,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D;
+
+/*
+ * a, b: input registers containing a (rotated 16) and b
+ * c, d: input registers containing c and d (d already rol $1)
+ * round: byte offset of this round's two key words within k
+ * %r11 is the context base; %edi, %r8d, %r9d and %r10 are used as scratch
+ * operations on a and b are interleaved to increase performance
+ * a and b are packed into %r10 (b upper half) for the output whitening
+ */
+#define encrypt_last_round(a,b,c,d,round)\
+	mov	b ## D,		%r10d;\
+	shl	$32,		%r10;\
+	movzx	b ## B,		%edi;\
+	mov	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	mov	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s3(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	(%r11,%rdi,4),	%r9d;\
+	xor	a,		%r10;\
+	movzx	b ## H,		%edi;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## H,		%edi;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	ror	$1,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D
+
+/*
+ * a, b: input registers containing a and b (b rotated 16)
+ * c, d: input registers containing c (already rol $1) and d
+ * round: byte offset of this round's two key words within k
+ * %r11 is the context base; %edi, %r8d and %r9d are used as scratch
+ * operations on a and b are interleaved to increase performance
+ */
+#define decrypt_round(a,b,c,d,round)\
+	movzx	a ## B,		%edi;\
+	mov	(%r11,%rdi,4),	%r9d;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$16,		a ## D;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	ror	$15,		a ## D;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D;\
+	rol	$15,		d ## D;
+
+/*
+ * a, b: input registers containing a and b
+ * c, d: input registers containing c (already rol $1) and d
+ * round: byte offset of this round's two key words within k
+ * %r11 is the context base; %edi, %r8d, %r9d and %r10 are used as scratch
+ * operations on a and b are interleaved to increase performance
+ * a and b are packed into %r10 (b upper half) for the output whitening
+ */
+#define decrypt_last_round(a,b,c,d,round)\
+	movzx	a ## B,		%edi;\
+	mov	(%r11,%rdi,4),	%r9d;\
+	movzx	b ## B,		%edi;\
+	mov	s3(%r11,%rdi,4),%r8d;\
+	movzx	b ## H,		%edi;\
+	ror	$16,		b ## D;\
+	xor	(%r11,%rdi,4),	%r8d;\
+	movzx	a ## H,		%edi;\
+	mov	b ## D,		%r10d;\
+	shl	$32,		%r10;\
+	xor	a,		%r10;\
+	ror	$16,		a ## D;\
+	xor	s1(%r11,%rdi,4),%r9d;\
+	movzx	b ## B,		%edi;\
+	xor	s1(%r11,%rdi,4),%r8d;\
+	movzx	a ## B,		%edi;\
+	xor	s2(%r11,%rdi,4),%r9d;\
+	movzx	b ## H,		%edi;\
+	xor	s2(%r11,%rdi,4),%r8d;\
+	movzx	a ## H,		%edi;\
+	xor	s3(%r11,%rdi,4),%r9d;\
+	add	%r8d,		%r9d;\
+	add	%r9d,		%r8d;\
+	add	k+round(%r11),	%r9d;\
+	xor	%r9d,		c ## D;\
+	add	k+4+round(%r11),%r8d;\
+	xor	%r8d,		d ## D;\
+	ror	$1,		d ## D;
+
+.align 8
+.global twofish_enc_blk
+.global twofish_dec_blk
+
+twofish_enc_blk:
+	pushq    R1	/* R1 (%rbx) is restored by the popq before ret */
+
+	/* %rdi contains the crypto tfm address */
+	/* %rsi contains the output address */
+	/* %rdx contains the input address */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
+	/* ctx address is moved to %r11 to free one non-rex register
+	as target for the 8bit high operations */
+	mov	%rdi,		%r11
+
+	movq	(R3),	R1	/* first 8 input bytes */
+	movq	8(R3),	R3	/* second 8 input bytes; R3 no longer holds the pointer */
+	input_whitening(R1,%r11,a_offset)
+	input_whitening(R3,%r11,c_offset)
+	mov	R1D,	R0D	/* split the two quadwords into four 32-bit words */
+	rol	$16,	R0D
+	shr	$32,	R1
+	mov	R3D,	R2D
+	shr	$32,	R3
+	rol	$1,	R3D
+
+	encrypt_round(R0,R1,R2,R3,0);
+	encrypt_round(R2,R3,R0,R1,8);
+	encrypt_round(R0,R1,R2,R3,2*8);
+	encrypt_round(R2,R3,R0,R1,3*8);
+	encrypt_round(R0,R1,R2,R3,4*8);
+	encrypt_round(R2,R3,R0,R1,5*8);
+	encrypt_round(R0,R1,R2,R3,6*8);
+	encrypt_round(R2,R3,R0,R1,7*8);
+	encrypt_round(R0,R1,R2,R3,8*8);
+	encrypt_round(R2,R3,R0,R1,9*8);
+	encrypt_round(R0,R1,R2,R3,10*8);
+	encrypt_round(R2,R3,R0,R1,11*8);
+	encrypt_round(R0,R1,R2,R3,12*8);
+	encrypt_round(R2,R3,R0,R1,13*8);
+	encrypt_round(R0,R1,R2,R3,14*8);
+	encrypt_last_round(R2,R3,R0,R1,15*8);
+
+
+	output_whitening(%r10,%r11,a_offset)
+	movq	%r10,	(%rsi)	/* first 8 output bytes */
+
+	shl	$32,	R1	/* pack R1 (upper half) and R0 (lower half) */
+	xor	R0,	R1
+
+	output_whitening(R1,%r11,c_offset)
+	movq	R1,	8(%rsi)	/* second 8 output bytes */
+
+	popq	R1
+	movq	$1,%rax	/* return value 1 */
+	ret
+
+twofish_dec_blk:
+	pushq    R1	/* R1 (%rbx) is restored by the popq before ret */
+
+	/* %rdi contains the crypto tfm address */
+	/* %rsi contains the output address */
+	/* %rdx contains the input address */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
+	/* ctx address is moved to %r11 to free one non-rex register
+	as target for the 8bit high operations */
+	mov	%rdi,		%r11
+
+	movq	(R3),	R1	/* first 8 input bytes */
+	movq	8(R3),	R3	/* second 8 input bytes; R3 no longer holds the pointer */
+	output_whitening(R1,%r11,a_offset)
+	output_whitening(R3,%r11,c_offset)
+	mov	R1D,	R0D	/* split the two quadwords into four 32-bit words */
+	shr	$32,	R1
+	rol	$16,	R1D
+	mov	R3D,	R2D
+	shr	$32,	R3
+	rol	$1,	R2D
+
+	decrypt_round(R0,R1,R2,R3,15*8);
+	decrypt_round(R2,R3,R0,R1,14*8);
+	decrypt_round(R0,R1,R2,R3,13*8);
+	decrypt_round(R2,R3,R0,R1,12*8);
+	decrypt_round(R0,R1,R2,R3,11*8);
+	decrypt_round(R2,R3,R0,R1,10*8);
+	decrypt_round(R0,R1,R2,R3,9*8);
+	decrypt_round(R2,R3,R0,R1,8*8);
+	decrypt_round(R0,R1,R2,R3,7*8);
+	decrypt_round(R2,R3,R0,R1,6*8);
+	decrypt_round(R0,R1,R2,R3,5*8);
+	decrypt_round(R2,R3,R0,R1,4*8);
+	decrypt_round(R0,R1,R2,R3,3*8);
+	decrypt_round(R2,R3,R0,R1,2*8);
+	decrypt_round(R0,R1,R2,R3,1*8);
+	decrypt_last_round(R2,R3,R0,R1,0);
+
+	input_whitening(%r10,%r11,a_offset)
+	movq	%r10,	(%rsi)	/* first 8 output bytes */
+
+	shl	$32,	R1	/* pack R1 (upper half) and R0 (lower half) */
+	xor	R0,	R1
+
+	input_whitening(R1,%r11,c_offset)
+	movq	R1,	8(%rsi)	/* second 8 output bytes */
+
+	popq	R1
+	movq	$1,%rax	/* return value 1 */
+	ret
diff --git a/arch/x86_64/crypto/twofish.c b/arch/x86_64/crypto/twofish.c
new file mode 100644
index 0000000..182d91d
--- /dev/null
+++ b/arch/x86_64/crypto/twofish.c
@@ -0,0 +1,97 @@
+/*
+ * Glue Code for optimized x86_64 assembler version of TWOFISH
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+
+static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_enc_blk(tfm, dst, src);
+}
+
+static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	twofish_dec_blk(tfm, dst, src);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		=	"twofish",
+	.cra_driver_name	=	"twofish-x86_64",
+	.cra_priority		=	200,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	TF_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct twofish_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	TF_MIN_KEY_SIZE,
+			.cia_max_keysize	=	TF_MAX_KEY_SIZE,
+			.cia_setkey		=	twofish_setkey,
+			.cia_encrypt		=	twofish_encrypt,
+			.cia_decrypt		=	twofish_decrypt
+		}
+	}
+};
+
+static int __init init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized");
+MODULE_ALIAS("twofish");
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 764bf23..d6d7f73 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -108,6 +108,35 @@
 	return 0;
 }
 
+/*
+ * Returns 1 if the entire range <start,end> is mapped with type, else 0.
+ * A type of 0 matches entries of any type.
+ * Note: this function only works correctly if the e820 table is sorted and
+ * non-overlapping, which is the case
+ */
+int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
+{
+	int i;
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		if (type && ei->type != type)
+			continue;
+		/* skip entries that do not overlap <start,end> at all */
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+
+		/* if the entry covers the beginning of <start,end> we move
+		 * start to the end of the entry: all is mapped up to there
+		 */
+		if (ei->addr <= start)
+			start = ei->addr + ei->size;
+		/* if start is now at or beyond end, we're done, full coverage */
+		if (start >= end)
+			return 1; /* we're done */
+	}
+	return 0;
+}
+
 /* 
  * Find a free area in a specific range. 
  */ 
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index 2d48a79..3c55c76 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -9,7 +9,6 @@
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
-#include <linux/dmi.h>
 #include <asm/e820.h>
 
 #include "pci.h"
@@ -165,33 +164,11 @@
 	}
 }
 
-static int disable_mcfg(struct dmi_system_id *d)
-{
-	printk("PCI: %s detected. Disabling MCFG.\n", d->ident);
-	pci_probe &= ~PCI_PROBE_MMCONF;
-	return 0;
-}
-
-static struct dmi_system_id __initdata dmi_bad_mcfg[] = {
-	/* Has broken MCFG table that makes the system hang when used */
-        {
-         .callback = disable_mcfg,
-         .ident = "Intel D3C5105 SDV",
-         .matches = {
-                     DMI_MATCH(DMI_BIOS_VENDOR, "Intel"),
-                     DMI_MATCH(DMI_BOARD_NAME, "D26928"),
-                     },
-         },
-         {}
-};
-
 void __init pci_mmcfg_init(void)
 {
 	int i;
 
-	dmi_check_system(dmi_bad_mcfg);
-
-	if ((pci_probe & (PCI_PROBE_MMCONF|PCI_PROBE_MMCONF_FORCE)) == 0)
+	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
 		return;
 
 	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
@@ -200,6 +177,15 @@
 	    (pci_mmcfg_config[0].base_address == 0))
 		return;
 
+	if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
+			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
+			E820_RESERVED)) {
+		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
+				pci_mmcfg_config[0].base_address);
+		printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
+		return;
+	}
+
 	/* RED-PEN i386 doesn't do _nocache right now */
 	pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
 	if (pci_mmcfg_virt == NULL) {
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index ddd9253..9c3a06b 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -848,6 +848,35 @@
 EXPORT_SYMBOL(blk_queue_find_tag);
 
 /**
+ * __blk_free_tags - release a given set of tag maintenance info
+ * @bqt:	the tag map to free
+ *
+ * Drops a reference on @bqt and frees it when that was the last one.
+ * Returns true if it was freed, false if references to it remain.
+ */
+static int __blk_free_tags(struct blk_queue_tag *bqt)
+{
+	int retval;
+
+	retval = atomic_dec_and_test(&bqt->refcnt);
+	if (retval) {
+		BUG_ON(bqt->busy);
+		BUG_ON(!list_empty(&bqt->busy_list));
+
+		kfree(bqt->tag_index);
+		bqt->tag_index = NULL;
+
+		kfree(bqt->tag_map);
+		bqt->tag_map = NULL;
+
+		kfree(bqt);
+
+	}
+
+	return retval;
+}
+
+/**
  * __blk_queue_free_tags - release tag maintenance info
  * @q:  the request queue for the device
  *
@@ -862,23 +891,28 @@
 	if (!bqt)
 		return;
 
-	if (atomic_dec_and_test(&bqt->refcnt)) {
-		BUG_ON(bqt->busy);
-		BUG_ON(!list_empty(&bqt->busy_list));
-
-		kfree(bqt->tag_index);
-		bqt->tag_index = NULL;
-
-		kfree(bqt->tag_map);
-		bqt->tag_map = NULL;
-
-		kfree(bqt);
-	}
+	__blk_free_tags(bqt);
 
 	q->queue_tags = NULL;
 	q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
 }
 
+
+/**
+ * blk_free_tags - release a given set of tag maintenance info
+ * @bqt:	the tag map to free
+ *
+ * For an externally managed @bqt, frees the map.  Callers must first
+ * release all the queues that might have been using this tag map;
+ * this function BUG()s if any reference to @bqt is still held.
+ */
+void blk_free_tags(struct blk_queue_tag *bqt)
+{
+	if (unlikely(!__blk_free_tags(bqt)))
+		BUG();
+}
+EXPORT_SYMBOL(blk_free_tags);
+
 /**
  * blk_queue_free_tags - release tag maintenance info
  * @q:  the request queue for the device
@@ -901,7 +935,7 @@
 	unsigned long *tag_map;
 	int nr_ulongs;
 
-	if (depth > q->nr_requests * 2) {
+	if (q && depth > q->nr_requests * 2) {
 		depth = q->nr_requests * 2;
 		printk(KERN_ERR "%s: adjusted depth to %d\n",
 				__FUNCTION__, depth);
@@ -927,6 +961,38 @@
 	return -ENOMEM;
 }
 
+static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
+						   int depth)
+{
+	struct blk_queue_tag *tags;
+
+	tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
+	if (!tags)
+		goto fail;
+
+	if (init_tag_map(q, tags, depth))
+		goto fail;
+
+	INIT_LIST_HEAD(&tags->busy_list);
+	tags->busy = 0;
+	atomic_set(&tags->refcnt, 1);
+	return tags;
+fail:
+	kfree(tags);
+	return NULL;
+}
+
+/**
+ * blk_init_tags - initialize the tag info for an external tag map
+ * @depth:	the maximum queue depth supported
+ *
+ **/
+struct blk_queue_tag *blk_init_tags(int depth)
+{
+	return __blk_queue_init_tags(NULL, depth);
+}
+EXPORT_SYMBOL(blk_init_tags);
+
 /**
  * blk_queue_init_tags - initialize the queue tag info
  * @q:  the request queue for the device
@@ -941,16 +1007,10 @@
 	BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
 
 	if (!tags && !q->queue_tags) {
-		tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
+		tags = __blk_queue_init_tags(q, depth);
+
 		if (!tags)
 			goto fail;
-
-		if (init_tag_map(q, tags, depth))
-			goto fail;
-
-		INIT_LIST_HEAD(&tags->busy_list);
-		tags->busy = 0;
-		atomic_set(&tags->refcnt, 1);
 	} else if (q->queue_tags) {
 		if ((rc = blk_queue_resize_tags(q, depth)))
 			return rc;
@@ -1002,6 +1062,13 @@
 	}
 
 	/*
+	 * Currently cannot replace a shared tag map with a new
+	 * one, so error out if this is the case
+	 */
+	if (atomic_read(&bqt->refcnt) != 1)
+		return -EBUSY;
+
+	/*
 	 * save the old state info, so we can copy it back
 	 */
 	tag_index = bqt->tag_index;
@@ -2734,6 +2801,18 @@
 
 EXPORT_SYMBOL(blk_congestion_wait);
 
+/**
+ * blk_congestion_end - wake up sleepers on a congestion queue
+ * @rw: READ or WRITE
+ */
+void blk_congestion_end(int rw)
+{
+	wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+	if (waitqueue_active(wqh))
+		wake_up(wqh);
+}
+
 /*
  * Has to be called with the request spinlock acquired
  */
diff --git a/crypto/Kconfig b/crypto/Kconfig
index ba133d5..1e2f39c 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -9,47 +9,71 @@
 	help
 	  This option provides the core Cryptographic API.
 
+if CRYPTO
+
+config CRYPTO_ALGAPI
+	tristate
+	help
+	  This option provides the API for cryptographic algorithms.
+
+config CRYPTO_BLKCIPHER
+	tristate
+	select CRYPTO_ALGAPI
+
+config CRYPTO_HASH
+	tristate
+	select CRYPTO_ALGAPI
+
+config CRYPTO_MANAGER
+	tristate "Cryptographic algorithm manager"
+	select CRYPTO_ALGAPI
+	default m
+	help
+	  Create default cryptographic template instantiations such as
+	  cbc(aes).
+
 config CRYPTO_HMAC
-	bool "HMAC support"
-	depends on CRYPTO
+	tristate "HMAC support"
+	select CRYPTO_HASH
 	help
 	  HMAC: Keyed-Hashing for Message Authentication (RFC2104).
 	  This is required for IPSec.
 
 config CRYPTO_NULL
 	tristate "Null algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  These are 'Null' algorithms, used by IPsec, which do nothing.
 
 config CRYPTO_MD4
 	tristate "MD4 digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  MD4 message digest algorithm (RFC1320).
 
 config CRYPTO_MD5
 	tristate "MD5 digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  MD5 message digest algorithm (RFC1321).
 
 config CRYPTO_SHA1
 	tristate "SHA1 digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
 config CRYPTO_SHA1_S390
 	tristate "SHA1 digest algorithm (s390)"
-	depends on CRYPTO && S390
+	depends on S390
+	select CRYPTO_ALGAPI
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
 config CRYPTO_SHA256
 	tristate "SHA256 digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  SHA256 secure hash standard (DFIPS 180-2).
 	  
@@ -58,7 +82,8 @@
 
 config CRYPTO_SHA256_S390
 	tristate "SHA256 digest algorithm (s390)"
-	depends on CRYPTO && S390
+	depends on S390
+	select CRYPTO_ALGAPI
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  SHA256 secure hash standard (DFIPS 180-2).
@@ -68,7 +93,7 @@
 
 config CRYPTO_SHA512
 	tristate "SHA384 and SHA512 digest algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  SHA512 secure hash standard (DFIPS 180-2).
 	  
@@ -80,7 +105,7 @@
 
 config CRYPTO_WP512
 	tristate "Whirlpool digest algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Whirlpool hash algorithm 512, 384 and 256-bit hashes
 
@@ -92,7 +117,7 @@
 
 config CRYPTO_TGR192
 	tristate "Tiger digest algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Tiger hash algorithm 192, 160 and 128-bit hashes
 
@@ -103,21 +128,40 @@
 	  See also:
 	  <http://www.cs.technion.ac.il/~biham/Reports/Tiger/>.
 
+config CRYPTO_ECB
+	tristate "ECB support"
+	select CRYPTO_BLKCIPHER
+	default m
+	help
+	  ECB: Electronic CodeBook mode
+	  This is the simplest block cipher mode.  It simply encrypts
+	  the input block by block.
+
+config CRYPTO_CBC
+	tristate "CBC support"
+	select CRYPTO_BLKCIPHER
+	default m
+	help
+	  CBC: Cipher Block Chaining mode
+	  This block cipher mode is required for IPSec.
+
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
 config CRYPTO_DES_S390
 	tristate "DES and Triple DES cipher algorithms (s390)"
-	depends on CRYPTO && S390
+	depends on S390
+	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
 config CRYPTO_BLOWFISH
 	tristate "Blowfish cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Blowfish cipher algorithm, by Bruce Schneier.
 	  
@@ -130,7 +174,8 @@
 
 config CRYPTO_TWOFISH
 	tristate "Twofish cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
+	select CRYPTO_TWOFISH_COMMON
 	help
 	  Twofish cipher algorithm.
 	  
@@ -142,9 +187,47 @@
 	  See also:
 	  <http://www.schneier.com/twofish.html>
 
+config CRYPTO_TWOFISH_COMMON
+	tristate
+	help
+	  Common parts of the Twofish cipher algorithm shared by the
+	  generic C and the assembler implementations.
+
+config CRYPTO_TWOFISH_586
+	tristate "Twofish cipher algorithms (i586)"
+	depends on (X86 || UML_X86) && !64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_TWOFISH_COMMON
+	help
+	  Twofish cipher algorithm.
+
+	  Twofish was submitted as an AES (Advanced Encryption Standard)
+	  candidate cipher by researchers at CounterPane Systems.  It is a
+	  16 round block cipher supporting key sizes of 128, 192, and 256
+	  bits.
+
+	  See also:
+	  <http://www.schneier.com/twofish.html>
+
+config CRYPTO_TWOFISH_X86_64
+	tristate "Twofish cipher algorithm (x86_64)"
+	depends on (X86 || UML_X86) && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_TWOFISH_COMMON
+	help
+	  Twofish cipher algorithm (x86_64).
+
+	  Twofish was submitted as an AES (Advanced Encryption Standard)
+	  candidate cipher by researchers at CounterPane Systems.  It is a
+	  16 round block cipher supporting key sizes of 128, 192, and 256
+	  bits.
+
+	  See also:
+	  <http://www.schneier.com/twofish.html>
+
 config CRYPTO_SERPENT
 	tristate "Serpent cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -157,7 +240,7 @@
 
 config CRYPTO_AES
 	tristate "AES cipher algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
 	  algorithm.
@@ -177,7 +260,8 @@
 
 config CRYPTO_AES_586
 	tristate "AES cipher algorithms (i586)"
-	depends on CRYPTO && ((X86 || UML_X86) && !64BIT)
+	depends on (X86 || UML_X86) && !64BIT
+	select CRYPTO_ALGAPI
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
 	  algorithm.
@@ -197,7 +281,8 @@
 
 config CRYPTO_AES_X86_64
 	tristate "AES cipher algorithms (x86_64)"
-	depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
+	depends on (X86 || UML_X86) && 64BIT
+	select CRYPTO_ALGAPI
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
 	  algorithm.
@@ -217,7 +302,9 @@
 
 config CRYPTO_AES_S390
 	tristate "AES cipher algorithms (s390)"
-	depends on CRYPTO && S390
+	depends on S390
+	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
@@ -237,21 +324,21 @@
 
 config CRYPTO_CAST5
 	tristate "CAST5 (CAST-128) cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  The CAST5 encryption algorithm (synonymous with CAST-128) is
 	  described in RFC2144.
 
 config CRYPTO_CAST6
 	tristate "CAST6 (CAST-256) cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  The CAST6 encryption algorithm (synonymous with CAST-256) is
 	  described in RFC2612.
 
 config CRYPTO_TEA
 	tristate "TEA, XTEA and XETA cipher algorithms"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  TEA cipher algorithm.
 
@@ -268,7 +355,7 @@
 
 config CRYPTO_ARC4
 	tristate "ARC4 cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  ARC4 cipher algorithm.
 
@@ -279,7 +366,7 @@
 
 config CRYPTO_KHAZAD
 	tristate "Khazad cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Khazad cipher algorithm.
 
@@ -292,7 +379,7 @@
 
 config CRYPTO_ANUBIS
 	tristate "Anubis cipher algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Anubis cipher algorithm.
 
@@ -307,7 +394,7 @@
 
 config CRYPTO_DEFLATE
 	tristate "Deflate compression algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
 	help
@@ -318,7 +405,7 @@
 
 config CRYPTO_MICHAEL_MIC
 	tristate "Michael MIC keyed digest algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	help
 	  Michael MIC is used for message integrity protection in TKIP
 	  (IEEE 802.11i). This algorithm is required for TKIP, but it
@@ -327,7 +414,7 @@
 
 config CRYPTO_CRC32C
 	tristate "CRC32c CRC algorithm"
-	depends on CRYPTO
+	select CRYPTO_ALGAPI
 	select LIBCRC32C
 	help
 	  Castagnoli, et al Cyclic Redundancy-Check Algorithm.  Used
@@ -337,10 +424,13 @@
 
 config CRYPTO_TEST
 	tristate "Testing module"
-	depends on CRYPTO && m
+	depends on m
+	select CRYPTO_ALGAPI
 	help
 	  Quick & dirty crypto test module.
 
 source "drivers/crypto/Kconfig"
-endmenu
 
+endif	# if CRYPTO
+
+endmenu
diff --git a/crypto/Makefile b/crypto/Makefile
index d287b9e..7236620 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -2,11 +2,18 @@
 # Cryptographic API
 #
 
-proc-crypto-$(CONFIG_PROC_FS) = proc.o
+obj-$(CONFIG_CRYPTO) += api.o scatterwalk.o cipher.o digest.o compress.o
 
-obj-$(CONFIG_CRYPTO) += api.o scatterwalk.o cipher.o digest.o compress.o \
-			$(proc-crypto-y)
+crypto_algapi-$(CONFIG_PROC_FS) += proc.o
+crypto_algapi-objs := algapi.o $(crypto_algapi-y)
+obj-$(CONFIG_CRYPTO_ALGAPI) += crypto_algapi.o
 
+obj-$(CONFIG_CRYPTO_BLKCIPHER) += blkcipher.o
+
+crypto_hash-objs := hash.o
+obj-$(CONFIG_CRYPTO_HASH) += crypto_hash.o
+
+obj-$(CONFIG_CRYPTO_MANAGER) += cryptomgr.o
 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
 obj-$(CONFIG_CRYPTO_NULL) += crypto_null.o
 obj-$(CONFIG_CRYPTO_MD4) += md4.o
@@ -16,9 +23,12 @@
 obj-$(CONFIG_CRYPTO_SHA512) += sha512.o
 obj-$(CONFIG_CRYPTO_WP512) += wp512.o
 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
+obj-$(CONFIG_CRYPTO_ECB) += ecb.o
+obj-$(CONFIG_CRYPTO_CBC) += cbc.o
 obj-$(CONFIG_CRYPTO_DES) += des.o
 obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o
 obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o
+obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent.o
 obj-$(CONFIG_CRYPTO_AES) += aes.o
 obj-$(CONFIG_CRYPTO_CAST5) += cast5.o
diff --git a/crypto/aes.c b/crypto/aes.c
index a038711..e244077 100644
--- a/crypto/aes.c
+++ b/crypto/aes.c
@@ -249,13 +249,14 @@
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 	u32 i, t, u, v, w;
 
-	if (key_len != 16 && key_len != 24 && key_len != 32) {
+	if (key_len % 8) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
diff --git a/crypto/algapi.c b/crypto/algapi.c
new file mode 100644
index 0000000..c915300
--- /dev/null
+++ b/crypto/algapi.c
@@ -0,0 +1,486 @@
+/*
+ * Cryptographic API for algorithms (i.e., low-level API).
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/string.h>
+
+#include "internal.h"
+
+static LIST_HEAD(crypto_template_list);
+
+void crypto_larval_error(const char *name, u32 type, u32 mask)
+{
+	struct crypto_alg *alg;
+
+	down_read(&crypto_alg_sem);
+	alg = __crypto_alg_lookup(name, type, mask);
+	up_read(&crypto_alg_sem);
+
+	if (alg) {
+		if (crypto_is_larval(alg)) {
+			struct crypto_larval *larval = (void *)alg;
+			complete(&larval->completion);
+		}
+		crypto_mod_put(alg);
+	}
+}
+EXPORT_SYMBOL_GPL(crypto_larval_error);
+
+static inline int crypto_set_driver_name(struct crypto_alg *alg)
+{
+	static const char suffix[] = "-generic";
+	char *driver_name = alg->cra_driver_name;
+	int len;
+
+	if (*driver_name)
+		return 0;
+
+	len = strlcpy(driver_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
+	if (len + sizeof(suffix) > CRYPTO_MAX_ALG_NAME)
+		return -ENAMETOOLONG;
+
+	memcpy(driver_name + len, suffix, sizeof(suffix));
+	return 0;
+}
+
+static int crypto_check_alg(struct crypto_alg *alg)
+{
+	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
+		return -EINVAL;
+
+	if (alg->cra_alignmask & alg->cra_blocksize)
+		return -EINVAL;
+
+	if (alg->cra_blocksize > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	if (alg->cra_priority < 0)
+		return -EINVAL;
+
+	return crypto_set_driver_name(alg);
+}
+
+static void crypto_destroy_instance(struct crypto_alg *alg)
+{
+	struct crypto_instance *inst = (void *)alg;
+	struct crypto_template *tmpl = inst->tmpl;
+
+	tmpl->free(inst);
+	crypto_tmpl_put(tmpl);
+}
+
+static void crypto_remove_spawns(struct list_head *spawns,
+				 struct list_head *list)
+{
+	struct crypto_spawn *spawn, *n;
+
+	list_for_each_entry_safe(spawn, n, spawns, list) {
+		struct crypto_instance *inst = spawn->inst;
+		struct crypto_template *tmpl = inst->tmpl;
+
+		list_del_init(&spawn->list);
+		spawn->alg = NULL;
+
+		if (crypto_is_dead(&inst->alg))
+			continue;
+
+		inst->alg.cra_flags |= CRYPTO_ALG_DEAD;
+		if (!tmpl || !crypto_tmpl_get(tmpl))
+			continue;
+
+		crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg);
+		list_move(&inst->alg.cra_list, list);
+		hlist_del(&inst->list);
+		inst->alg.cra_destroy = crypto_destroy_instance;
+
+		if (!list_empty(&inst->alg.cra_users)) {
+			if (&n->list == spawns)
+				n = list_entry(inst->alg.cra_users.next,
+					       typeof(*n), list);
+			__list_splice(&inst->alg.cra_users, spawns->prev);
+		}
+	}
+}
+
+static int __crypto_register_alg(struct crypto_alg *alg,
+				 struct list_head *list)
+{
+	struct crypto_alg *q;
+	int ret = -EAGAIN;
+
+	if (crypto_is_dead(alg))
+		goto out;
+
+	INIT_LIST_HEAD(&alg->cra_users);
+
+	ret = -EEXIST;
+
+	atomic_set(&alg->cra_refcnt, 1);
+	list_for_each_entry(q, &crypto_alg_list, cra_list) {
+		if (q == alg)
+			goto out;
+
+		if (crypto_is_moribund(q))
+			continue;
+
+		if (crypto_is_larval(q)) {
+			struct crypto_larval *larval = (void *)q;
+
+			if (strcmp(alg->cra_name, q->cra_name) &&
+			    strcmp(alg->cra_driver_name, q->cra_name))
+				continue;
+
+			if (larval->adult)
+				continue;
+			if ((q->cra_flags ^ alg->cra_flags) & larval->mask)
+				continue;
+			if (!crypto_mod_get(alg))
+				continue;
+
+			larval->adult = alg;
+			complete(&larval->completion);
+			continue;
+		}
+
+		if (strcmp(alg->cra_name, q->cra_name))
+			continue;
+
+		if (strcmp(alg->cra_driver_name, q->cra_driver_name) &&
+		    q->cra_priority > alg->cra_priority)
+			continue;
+
+		crypto_remove_spawns(&q->cra_users, list);
+	}
+	
+	list_add(&alg->cra_list, &crypto_alg_list);
+
+	crypto_notify(CRYPTO_MSG_ALG_REGISTER, alg);
+	ret = 0;
+
+out:	
+	return ret;
+}
+
+static void crypto_remove_final(struct list_head *list)
+{
+	struct crypto_alg *alg;
+	struct crypto_alg *n;
+
+	list_for_each_entry_safe(alg, n, list, cra_list) {
+		list_del_init(&alg->cra_list);
+		crypto_alg_put(alg);
+	}
+}
+
+int crypto_register_alg(struct crypto_alg *alg)
+{
+	LIST_HEAD(list);
+	int err;
+
+	err = crypto_check_alg(alg);
+	if (err)
+		return err;
+
+	down_write(&crypto_alg_sem);
+	err = __crypto_register_alg(alg, &list);
+	up_write(&crypto_alg_sem);
+
+	crypto_remove_final(&list);
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_register_alg);
+
+static int crypto_remove_alg(struct crypto_alg *alg, struct list_head *list)
+{
+	if (unlikely(list_empty(&alg->cra_list)))
+		return -ENOENT;
+
+	alg->cra_flags |= CRYPTO_ALG_DEAD;
+
+	crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg);
+	list_del_init(&alg->cra_list);
+	crypto_remove_spawns(&alg->cra_users, list);
+
+	return 0;
+}
+
+int crypto_unregister_alg(struct crypto_alg *alg)
+{
+	int ret;
+	LIST_HEAD(list);
+	
+	down_write(&crypto_alg_sem);
+	ret = crypto_remove_alg(alg, &list);
+	up_write(&crypto_alg_sem);
+
+	if (ret)
+		return ret;
+
+	BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
+	if (alg->cra_destroy)
+		alg->cra_destroy(alg);
+
+	crypto_remove_final(&list);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_alg);
+
+int crypto_register_template(struct crypto_template *tmpl)
+{
+	struct crypto_template *q;
+	int err = -EEXIST;
+
+	down_write(&crypto_alg_sem);
+
+	list_for_each_entry(q, &crypto_template_list, list) {
+		if (q == tmpl)
+			goto out;
+	}
+
+	list_add(&tmpl->list, &crypto_template_list);
+	crypto_notify(CRYPTO_MSG_TMPL_REGISTER, tmpl);
+	err = 0;
+out:
+	up_write(&crypto_alg_sem);
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_register_template);
+
+void crypto_unregister_template(struct crypto_template *tmpl)
+{
+	struct crypto_instance *inst;
+	struct hlist_node *p, *n;
+	struct hlist_head *list;
+	LIST_HEAD(users);
+
+	down_write(&crypto_alg_sem);
+
+	BUG_ON(list_empty(&tmpl->list));
+	list_del_init(&tmpl->list);
+
+	list = &tmpl->instances;
+	hlist_for_each_entry(inst, p, list, list) {
+		int err = crypto_remove_alg(&inst->alg, &users);
+		BUG_ON(err);
+	}
+
+	crypto_notify(CRYPTO_MSG_TMPL_UNREGISTER, tmpl);
+
+	up_write(&crypto_alg_sem);
+
+	hlist_for_each_entry_safe(inst, p, n, list, list) {
+		BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1);
+		tmpl->free(inst);
+	}
+	crypto_remove_final(&users);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_template);
+
+static struct crypto_template *__crypto_lookup_template(const char *name)
+{
+	struct crypto_template *q, *tmpl = NULL;
+
+	down_read(&crypto_alg_sem);
+	list_for_each_entry(q, &crypto_template_list, list) {
+		if (strcmp(q->name, name))
+			continue;
+		if (unlikely(!crypto_tmpl_get(q)))
+			continue;
+
+		tmpl = q;
+		break;
+	}
+	up_read(&crypto_alg_sem);
+
+	return tmpl;
+}
+
+struct crypto_template *crypto_lookup_template(const char *name)
+{
+	return try_then_request_module(__crypto_lookup_template(name), name);
+}
+EXPORT_SYMBOL_GPL(crypto_lookup_template);
+
+int crypto_register_instance(struct crypto_template *tmpl,
+			     struct crypto_instance *inst)
+{
+	LIST_HEAD(list);
+	int err = -EINVAL;
+
+	if (inst->alg.cra_destroy)
+		goto err;
+
+	err = crypto_check_alg(&inst->alg);
+	if (err)
+		goto err;
+
+	inst->alg.cra_module = tmpl->module;
+
+	down_write(&crypto_alg_sem);
+
+	err = __crypto_register_alg(&inst->alg, &list);
+	if (err)
+		goto unlock;
+
+	hlist_add_head(&inst->list, &tmpl->instances);
+	inst->tmpl = tmpl;
+
+unlock:
+	up_write(&crypto_alg_sem);
+
+	crypto_remove_final(&list);
+
+err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_register_instance);
+
+int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg,
+		      struct crypto_instance *inst)
+{
+	int err = -EAGAIN;
+
+	spawn->inst = inst;
+
+	down_write(&crypto_alg_sem);
+	if (!crypto_is_moribund(alg)) {
+		list_add(&spawn->list, &alg->cra_users);
+		spawn->alg = alg;
+		err = 0;
+	}
+	up_write(&crypto_alg_sem);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_init_spawn);
+
+void crypto_drop_spawn(struct crypto_spawn *spawn)
+{
+	down_write(&crypto_alg_sem);
+	list_del(&spawn->list);
+	up_write(&crypto_alg_sem);
+}
+EXPORT_SYMBOL_GPL(crypto_drop_spawn);
+
+struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn)
+{
+	struct crypto_alg *alg;
+	struct crypto_alg *alg2;
+	struct crypto_tfm *tfm;
+
+	down_read(&crypto_alg_sem);
+	alg = spawn->alg;
+	alg2 = alg;
+	if (alg2)
+		alg2 = crypto_mod_get(alg2);
+	up_read(&crypto_alg_sem);
+
+	if (!alg2) {
+		if (alg)
+			crypto_shoot_alg(alg);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	tfm = __crypto_alloc_tfm(alg, 0);
+	if (IS_ERR(tfm))
+		crypto_mod_put(alg);
+
+	return tfm;
+}
+EXPORT_SYMBOL_GPL(crypto_spawn_tfm);
+
+int crypto_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&crypto_chain, nb);
+}
+EXPORT_SYMBOL_GPL(crypto_register_notifier);
+
+int crypto_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&crypto_chain, nb);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
+
+struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len,
+				       u32 type, u32 mask)
+{
+	struct rtattr *rta = param;
+	struct crypto_attr_alg *alga;
+
+	if (!RTA_OK(rta, len))
+		return ERR_PTR(-EBADR);
+	if (rta->rta_type != CRYPTOA_ALG || RTA_PAYLOAD(rta) < sizeof(*alga))
+		return ERR_PTR(-EINVAL);
+
+	alga = RTA_DATA(rta);
+	alga->name[CRYPTO_MAX_ALG_NAME - 1] = 0;
+
+	return crypto_alg_mod_lookup(alga->name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_get_attr_alg);
+
+struct crypto_instance *crypto_alloc_instance(const char *name,
+					      struct crypto_alg *alg)
+{
+	struct crypto_instance *inst;
+	struct crypto_spawn *spawn;
+	int err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return ERR_PTR(-ENOMEM);
+
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name,
+		     alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
+		goto err_free_inst;
+
+	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+		     name, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		goto err_free_inst;
+
+	spawn = crypto_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, inst);
+
+	if (err)
+		goto err_free_inst;
+
+	return inst;
+
+err_free_inst:
+	kfree(inst);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_instance);
+
+static int __init crypto_algapi_init(void)
+{
+	crypto_init_proc();
+	return 0;
+}
+
+static void __exit crypto_algapi_exit(void)
+{
+	crypto_exit_proc();
+}
+
+module_init(crypto_algapi_init);
+module_exit(crypto_algapi_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Cryptographic algorithms API");
diff --git a/crypto/anubis.c b/crypto/anubis.c
index 7e2e1a2..1c771f7 100644
--- a/crypto/anubis.c
+++ b/crypto/anubis.c
@@ -461,10 +461,11 @@
 };
 
 static int anubis_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			 unsigned int key_len, u32 *flags)
+			 unsigned int key_len)
 {
 	struct anubis_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __be32 *key = (const __be32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 	int N, R, i, r;
 	u32 kappa[ANUBIS_MAX_N];
 	u32 inter[ANUBIS_MAX_N];
diff --git a/crypto/api.c b/crypto/api.c
index c11ec1f..2e84d4b 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -15,71 +15,203 @@
  *
  */
 
-#include <linux/compiler.h>
-#include <linux/init.h>
-#include <linux/crypto.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/kmod.h>
-#include <linux/rwsem.h>
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include "internal.h"
 
 LIST_HEAD(crypto_alg_list);
+EXPORT_SYMBOL_GPL(crypto_alg_list);
 DECLARE_RWSEM(crypto_alg_sem);
+EXPORT_SYMBOL_GPL(crypto_alg_sem);
 
-static inline int crypto_alg_get(struct crypto_alg *alg)
+BLOCKING_NOTIFIER_HEAD(crypto_chain);
+EXPORT_SYMBOL_GPL(crypto_chain);
+
+static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 {
-	return try_module_get(alg->cra_module);
+	atomic_inc(&alg->cra_refcnt);
+	return alg;
 }
 
-static inline void crypto_alg_put(struct crypto_alg *alg)
+struct crypto_alg *crypto_mod_get(struct crypto_alg *alg)
 {
+	return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL;
+}
+EXPORT_SYMBOL_GPL(crypto_mod_get);
+
+void crypto_mod_put(struct crypto_alg *alg)
+{
+	crypto_alg_put(alg);
 	module_put(alg->cra_module);
 }
+EXPORT_SYMBOL_GPL(crypto_mod_put);
 
-static struct crypto_alg *crypto_alg_lookup(const char *name)
+struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask)
 {
 	struct crypto_alg *q, *alg = NULL;
-	int best = -1;
+	int best = -2;
 
-	if (!name)
-		return NULL;
-	
-	down_read(&crypto_alg_sem);
-	
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
 		int exact, fuzzy;
 
+		if (crypto_is_moribund(q))
+			continue;
+
+		if ((q->cra_flags ^ type) & mask)
+			continue;
+
+		if (crypto_is_larval(q) &&
+		    ((struct crypto_larval *)q)->mask != mask)
+			continue;
+
 		exact = !strcmp(q->cra_driver_name, name);
 		fuzzy = !strcmp(q->cra_name, name);
 		if (!exact && !(fuzzy && q->cra_priority > best))
 			continue;
 
-		if (unlikely(!crypto_alg_get(q)))
+		if (unlikely(!crypto_mod_get(q)))
 			continue;
 
 		best = q->cra_priority;
 		if (alg)
-			crypto_alg_put(alg);
+			crypto_mod_put(alg);
 		alg = q;
 
 		if (exact)
 			break;
 	}
-	
-	up_read(&crypto_alg_sem);
+
+	return alg;
+}
+EXPORT_SYMBOL_GPL(__crypto_alg_lookup);
+
+static void crypto_larval_destroy(struct crypto_alg *alg)
+{	/* cra_destroy hook installed on larvals: drop the adult ref, then free. */
+	struct crypto_larval *larval = (void *)alg;
+
+	BUG_ON(!crypto_is_larval(alg));	/* must only ever be called on a larval */
+	if (larval->adult)
+		crypto_mod_put(larval->adult);
+	kfree(larval);
+}
+
+static struct crypto_alg *crypto_larval_alloc(const char *name, u32 type,
+					      u32 mask)
+{
+	struct crypto_alg *alg;
+	struct crypto_larval *larval;
+
+	larval = kzalloc(sizeof(*larval), GFP_KERNEL);
+	if (!larval)
+		return ERR_PTR(-ENOMEM);
+
+	larval->mask = mask;
+	larval->alg.cra_flags = CRYPTO_ALG_LARVAL | type;
+	larval->alg.cra_priority = -1;
+	larval->alg.cra_destroy = crypto_larval_destroy;
+
+	atomic_set(&larval->alg.cra_refcnt, 2);
+	strlcpy(larval->alg.cra_name, name, CRYPTO_MAX_ALG_NAME);
+	init_completion(&larval->completion);
+
+	down_write(&crypto_alg_sem);
+	alg = __crypto_alg_lookup(name, type, mask);
+	if (!alg) {
+		alg = &larval->alg;
+		list_add(&alg->cra_list, &crypto_alg_list);
+	}
+	up_write(&crypto_alg_sem);
+
+	if (alg != &larval->alg)
+		kfree(larval);
+
 	return alg;
 }
 
-/* A far more intelligent version of this is planned.  For now, just
- * try an exact match on the name of the algorithm. */
-static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name)
+static void crypto_larval_kill(struct crypto_alg *alg)
 {
-	return try_then_request_module(crypto_alg_lookup(name), name);
+	struct crypto_larval *larval = (void *)alg;
+
+	down_write(&crypto_alg_sem);
+	list_del(&alg->cra_list);
+	up_write(&crypto_alg_sem);
+	complete(&larval->completion);
+	crypto_alg_put(alg);
 }
 
+static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
+{
+	struct crypto_larval *larval = (void *)alg;
+	struct crypto_alg *adult;
+
+	/* Sleep (interruptibly, at most 60s) for the larval's completion. */
+	wait_for_completion_interruptible_timeout(&larval->completion, 60 * HZ);
+	adult = larval->adult;
+	if (!adult)
+		adult = ERR_PTR(-ENOENT);	/* nothing was attached to the larval */
+	else if (!crypto_mod_get(adult))
+		adult = ERR_PTR(-EAGAIN);	/* could not take a reference on it */
+	crypto_mod_put(&larval->alg);	/* drop the reference held on the larval */
+	return adult;
+}
+
+static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
+					    u32 mask)
+{	/* __crypto_alg_lookup() performed under a read lock on crypto_alg_sem. */
+	struct crypto_alg *alg;
+
+	down_read(&crypto_alg_sem);
+	alg = __crypto_alg_lookup(name, type, mask);
+	up_read(&crypto_alg_sem);
+
+	return alg;
+}
+
+struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
+{	/* Find @name: list lookup, then module load, then a notifier request. */
+	struct crypto_alg *alg;
+	struct crypto_alg *larval;
+	int ok;
+
+	if (!name)
+		return ERR_PTR(-ENOENT);
+
+	mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);	/* callers may not match on these bits */
+	type &= mask;
+
+	alg = try_then_request_module(crypto_alg_lookup(name, type, mask),
+				      name);
+	if (alg)	/* found: if it is still a larval, wait for it */
+		return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg;
+
+	larval = crypto_larval_alloc(name, type, mask);
+	if (IS_ERR(larval) || !crypto_is_larval(larval))
+		return larval;	/* error, or a real algorithm was found meanwhile */
+
+	ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval);
+	if (ok == NOTIFY_DONE) {	/* request not taken up: load cryptomgr and retry once */
+		request_module("cryptomgr");
+		ok = crypto_notify(CRYPTO_MSG_ALG_REQUEST, larval);
+	}
+
+	if (ok == NOTIFY_STOP)
+		alg = crypto_larval_wait(larval);
+	else {
+		crypto_mod_put(larval);	/* same put crypto_larval_wait() would have done */
+		alg = ERR_PTR(-ENOENT);
+	}
+	crypto_larval_kill(larval);	/* unlist the larval and complete its waiters */
+	return alg;
+}
+EXPORT_SYMBOL_GPL(crypto_alg_mod_lookup);
+
 static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags)
 {
 	tfm->crt_flags = flags & CRYPTO_TFM_REQ_MASK;
@@ -94,17 +226,18 @@
 		
 	case CRYPTO_ALG_TYPE_COMPRESS:
 		return crypto_init_compress_flags(tfm, flags);
-	
-	default:
-		break;
 	}
 	
-	BUG();
-	return -EINVAL;
+	return 0;
 }
 
 static int crypto_init_ops(struct crypto_tfm *tfm)
 {
+	const struct crypto_type *type = tfm->__crt_alg->cra_type;
+
+	if (type)
+		return type->init(tfm);
+
 	switch (crypto_tfm_alg_type(tfm)) {
 	case CRYPTO_ALG_TYPE_CIPHER:
 		return crypto_init_cipher_ops(tfm);
@@ -125,6 +258,14 @@
 
 static void crypto_exit_ops(struct crypto_tfm *tfm)
 {
+	const struct crypto_type *type = tfm->__crt_alg->cra_type;
+
+	if (type) {
+		if (type->exit)
+			type->exit(tfm);
+		return;
+	}
+
 	switch (crypto_tfm_alg_type(tfm)) {
 	case CRYPTO_ALG_TYPE_CIPHER:
 		crypto_exit_cipher_ops(tfm);
@@ -146,53 +287,67 @@
 
 static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags)
 {
+	const struct crypto_type *type = alg->cra_type;
 	unsigned int len;
 
+	len = alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1);
+	if (type)
+		return len + type->ctxsize(alg);
+
 	switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
 	default:
 		BUG();
 
 	case CRYPTO_ALG_TYPE_CIPHER:
-		len = crypto_cipher_ctxsize(alg, flags);
+		len += crypto_cipher_ctxsize(alg, flags);
 		break;
 		
 	case CRYPTO_ALG_TYPE_DIGEST:
-		len = crypto_digest_ctxsize(alg, flags);
+		len += crypto_digest_ctxsize(alg, flags);
 		break;
 		
 	case CRYPTO_ALG_TYPE_COMPRESS:
-		len = crypto_compress_ctxsize(alg, flags);
+		len += crypto_compress_ctxsize(alg, flags);
 		break;
 	}
 
-	return len + (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1));
+	return len;
 }
 
-struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
+void crypto_shoot_alg(struct crypto_alg *alg)
+{	/* Flag @alg as CRYPTO_ALG_DYING under the write lock on crypto_alg_sem. */
+	down_write(&crypto_alg_sem);
+	alg->cra_flags |= CRYPTO_ALG_DYING;
+	up_write(&crypto_alg_sem);
+}
+EXPORT_SYMBOL_GPL(crypto_shoot_alg);
+
+struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 flags)
 {
 	struct crypto_tfm *tfm = NULL;
-	struct crypto_alg *alg;
 	unsigned int tfm_size;
-
-	alg = crypto_alg_mod_lookup(name);
-	if (alg == NULL)
-		goto out;
+	int err = -ENOMEM;
 
 	tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags);
 	tfm = kzalloc(tfm_size, GFP_KERNEL);
 	if (tfm == NULL)
-		goto out_put;
+		goto out;
 
 	tfm->__crt_alg = alg;
-	
-	if (crypto_init_flags(tfm, flags))
+
+	err = crypto_init_flags(tfm, flags);
+	if (err)
 		goto out_free_tfm;
 		
-	if (crypto_init_ops(tfm))
+	err = crypto_init_ops(tfm);
+	if (err)
 		goto out_free_tfm;
 
-	if (alg->cra_init && alg->cra_init(tfm))
+	if (alg->cra_init && (err = alg->cra_init(tfm))) {
+		if (err == -EAGAIN)
+			crypto_shoot_alg(alg);
 		goto cra_init_failed;
+	}
 
 	goto out;
 
@@ -200,13 +355,97 @@
 	crypto_exit_ops(tfm);
 out_free_tfm:
 	kfree(tfm);
-	tfm = NULL;
-out_put:
-	crypto_alg_put(alg);
+	tfm = ERR_PTR(err);
 out:
 	return tfm;
 }
+EXPORT_SYMBOL_GPL(__crypto_alloc_tfm);
 
+struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
+{	/* Look up @name and allocate a transform; returns NULL (not ERR_PTR) on failure. */
+	struct crypto_tfm *tfm = NULL;
+	int err;
+
+	do {
+		struct crypto_alg *alg;
+
+		alg = crypto_alg_mod_lookup(name, 0, CRYPTO_ALG_ASYNC);
+		err = PTR_ERR(alg);	/* only meaningful when IS_ERR(alg) */
+		if (IS_ERR(alg))
+			continue;	/* jumps to the loop condition below */
+
+		tfm = __crypto_alloc_tfm(alg, flags);
+		err = 0;
+		if (IS_ERR(tfm)) {
+			crypto_mod_put(alg);	/* give back the lookup's reference */
+			err = PTR_ERR(tfm);
+			tfm = NULL;
+		}
+	} while (err == -EAGAIN && !signal_pending(current));	/* retry only on -EAGAIN */
+
+	return tfm;
+}
+
+/*
+ *	crypto_alloc_base - Locate algorithm and allocate transform
+ *	@alg_name: Name of algorithm
+ *	@type: Type of algorithm
+ *	@mask: Mask for type comparison
+ *
+ *	crypto_alloc_base() will first attempt to locate an already loaded
+ *	algorithm.  If that fails and the kernel supports dynamically loadable
+ *	modules, it will then attempt to load a module of the same name or
+ *	alias.  If that fails it will send a query to any loaded crypto manager
+ *	to construct an algorithm on the fly.  A refcount is grabbed on the
+ *	algorithm which is then associated with the new transform.
+ *
+ *	The returned transform is of a non-determinate type.  Most people
+ *	should use one of the more specific allocation functions such as
+ *	crypto_alloc_blkcipher.
+ *
+ *	In case of error the return value is an error pointer.
+ */
+struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask)
+{
+	struct crypto_tfm *tfm;
+	int err;
+
+	for (;;) {
+		struct crypto_alg *alg;
+
+		alg = crypto_alg_mod_lookup(alg_name, type, mask);
+		err = PTR_ERR(alg);	/* only meaningful when IS_ERR(alg) */
+		if (IS_ERR(alg))
+			goto err;
+
+		tfm = __crypto_alloc_tfm(alg, 0);
+		if (!IS_ERR(tfm))
+			return tfm;	/* success: the tfm keeps the algorithm reference */
+
+		crypto_mod_put(alg);	/* allocation failed: give back the lookup's ref */
+		err = PTR_ERR(tfm);
+
+err:
+		if (err != -EAGAIN)	/* only -EAGAIN is worth another attempt */
+			break;
+		if (signal_pending(current)) {
+			err = -EINTR;
+			break;
+		}
+	}
+
+	/* All failures leave via err, so -EINTR is reported, not a stale -EAGAIN. */
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_base);
+ 
+/*
+ *	crypto_free_tfm - Free crypto transform
+ *	@tfm: Transform to free
+ *
+ *	crypto_free_tfm() frees up the transform and any associated resources,
+ *	then drops the refcount on the associated algorithm.
+ */
 void crypto_free_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_alg *alg;
@@ -221,108 +460,39 @@
 	if (alg->cra_exit)
 		alg->cra_exit(tfm);
 	crypto_exit_ops(tfm);
-	crypto_alg_put(alg);
+	crypto_mod_put(alg);
 	memset(tfm, 0, size);
 	kfree(tfm);
 }
 
-static inline int crypto_set_driver_name(struct crypto_alg *alg)
-{
-	static const char suffix[] = "-generic";
-	char *driver_name = alg->cra_driver_name;
-	int len;
-
-	if (*driver_name)
-		return 0;
-
-	len = strlcpy(driver_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
-	if (len + sizeof(suffix) > CRYPTO_MAX_ALG_NAME)
-		return -ENAMETOOLONG;
-
-	memcpy(driver_name + len, suffix, sizeof(suffix));
-	return 0;
-}
-
-int crypto_register_alg(struct crypto_alg *alg)
-{
-	int ret;
-	struct crypto_alg *q;
-
-	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
-		return -EINVAL;
-
-	if (alg->cra_alignmask & alg->cra_blocksize)
-		return -EINVAL;
-
-	if (alg->cra_blocksize > PAGE_SIZE / 8)
-		return -EINVAL;
-
-	if (alg->cra_priority < 0)
-		return -EINVAL;
-	
-	ret = crypto_set_driver_name(alg);
-	if (unlikely(ret))
-		return ret;
-
-	down_write(&crypto_alg_sem);
-	
-	list_for_each_entry(q, &crypto_alg_list, cra_list) {
-		if (q == alg) {
-			ret = -EEXIST;
-			goto out;
-		}
-	}
-	
-	list_add(&alg->cra_list, &crypto_alg_list);
-out:	
-	up_write(&crypto_alg_sem);
-	return ret;
-}
-
-int crypto_unregister_alg(struct crypto_alg *alg)
-{
-	int ret = -ENOENT;
-	struct crypto_alg *q;
-	
-	BUG_ON(!alg->cra_module);
-	
-	down_write(&crypto_alg_sem);
-	list_for_each_entry(q, &crypto_alg_list, cra_list) {
-		if (alg == q) {
-			list_del(&alg->cra_list);
-			ret = 0;
-			goto out;
-		}
-	}
-out:	
-	up_write(&crypto_alg_sem);
-	return ret;
-}
-
 int crypto_alg_available(const char *name, u32 flags)
 {
 	int ret = 0;
-	struct crypto_alg *alg = crypto_alg_mod_lookup(name);
+	struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0,
+						       CRYPTO_ALG_ASYNC);
 	
-	if (alg) {
-		crypto_alg_put(alg);
+	if (!IS_ERR(alg)) {
+		crypto_mod_put(alg);
 		ret = 1;
 	}
 	
 	return ret;
 }
 
-static int __init init_crypto(void)
-{
-	printk(KERN_INFO "Initializing Cryptographic API\n");
-	crypto_init_proc();
-	return 0;
-}
-
-__initcall(init_crypto);
-
-EXPORT_SYMBOL_GPL(crypto_register_alg);
-EXPORT_SYMBOL_GPL(crypto_unregister_alg);
 EXPORT_SYMBOL_GPL(crypto_alloc_tfm);
 EXPORT_SYMBOL_GPL(crypto_free_tfm);
 EXPORT_SYMBOL_GPL(crypto_alg_available);
+
+int crypto_has_alg(const char *name, u32 type, u32 mask)
+{
+	struct crypto_alg *found = crypto_alg_mod_lookup(name, type, mask);
+
+	/* Any lookup error simply means "not available". */
+	if (IS_ERR(found))
+		return 0;
+
+	/* This was only a probe: drop the reference the lookup took. */
+	crypto_mod_put(found);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(crypto_has_alg);
diff --git a/crypto/arc4.c b/crypto/arc4.c
index 5edc6a6..8be47e1 100644
--- a/crypto/arc4.c
+++ b/crypto/arc4.c
@@ -25,7 +25,7 @@
 };
 
 static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-			unsigned int key_len, u32 *flags)
+			unsigned int key_len)
 {
 	struct arc4_ctx *ctx = crypto_tfm_ctx(tfm);
 	int i, j = 0, k = 0;
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
new file mode 100644
index 0000000..034c939
--- /dev/null
+++ b/crypto/blkcipher.c
@@ -0,0 +1,405 @@
+/*
+ * Block chaining cipher operations.
+ * 
+ * Generic encrypt/decrypt wrapper for ciphers, handles operations across
+ * multiple page boundaries by using temporary blocks.  In user context,
+ * the kernel is given a chance to schedule us once per page.
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ *
+ */
+
+#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "internal.h"
+#include "scatterwalk.h"
+
+enum {
+	BLKCIPHER_WALK_PHYS = 1 << 0,	/* set by blkcipher_walk_phys(), cleared by blkcipher_walk_virt() */
+	BLKCIPHER_WALK_SLOW = 1 << 1,	/* current step goes through the one-block bounce buffer */
+	BLKCIPHER_WALK_COPY = 1 << 2,	/* current step works on a copy held in walk->page */
+	BLKCIPHER_WALK_DIFF = 1 << 3,	/* src and dst were mapped separately for this step */
+};
+
+static int blkcipher_walk_next(struct blkcipher_desc *desc,
+			       struct blkcipher_walk *walk);
+static int blkcipher_walk_first(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk);
+
+static inline void blkcipher_map_src(struct blkcipher_walk *walk)
+{	/* Map the input walk's current position and record the address. */
+	walk->src.virt.addr = scatterwalk_map(&walk->in, 0);
+}
+
+static inline void blkcipher_map_dst(struct blkcipher_walk *walk)
+{	/* Map the output walk's current position and record the address. */
+	walk->dst.virt.addr = scatterwalk_map(&walk->out, 1);
+}
+
+static inline void blkcipher_unmap_src(struct blkcipher_walk *walk)
+{	/* Undo blkcipher_map_src(). */
+	scatterwalk_unmap(walk->src.virt.addr, 0);
+}
+
+static inline void blkcipher_unmap_dst(struct blkcipher_walk *walk)
+{	/* Undo blkcipher_map_dst(). */
+	scatterwalk_unmap(walk->dst.virt.addr, 1);
+}
+
+static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len)
+{	/* Pick an address for @len bytes at or after @start that does not straddle a page. */
+	if (offset_in_page(start + len) < len)	/* start + len falls in a later page than start */
+		return (u8 *)((unsigned long)(start + len) & PAGE_MASK);	/* use the start of that page */
+	return start;
+}
+
+static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm,
+					       struct blkcipher_walk *walk,
+					       unsigned int bsize)
+{	/* Finish a slow step: copy the bounced output block to walk->out; returns @bsize. */
+	u8 *addr;
+	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
+
+	addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+	addr = blkcipher_get_spot(addr, bsize);	/* same address blkcipher_next_slow() used as dst */
+	scatterwalk_copychunks(addr, &walk->out, bsize, 1);
+	return bsize;
+}
+
+static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk,
+					       unsigned int n)
+{	/* Finish a fast/copy step; @n is the unprocessed remainder. Returns bytes consumed. */
+	n = walk->nbytes - n;	/* from here on n is the number of bytes processed */
+
+	if (walk->flags & BLKCIPHER_WALK_COPY) {
+		blkcipher_map_dst(walk);
+		memcpy(walk->dst.virt.addr, walk->page, n);	/* copy the result out of the bounce page */
+		blkcipher_unmap_dst(walk);
+	} else if (!(walk->flags & BLKCIPHER_WALK_PHYS)) {
+		blkcipher_unmap_src(walk);
+		if (walk->flags & BLKCIPHER_WALK_DIFF)	/* dst had its own mapping */
+			blkcipher_unmap_dst(walk);
+	}
+
+	scatterwalk_advance(&walk->in, n);
+	scatterwalk_advance(&walk->out, n);
+
+	return n;
+}
+
+int blkcipher_walk_done(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk, int err)
+{	/* Finish one walk step; @err >= 0 is the unprocessed byte count, < 0 an error. */
+	struct crypto_blkcipher *tfm = desc->tfm;
+	unsigned int nbytes = 0;	/* bytes left in the whole walk; stays 0 on error */
+
+	if (likely(err >= 0)) {
+		unsigned int bsize = crypto_blkcipher_blocksize(tfm);
+		unsigned int n;	/* bytes consumed by this step */
+
+		if (likely(!(walk->flags & BLKCIPHER_WALK_SLOW)))
+			n = blkcipher_done_fast(walk, err);
+		else
+			n = blkcipher_done_slow(tfm, walk, bsize);
+
+		nbytes = walk->total - n;
+		err = 0;
+	}
+
+	scatterwalk_done(&walk->in, 0, nbytes);
+	scatterwalk_done(&walk->out, 1, nbytes);
+
+	walk->total = nbytes;
+	walk->nbytes = nbytes;
+
+	if (nbytes) {	/* more data: possibly yield, then set up the next step */
+		crypto_yield(desc->flags);
+		return blkcipher_walk_next(desc, walk);
+	}
+
+	if (walk->iv != desc->info)	/* the IV was relocated: copy it back to the caller */
+		memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm));
+	if (walk->buffer != walk->page)	/* buffer is not an alias of the page (may be NULL) */
+		kfree(walk->buffer);
+	if (walk->page)
+		free_page((unsigned long)walk->page);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(blkcipher_walk_done);
+
+static inline int blkcipher_next_slow(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      unsigned int bsize,
+				      unsigned int alignmask)
+{	/* Set up a step that processes exactly one block through a bounce buffer. */
+	unsigned int n;
+
+	if (walk->buffer)	/* reuse a buffer obtained earlier in this walk */
+		goto ok;
+
+	walk->buffer = walk->page;	/* otherwise borrow the bounce page, if there is one */
+	if (walk->buffer)
+		goto ok;
+
+	n = bsize * 2 + (alignmask & ~(crypto_tfm_ctx_alignment() - 1));	/* two blocks plus alignment slack */
+	walk->buffer = kmalloc(n, GFP_ATOMIC);
+	if (!walk->buffer)
+		return blkcipher_walk_done(desc, walk, -ENOMEM);
+
+ok:
+	walk->dst.virt.addr = (u8 *)ALIGN((unsigned long)walk->buffer,
+					  alignmask + 1);
+	walk->dst.virt.addr = blkcipher_get_spot(walk->dst.virt.addr, bsize);
+	walk->src.virt.addr = blkcipher_get_spot(walk->dst.virt.addr + bsize,
+						 bsize);
+
+	scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0);	/* pull one input block into src */
+
+	walk->nbytes = bsize;
+	walk->flags |= BLKCIPHER_WALK_SLOW;
+
+	return 0;
+}
+
+static inline int blkcipher_next_copy(struct blkcipher_walk *walk)
+{	/* Set up a step that works in place on a copy of the input held in walk->page. */
+	u8 *tmp = walk->page;
+
+	blkcipher_map_src(walk);
+	memcpy(tmp, walk->src.virt.addr, walk->nbytes);
+	blkcipher_unmap_src(walk);
+
+	walk->src.virt.addr = tmp;	/* src and dst both point at the copy */
+	walk->dst.virt.addr = tmp;
+
+	return 0;
+}
+
+static inline int blkcipher_next_fast(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk)
+{	/* Set up a step that works directly on the scatterlist pages. */
+	unsigned long diff;
+
+	walk->src.phys.page = scatterwalk_page(&walk->in);
+	walk->src.phys.offset = offset_in_page(walk->in.offset);
+	walk->dst.phys.page = scatterwalk_page(&walk->out);
+	walk->dst.phys.offset = offset_in_page(walk->out.offset);
+
+	if (walk->flags & BLKCIPHER_WALK_PHYS)	/* physical walk: page/offset is all that is needed */
+		return 0;
+
+	diff = walk->src.phys.offset - walk->dst.phys.offset;	/* non-zero if the offsets differ... */
+	diff |= walk->src.virt.page - walk->dst.virt.page;	/* ...or the pages differ */
+
+	blkcipher_map_src(walk);
+	walk->dst.virt.addr = walk->src.virt.addr;	/* default: in place, dst shares src's mapping */
+
+	if (diff) {
+		walk->flags |= BLKCIPHER_WALK_DIFF;
+		blkcipher_map_dst(walk);
+	}
+
+	return 0;
+}
+
+static int blkcipher_walk_next(struct blkcipher_desc *desc,
+			       struct blkcipher_walk *walk)
+{	/* Choose and set up the next step of the walk: fast, copy, or slow. */
+	struct crypto_blkcipher *tfm = desc->tfm;
+	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
+	unsigned int bsize = crypto_blkcipher_blocksize(tfm);
+	unsigned int n;
+	int err;
+
+	n = walk->total;
+	if (unlikely(n < bsize)) {	/* less than one block left in total */
+		desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
+		return blkcipher_walk_done(desc, walk, -EINVAL);
+	}
+
+	walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
+			 BLKCIPHER_WALK_DIFF);	/* per-step flags start clean */
+	if (!scatterwalk_aligned(&walk->in, alignmask) ||
+	    !scatterwalk_aligned(&walk->out, alignmask)) {
+		walk->flags |= BLKCIPHER_WALK_COPY;
+		if (!walk->page) {
+			walk->page = (void *)__get_free_page(GFP_ATOMIC);
+			if (!walk->page)
+				n = 0;	/* no bounce page: n < bsize forces the slow path below */
+		}
+	}
+
+	n = scatterwalk_clamp(&walk->in, n);
+	n = scatterwalk_clamp(&walk->out, n);
+
+	if (unlikely(n < bsize)) {	/* not even one block available in this chunk */
+		err = blkcipher_next_slow(desc, walk, bsize, alignmask);
+		goto set_phys_lowmem;
+	}
+
+	walk->nbytes = n;
+	if (walk->flags & BLKCIPHER_WALK_COPY) {
+		err = blkcipher_next_copy(walk);
+		goto set_phys_lowmem;
+	}
+
+	return blkcipher_next_fast(desc, walk);
+
+set_phys_lowmem:	/* slow/copy steps use virtual addresses; convert them for physical walks */
+	if (walk->flags & BLKCIPHER_WALK_PHYS) {
+		walk->src.phys.page = virt_to_page(walk->src.virt.addr);
+		walk->dst.phys.page = virt_to_page(walk->dst.virt.addr);
+		walk->src.phys.offset &= PAGE_SIZE - 1;
+		walk->dst.phys.offset &= PAGE_SIZE - 1;
+	}
+	return err;
+}
+
+static inline int blkcipher_copy_iv(struct blkcipher_walk *walk,
+				    struct crypto_blkcipher *tfm,
+				    unsigned int alignmask)
+{	/* Move the IV into an aligned spot in a freshly allocated walk->buffer. */
+	unsigned bs = crypto_blkcipher_blocksize(tfm);
+	unsigned int ivsize = crypto_blkcipher_ivsize(tfm);
+	unsigned int size = bs * 2 + ivsize + max(bs, ivsize) - (alignmask + 1);
+	u8 *iv;
+
+	size += alignmask & ~(crypto_tfm_ctx_alignment() - 1);	/* extra slack for aligning the start */
+	walk->buffer = kmalloc(size, GFP_ATOMIC);
+	if (!walk->buffer)
+		return -ENOMEM;
+
+	iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+	iv = blkcipher_get_spot(iv, bs) + bs;	/* step past the first block-sized slot */
+	iv = blkcipher_get_spot(iv, bs) + bs;	/* step past the second block-sized slot */
+	iv = blkcipher_get_spot(iv, ivsize);
+
+	walk->iv = memcpy(iv, walk->iv, ivsize);	/* memcpy returns iv: walk->iv now points at the copy */
+	return 0;
+}
+
+int blkcipher_walk_virt(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk)
+{	/* Start a walk with BLKCIPHER_WALK_PHYS cleared. */
+	walk->flags &= ~BLKCIPHER_WALK_PHYS;
+	return blkcipher_walk_first(desc, walk);
+}
+EXPORT_SYMBOL_GPL(blkcipher_walk_virt);
+
+int blkcipher_walk_phys(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk)
+{	/* Start a walk with BLKCIPHER_WALK_PHYS set. */
+	walk->flags |= BLKCIPHER_WALK_PHYS;
+	return blkcipher_walk_first(desc, walk);
+}
+EXPORT_SYMBOL_GPL(blkcipher_walk_phys);
+
+static int blkcipher_walk_first(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk)
+{	/* Shared start-up for virt/phys walks: reset state, fix up the IV, begin stepping. */
+	struct crypto_blkcipher *tfm = desc->tfm;
+	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
+
+	walk->nbytes = walk->total;
+	if (unlikely(!walk->total))	/* empty request: nothing to walk */
+		return 0;
+
+	walk->buffer = NULL;
+	walk->iv = desc->info;
+	if (unlikely(((unsigned long)walk->iv & alignmask))) {	/* caller's IV is misaligned */
+		int err = blkcipher_copy_iv(walk, tfm, alignmask);
+		if (err)
+			return err;
+	}
+
+	scatterwalk_start(&walk->in, walk->in.sg);
+	scatterwalk_start(&walk->out, walk->out.sg);
+	walk->page = NULL;	/* the bounce page is allocated on demand by blkcipher_walk_next() */
+
+	return blkcipher_walk_next(desc, walk);
+}
+
+static int setkey(struct crypto_tfm *tfm, const u8 *key,
+		  unsigned int keylen)
+{
+	struct blkcipher_alg *bc = &tfm->__crt_alg->cra_blkcipher;
+
+	/* Length within the advertised range: defer to the algorithm's setkey. */
+	if (keylen >= bc->min_keysize && keylen <= bc->max_keysize)
+		return bc->setkey(tfm, key, keylen);
+
+	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+
+static unsigned int crypto_blkcipher_ctxsize(struct crypto_alg *alg)
+{
+	unsigned int ivsize = alg->cra_blkcipher.ivsize;
+	unsigned int size = alg->cra_ctxsize;
+
+	/* Algorithms without an IV need only their own context. */
+	if (!ivsize)
+		return size;
+	/* Otherwise pad up to the alignment and append room for the IV. */
+	size = ALIGN(size, (unsigned long)alg->cra_alignmask + 1);
+	return size + ivsize;
+}
+
+static int crypto_init_blkcipher_ops(struct crypto_tfm *tfm)
+{	/* crypto_type .init hook: wire up the blkcipher ops and place the IV in the ctx. */
+	struct blkcipher_tfm *crt = &tfm->crt_blkcipher;
+	struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
+	unsigned long align = crypto_tfm_alg_alignmask(tfm) + 1;
+	unsigned long addr;
+
+	if (alg->ivsize > PAGE_SIZE / 8)	/* reject oversized IVs */
+		return -EINVAL;
+
+	crt->setkey = setkey;	/* range-checking wrapper around alg->setkey */
+	crt->encrypt = alg->encrypt;
+	crt->decrypt = alg->decrypt;
+
+	addr = (unsigned long)crypto_tfm_ctx(tfm);
+	addr = ALIGN(addr, align);
+	addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align);	/* IV sits after the aligned algorithm ctx */
+	crt->iv = (void *)addr;
+
+	return 0;
+}
+
+static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute_used__;	/* only referenced when CONFIG_PROC_FS is set (see crypto_blkcipher_type) */
+static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
+{	/* Print the blkcipher-specific attributes of @alg to seq_file @m. */
+	seq_printf(m, "type         : blkcipher\n");
+	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+	seq_printf(m, "min keysize  : %u\n", alg->cra_blkcipher.min_keysize);
+	seq_printf(m, "max keysize  : %u\n", alg->cra_blkcipher.max_keysize);
+	seq_printf(m, "ivsize       : %u\n", alg->cra_blkcipher.ivsize);
+}
+
+const struct crypto_type crypto_blkcipher_type = {	/* type hooks for blkcipher algorithms */
+	.ctxsize = crypto_blkcipher_ctxsize,
+	.init = crypto_init_blkcipher_ops,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_blkcipher_show,
+#endif
+};
+EXPORT_SYMBOL_GPL(crypto_blkcipher_type);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic block chaining cipher type");
diff --git a/crypto/blowfish.c b/crypto/blowfish.c
index 490265f..55238c4 100644
--- a/crypto/blowfish.c
+++ b/crypto/blowfish.c
@@ -399,8 +399,7 @@
 /* 
  * Calculates the blowfish S and P boxes for encryption and decryption.
  */
-static int bf_setkey(struct crypto_tfm *tfm, const u8 *key,
-		     unsigned int keylen, u32 *flags)
+static int bf_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
 {
 	struct bf_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *P = ctx->p;
diff --git a/crypto/cast5.c b/crypto/cast5.c
index 08eef58..13ea60a 100644
--- a/crypto/cast5.c
+++ b/crypto/cast5.c
@@ -769,8 +769,7 @@
 }
 
 
-static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key,
-			unsigned key_len, u32 *flags)
+static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned key_len)
 {
 	struct cast5_ctx *c = crypto_tfm_ctx(tfm);
 	int i;
@@ -778,11 +777,6 @@
 	u32 z[4];
 	u32 k[16];
 	__be32 p_key[4];
-	
-	if (key_len < 5 || key_len > 16) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
 
 	c->rr = key_len <= 10 ? 1 : 0;
 
diff --git a/crypto/cast6.c b/crypto/cast6.c
index 08e33bf..136ab6d 100644
--- a/crypto/cast6.c
+++ b/crypto/cast6.c
@@ -382,14 +382,15 @@
 }
 
 static int cast6_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			unsigned key_len, u32 *flags)
+			unsigned key_len)
 {
 	int i;
 	u32 key[8];
 	__be32 p_key[8]; /* padded key */
 	struct cast6_ctx *c = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
 
-	if (key_len < 16 || key_len > 32 || key_len % 4 != 0) {
+	if (key_len % 4 != 0) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}	
diff --git a/crypto/cbc.c b/crypto/cbc.c
new file mode 100644
index 0000000..f5542b4
--- /dev/null
+++ b/crypto/cbc.c
@@ -0,0 +1,344 @@
+/*
+ * CBC: Cipher Block Chaining mode
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+struct crypto_cbc_ctx {
+	struct crypto_cipher *child;	/* underlying single-block cipher */
+	void (*xor)(u8 *dst, const u8 *src, unsigned int bs);	/* dst ^= src helper, chosen by block size */
+};
+
+static int crypto_cbc_setkey(struct crypto_tfm *parent, const u8 *key,
+			     unsigned int keylen)
+{	/* Forward the key to the child cipher, relaying request and result flags. */
+	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(parent);
+	struct crypto_cipher *child = ctx->child;
+	int err;
+
+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);	/* parent's request flags -> child */
+	err = crypto_cipher_setkey(child, key, keylen);
+	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
+				     CRYPTO_TFM_RES_MASK);	/* child's result flags -> parent */
+	return err;
+}
+
+static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      struct crypto_cipher *tfm,
+				      void (*xor)(u8 *, const u8 *,
+						  unsigned int))
+{	/* CBC-encrypt the whole blocks of one step, src != dst; returns leftover bytes. */
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		crypto_cipher_alg(tfm)->cia_encrypt;
+	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		xor(iv, src, bsize);	/* iv ^= plaintext block */
+		fn(crypto_cipher_tfm(tfm), dst, iv);	/* dst = encrypt(iv) */
+		memcpy(iv, dst, bsize);	/* the ciphertext becomes the next chaining value */
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	return nbytes;
+}
+
+static int crypto_cbc_encrypt_inplace(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      struct crypto_cipher *tfm,
+				      void (*xor)(u8 *, const u8 *,
+						  unsigned int))
+{	/* CBC-encrypt the whole blocks of one step in place; returns leftover bytes. */
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		crypto_cipher_alg(tfm)->cia_encrypt;
+	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		xor(src, iv, bsize);	/* block ^= chaining value */
+		fn(crypto_cipher_tfm(tfm), src, src);
+		iv = src;	/* chain through the just-written ciphertext, no copy */
+
+		src += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);	/* keep the last ciphertext block as the next IV */
+
+	return nbytes;
+}
+
+static int crypto_cbc_encrypt(struct blkcipher_desc *desc,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{	/* CBC-encrypt @nbytes from @src into @dst, one walk step at a time. */
+	struct blkcipher_walk walk;
+	struct crypto_blkcipher *tfm = desc->tfm;
+	struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+	void (*xor)(u8 *, const u8 *, unsigned int bs) = ctx->xor;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {	/* walk.nbytes reaches 0 when the walk ends */
+		if (walk.src.virt.addr == walk.dst.virt.addr)
+			nbytes = crypto_cbc_encrypt_inplace(desc, &walk, child,
+							    xor);
+		else
+			nbytes = crypto_cbc_encrypt_segment(desc, &walk, child,
+							    xor);
+		err = blkcipher_walk_done(desc, &walk, nbytes);	/* pass the unprocessed remainder */
+	}
+
+	return err;
+}
+
+static int crypto_cbc_decrypt_segment(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      struct crypto_cipher *tfm,
+				      void (*xor)(u8 *, const u8 *,
+						  unsigned int))
+{	/* CBC-decrypt the whole blocks of one step, src != dst; returns leftover bytes. */
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		crypto_cipher_alg(tfm)->cia_decrypt;
+	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		fn(crypto_cipher_tfm(tfm), dst, src);	/* dst = decrypt(ciphertext block) */
+		xor(dst, iv, bsize);	/* dst ^= chaining value */
+		iv = src;	/* this ciphertext block chains into the next one */
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);	/* keep the last ciphertext block as the next IV */
+
+	return nbytes;
+}
+
+static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc,
+				      struct blkcipher_walk *walk,
+				      struct crypto_cipher *tfm,
+				      void (*xor)(u8 *, const u8 *,
+						  unsigned int))
+{	/* CBC-decrypt one step in place, last block first; returns leftover bytes. */
+	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
+		crypto_cipher_alg(tfm)->cia_decrypt;
+	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned long alignmask = crypto_cipher_alignmask(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 stack[bsize + alignmask];	/* variable-length array holding an aligned copy of the IV */
+	u8 *first_iv = (u8 *)ALIGN((unsigned long)stack, alignmask + 1);
+
+	memcpy(first_iv, walk->iv, bsize);	/* save the incoming IV: walk->iv is overwritten next */
+
+	/* Start of the last block. */
+	src += nbytes - nbytes % bsize - bsize;
+	memcpy(walk->iv, src, bsize);	/* last ciphertext block is the next IV; save before decrypting */
+
+	for (;;) {
+		fn(crypto_cipher_tfm(tfm), src, src);
+		if ((nbytes -= bsize) < bsize)	/* that was the first block of the step */
+			break;
+		xor(src, src - bsize, bsize);	/* previous block is still ciphertext at this point */
+		src -= bsize;
+	}
+
+	xor(src, first_iv, bsize);	/* the first block chains from the saved IV */
+
+	return nbytes;
+}
+
+/* Decrypt hook: hand each walk step to the in-place or segment helper. */
+static int crypto_cbc_decrypt(struct blkcipher_desc *desc,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{
+	struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void (*xor)(u8 *, const u8 *, unsigned int bs) = ctx->xor;
+	struct crypto_cipher *child = ctx->child;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while (walk.nbytes) {
+		if (walk.src.virt.addr != walk.dst.virt.addr)
+			nbytes = crypto_cbc_decrypt_segment(desc, &walk, child,
+							    xor);
+		else
+			nbytes = crypto_cbc_decrypt_inplace(desc, &walk, child,
+							    xor);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static void xor_byte(u8 *a, const u8 *b, unsigned int bs)
+{
+	do {	/* body runs before the count is tested: bs must be >= 1 */
+		*a++ ^= *b++;
+	} while (--bs);
+}
+
+static void xor_quad(u8 *dst, const u8 *src, unsigned int bs)
+{
+	u32 *a = (u32 *)dst;	/* both buffers are accessed as 32-bit words */
+	u32 *b = (u32 *)src;
+
+	do {
+		*a++ ^= *b++;
+	} while ((bs -= 4));	/* reaches zero only if bs is a multiple of 4 */
+}
+
+static void xor_64(u8 *a, const u8 *b, unsigned int bs)
+{
+	((u32 *)a)[0] ^= ((u32 *)b)[0];	/* bs is ignored: always 8 bytes */
+	((u32 *)a)[1] ^= ((u32 *)b)[1];
+}
+
+static void xor_128(u8 *a, const u8 *b, unsigned int bs)
+{
+	((u32 *)a)[0] ^= ((u32 *)b)[0];	/* bs is ignored: always 16 bytes */
+	((u32 *)a)[1] ^= ((u32 *)b)[1];
+	((u32 *)a)[2] ^= ((u32 *)b)[2];
+	((u32 *)a)[3] ^= ((u32 *)b)[3];
+}
+
+/*
+ * Per-transform setup: choose the XOR helper matching the block size and
+ * instantiate the underlying cipher from the instance's spawn.
+ */
+static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+	struct crypto_tfm *cipher;
+
+	/* Unrolled helpers for 8- and 16-byte blocks, generic ones otherwise. */
+	if (bsize == 8)
+		ctx->xor = xor_64;
+	else if (bsize == 16)
+		ctx->xor = xor_128;
+	else if (bsize % 4)
+		ctx->xor = xor_byte;
+	else
+		ctx->xor = xor_quad;
+
+	cipher = crypto_spawn_tfm(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = crypto_cipher_cast(cipher);
+	return 0;
+}
+
+static void crypto_cbc_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_free_cipher(ctx->child);	/* child was set by init_tfm */
+}
+
+/* Template alloc hook: wrap the named cipher in a CBC blkcipher instance. */
+static struct crypto_instance *crypto_cbc_alloc(void *param, unsigned int len)
+{
+	struct crypto_instance *inst;
+	struct crypto_alg *cipher, *cbc;
+
+	cipher = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_CIPHER,
+				     CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cipher))
+		return ERR_PTR(PTR_ERR(cipher));
+
+	inst = crypto_alloc_instance("cbc", cipher);
+	if (IS_ERR(inst))
+		goto out_put_alg;
+	cbc = &inst->alg;
+
+	cbc->cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
+	cbc->cra_type = &crypto_blkcipher_type;
+	cbc->cra_priority = cipher->cra_priority;
+	cbc->cra_blocksize = cipher->cra_blocksize;
+	cbc->cra_ctxsize = sizeof(struct crypto_cbc_ctx);
+	cbc->cra_init = crypto_cbc_init_tfm;
+	cbc->cra_exit = crypto_cbc_exit_tfm;
+
+	/* The word-wise XOR helpers require at least 4-byte alignment. */
+	cbc->cra_alignmask = cipher->cra_alignmask;
+	if (cipher->cra_blocksize % 4 == 0)
+		cbc->cra_alignmask |= 3;
+	cbc->cra_blkcipher.ivsize = cipher->cra_blocksize;
+	cbc->cra_blkcipher.min_keysize = cipher->cra_cipher.cia_min_keysize;
+	cbc->cra_blkcipher.max_keysize = cipher->cra_cipher.cia_max_keysize;
+	cbc->cra_blkcipher.setkey = crypto_cbc_setkey;
+	cbc->cra_blkcipher.encrypt = crypto_cbc_encrypt;
+	cbc->cra_blkcipher.decrypt = crypto_cbc_decrypt;
+
+out_put_alg:
+	crypto_mod_put(cipher);
+	return inst;
+}
+
+static void crypto_cbc_free(struct crypto_instance *inst)
+{
+	crypto_drop_spawn(crypto_instance_ctx(inst));	/* ctx is the spawn */
+	kfree(inst);
+}
+
+static struct crypto_template crypto_cbc_tmpl = {
+	.name = "cbc",
+	.alloc = crypto_cbc_alloc,	/* builds an instance around a cipher */
+	.free = crypto_cbc_free,	/* drops the spawn, frees the instance */
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_cbc_module_init(void)
+{
+	return crypto_register_template(&crypto_cbc_tmpl);	/* adds "cbc" */
+}
+
+static void __exit crypto_cbc_module_exit(void)
+{
+	crypto_unregister_template(&crypto_cbc_tmpl);	/* drops "cbc" */
+}
+
+module_init(crypto_cbc_module_init);
+module_exit(crypto_cbc_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CBC block cipher algorithm");
diff --git a/crypto/cipher.c b/crypto/cipher.c
index b899eb9..9e03701 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -23,6 +23,28 @@
 #include "internal.h"
 #include "scatterwalk.h"
 
+struct cipher_alg_compat {
+	unsigned int cia_min_keysize;
+	unsigned int cia_max_keysize;
+	int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key,
+	                  unsigned int keylen);
+	void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+	void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+	/* Multi-block ECB/CBC hooks; users cast &cra_cipher to this view. */
+	unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+};
+
 static inline void xor_64(u8 *a, const u8 *b)
 {
 	((u32 *)a)[0] ^= ((u32 *)b)[0];
@@ -45,15 +67,10 @@
 	u8 buffer[bsize * 2 + alignmask];
 	u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
 	u8 *dst = src + bsize;
-	unsigned int n;
 
-	n = scatterwalk_copychunks(src, in, bsize, 0);
-	scatterwalk_advance(in, n);
-
+	scatterwalk_copychunks(src, in, bsize, 0);
 	desc->prfn(desc, dst, src, bsize);
-
-	n = scatterwalk_copychunks(dst, out, bsize, 1);
-	scatterwalk_advance(out, n);
+	scatterwalk_copychunks(dst, out, bsize, 1);
 
 	return bsize;
 }
@@ -64,12 +81,16 @@
 				      unsigned int nbytes, u8 *tmp)
 {
 	u8 *src, *dst;
+	u8 *real_src, *real_dst;
 
-	src = in->data;
-	dst = scatterwalk_samebuf(in, out) ? src : out->data;
+	real_src = scatterwalk_map(in, 0);
+	real_dst = scatterwalk_map(out, 1);
+
+	src = real_src;
+	dst = scatterwalk_samebuf(in, out) ? src : real_dst;
 
 	if (tmp) {
-		memcpy(tmp, in->data, nbytes);
+		memcpy(tmp, src, nbytes);
 		src = tmp;
 		dst = tmp;
 	}
@@ -77,7 +98,10 @@
 	nbytes = desc->prfn(desc, dst, src, nbytes);
 
 	if (tmp)
-		memcpy(out->data, tmp, nbytes);
+		memcpy(real_dst, tmp, nbytes);
+
+	scatterwalk_unmap(real_src, 0);
+	scatterwalk_unmap(real_dst, 1);
 
 	scatterwalk_advance(in, nbytes);
 	scatterwalk_advance(out, nbytes);
@@ -126,9 +150,6 @@
 			tmp = (u8 *)buffer;
 		}
 
-		scatterwalk_map(&walk_in, 0);
-		scatterwalk_map(&walk_out, 1);
-
 		n = scatterwalk_clamp(&walk_in, n);
 		n = scatterwalk_clamp(&walk_out, n);
 
@@ -145,7 +166,7 @@
 		if (!nbytes)
 			break;
 
-		crypto_yield(tfm);
+		crypto_yield(tfm->crt_flags);
 	}
 
 	if (buffer)
@@ -264,12 +285,12 @@
 {
 	struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher;
 	
+	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
 	if (keylen < cia->cia_min_keysize || keylen > cia->cia_max_keysize) {
 		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	} else
-		return cia->cia_setkey(tfm, key, keylen,
-		                       &tfm->crt_flags);
+		return cia->cia_setkey(tfm, key, keylen);
 }
 
 static int ecb_encrypt(struct crypto_tfm *tfm,
@@ -277,7 +298,7 @@
                        struct scatterlist *src, unsigned int nbytes)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_encrypt;
@@ -292,7 +313,7 @@
 		       unsigned int nbytes)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_decrypt;
@@ -307,7 +328,7 @@
 		       unsigned int nbytes)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_encrypt;
@@ -323,7 +344,7 @@
                           unsigned int nbytes, u8 *iv)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_encrypt;
@@ -339,7 +360,7 @@
 		       unsigned int nbytes)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_decrypt;
@@ -355,7 +376,7 @@
                           unsigned int nbytes, u8 *iv)
 {
 	struct cipher_desc desc;
-	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	struct cipher_alg_compat *cipher = (void *)&tfm->__crt_alg->cra_cipher;
 
 	desc.tfm = tfm;
 	desc.crfn = cipher->cia_decrypt;
@@ -388,17 +409,67 @@
 	return 0;
 }
 
+static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *,
+					      const u8 *),
+				   struct crypto_tfm *tfm,
+				   u8 *dst, const u8 *src)
+{
+	unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+	unsigned long mask = crypto_tfm_alg_alignmask(tfm);
+	u8 bounce[bsize + mask];	/* extra room so it can be realigned */
+	u8 *aligned = (u8 *)ALIGN((unsigned long)bounce, mask + 1);
+
+	memcpy(aligned, src, bsize);
+	fn(tfm, aligned, aligned);
+	memcpy(dst, aligned, bsize);
+}
+
+/* Encrypt one block, going through a bounce buffer if dst/src is unaligned. */
+static void cipher_encrypt_unaligned(struct crypto_tfm *tfm,
+				     u8 *dst, const u8 *src)
+{
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	unsigned long mask = crypto_tfm_alg_alignmask(tfm);
+	unsigned long addrs = (unsigned long)dst | (unsigned long)src;
+
+	if (unlikely(addrs & mask))
+		cipher_crypt_unaligned(cipher->cia_encrypt, tfm, dst, src);
+	else
+		cipher->cia_encrypt(tfm, dst, src);
+}
+
+/* Decrypt one block, going through a bounce buffer if dst/src is unaligned. */
+static void cipher_decrypt_unaligned(struct crypto_tfm *tfm,
+				     u8 *dst, const u8 *src)
+{
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
+	unsigned long mask = crypto_tfm_alg_alignmask(tfm);
+	unsigned long addrs = (unsigned long)dst | (unsigned long)src;
+
+	if (unlikely(addrs & mask))
+		cipher_crypt_unaligned(cipher->cia_decrypt, tfm, dst, src);
+	else
+		cipher->cia_decrypt(tfm, dst, src);
+}
+
 int crypto_init_cipher_ops(struct crypto_tfm *tfm)
 {
 	int ret = 0;
 	struct cipher_tfm *ops = &tfm->crt_cipher;
+	struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
 
 	ops->cit_setkey = setkey;
+	ops->cit_encrypt_one = crypto_tfm_alg_alignmask(tfm) ?
+		cipher_encrypt_unaligned : cipher->cia_encrypt;
+	ops->cit_decrypt_one = crypto_tfm_alg_alignmask(tfm) ?
+		cipher_decrypt_unaligned : cipher->cia_decrypt;
 
 	switch (tfm->crt_cipher.cit_mode) {
 	case CRYPTO_TFM_MODE_ECB:
 		ops->cit_encrypt = ecb_encrypt;
 		ops->cit_decrypt = ecb_decrypt;
+		ops->cit_encrypt_iv = nocrypt_iv;
+		ops->cit_decrypt_iv = nocrypt_iv;
 		break;
 		
 	case CRYPTO_TFM_MODE_CBC:
diff --git a/crypto/crc32c.c b/crypto/crc32c.c
index f266012..0fa7443 100644
--- a/crypto/crc32c.c
+++ b/crypto/crc32c.c
@@ -16,14 +16,14 @@
 #include <linux/string.h>
 #include <linux/crypto.h>
 #include <linux/crc32c.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
 
 #define CHKSUM_BLOCK_SIZE	32
 #define CHKSUM_DIGEST_SIZE	4
 
 struct chksum_ctx {
 	u32 crc;
+	u32 key;
 };
 
 /*
@@ -35,7 +35,7 @@
 {
 	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 
-	mctx->crc = ~(u32)0;			/* common usage */
+	mctx->crc = mctx->key;
 }
 
 /*
@@ -44,16 +44,15 @@
  * the seed.
  */
 static int chksum_setkey(struct crypto_tfm *tfm, const u8 *key,
-			 unsigned int keylen, u32 *flags)
+			 unsigned int keylen)
 {
 	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 
 	if (keylen != sizeof(mctx->crc)) {
-		if (flags)
-			*flags = CRYPTO_TFM_RES_BAD_KEY_LEN;
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
-	mctx->crc = __cpu_to_le32(*(u32 *)key);
+	mctx->key = le32_to_cpu(*(__le32 *)key);
 	return 0;
 }
 
@@ -61,19 +60,23 @@
 			  unsigned int length)
 {
 	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-	u32 mcrc;
 
-	mcrc = crc32c(mctx->crc, data, (size_t)length);
-
-	mctx->crc = mcrc;
+	mctx->crc = crc32c(mctx->crc, data, length);
 }
 
 static void chksum_final(struct crypto_tfm *tfm, u8 *out)
 {
 	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-	u32 mcrc = (mctx->crc ^ ~(u32)0);
 	
-	*(u32 *)out = __le32_to_cpu(mcrc);
+	*(__le32 *)out = ~cpu_to_le32(mctx->crc);
+}
+
+static int crc32c_cra_init(struct crypto_tfm *tfm)
+{
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = ~0;
+	return 0;
 }
 
 static struct crypto_alg alg = {
@@ -83,6 +86,7 @@
 	.cra_ctxsize	=	sizeof(struct chksum_ctx),
 	.cra_module	=	THIS_MODULE,
 	.cra_list	=	LIST_HEAD_INIT(alg.cra_list),
+	.cra_init	=	crc32c_cra_init,
 	.cra_u		=	{
 		.digest = {
 			 .dia_digestsize=	CHKSUM_DIGEST_SIZE,
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index a0d956b..24dbb5d 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -48,7 +48,7 @@
 { }
 
 static int null_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen, u32 *flags)
+		       unsigned int keylen)
 { return 0; }
 
 static void null_crypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c
new file mode 100644
index 0000000..9b5b156
--- /dev/null
+++ b/crypto/cryptomgr.c
@@ -0,0 +1,156 @@
+/*
+ * Create default crypto algorithm instances.
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/crypto.h>
+#include <linux/ctype.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+
+#include "internal.h"
+
+struct cryptomgr_param {
+	struct work_struct work;
+	/* Attribute blob passed to the template's alloc hook by the probe. */
+	struct {
+		struct rtattr attr;
+		struct crypto_attr_alg data;
+	} alg;
+	/* Name, type and mask copied from the larval that asked for the probe. */
+	struct {
+		u32 type;
+		u32 mask;
+		char name[CRYPTO_MAX_ALG_NAME];
+	} larval;
+	/* Template name: the part of the requested name before '('. */
+	char template[CRYPTO_MAX_ALG_NAME];
+};
+
+/* Work handler: instantiate the template, or report failure to the larval. */
+static void cryptomgr_probe(void *data)
+{
+	struct cryptomgr_param *param = data;
+	struct crypto_template *tmpl;
+	struct crypto_instance *inst;
+	int err;
+
+	tmpl = crypto_lookup_template(param->template);
+	if (!tmpl)
+		goto fail;
+
+	for (;;) {
+		inst = tmpl->alloc(&param->alg, sizeof(param->alg));
+		if (IS_ERR(inst)) {
+			err = PTR_ERR(inst);
+		} else {
+			err = crypto_register_instance(tmpl, inst);
+			if (err)
+				tmpl->free(inst);
+		}
+		if (err != -EAGAIN || signal_pending(current))
+			break;
+	}
+	crypto_tmpl_put(tmpl);
+	if (!err)
+		goto done;
+fail:
+	crypto_larval_error(param->larval.name, param->larval.type,
+			    param->larval.mask);
+done:
+	kfree(param);
+}
+
+/* Parse "template(alg)" from the larval's name and queue a probe for it. */
+static int cryptomgr_schedule_probe(struct crypto_larval *larval)
+{
+	const char *tmpl = larval->alg.cra_name;
+	const char *alg, *end;
+	struct cryptomgr_param *param;
+	unsigned int tmpl_len, alg_len;
+
+	param = kmalloc(sizeof(*param), GFP_KERNEL);
+	if (!param)
+		return NOTIFY_OK;
+
+	/* Template name: alphanumerics, '-' and '_', terminated by '('. */
+	end = tmpl;
+	while (isalnum(*end) || *end == '-' || *end == '_')
+		end++;
+	tmpl_len = end - tmpl;
+	if (!tmpl_len || *end != '(')
+		goto err_free_param;
+
+	/* Algorithm name: same alphabet, then ')' as the final character. */
+	alg = end + 1;
+	end = alg;
+	while (isalnum(*end) || *end == '-' || *end == '_')
+		end++;
+	alg_len = end - alg;
+	if (!alg_len || *end != ')' || end[1])
+		goto err_free_param;
+
+	memcpy(param->template, tmpl, tmpl_len);
+	param->template[tmpl_len] = 0;
+	param->alg.attr.rta_len = sizeof(param->alg);
+	param->alg.attr.rta_type = CRYPTOA_ALG;
+	memcpy(param->alg.data.name, alg, alg_len);
+	param->alg.data.name[alg_len] = 0;
+
+	memcpy(param->larval.name, larval->alg.cra_name, CRYPTO_MAX_ALG_NAME);
+	param->larval.type = larval->alg.cra_flags;
+	param->larval.mask = larval->mask;
+
+	INIT_WORK(&param->work, cryptomgr_probe, param);
+	schedule_work(&param->work);
+	return NOTIFY_STOP;
+
+err_free_param:
+	kfree(param);
+	return NOTIFY_OK;
+}
+
+/* Notifier callback: only algorithm requests are handled here. */
+static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
+			    void *data)
+{
+	if (msg != CRYPTO_MSG_ALG_REQUEST)
+		return NOTIFY_DONE;
+
+	/* data is handed on as the struct crypto_larval to probe for. */
+	return cryptomgr_schedule_probe(data);
+}
+
+static struct notifier_block cryptomgr_notifier = {
+	.notifier_call = cryptomgr_notify,	/* see cryptomgr_init() */
+};
+
+static int __init cryptomgr_init(void)
+{
+	return crypto_register_notifier(&cryptomgr_notifier);	/* subscribe */
+}
+
+static void __exit cryptomgr_exit(void)
+{
+	int err = crypto_unregister_notifier(&cryptomgr_notifier);
+	BUG_ON(err);	/* a failed unregister at unload is treated as fatal */
+}
+
+module_init(cryptomgr_init);
+module_exit(cryptomgr_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Crypto Algorithm Manager");
diff --git a/crypto/des.c b/crypto/des.c
index a9d3c23..1df3a71 100644
--- a/crypto/des.c
+++ b/crypto/des.c
@@ -784,9 +784,10 @@
 }
 
 static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
-		      unsigned int keylen, u32 *flags)
+		      unsigned int keylen)
 {
 	struct des_ctx *dctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
 	u32 tmp[DES_EXPKEY_WORDS];
 	int ret;
 
@@ -864,11 +865,12 @@
  *
  */
 static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen, u32 *flags)
+			   unsigned int keylen)
 {
 	const u32 *K = (const u32 *)key;
 	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
 	u32 *expkey = dctx->expkey;
+	u32 *flags = &tfm->crt_flags;
 
 	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
 		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))))
diff --git a/crypto/digest.c b/crypto/digest.c
index 603006a..0155a94 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -11,29 +11,89 @@
  * any later version.
  *
  */
-#include <linux/crypto.h>
+
 #include <linux/mm.h>
 #include <linux/errno.h>
 #include <linux/highmem.h>
-#include <asm/scatterlist.h>
-#include "internal.h"
+#include <linux/module.h>
+#include <linux/scatterlist.h>
 
-static void init(struct crypto_tfm *tfm)
+#include "internal.h"
+#include "scatterwalk.h"
+
+void crypto_digest_init(struct crypto_tfm *tfm)
 {
+	struct crypto_hash *hash = crypto_hash_cast(tfm);
+	struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags };
+
+	crypto_hash_init(&desc);
+}
+EXPORT_SYMBOL_GPL(crypto_digest_init);
+
+void crypto_digest_update(struct crypto_tfm *tfm,
+			  struct scatterlist *sg, unsigned int nsg)
+{
+	struct hash_desc desc = { .flags = tfm->crt_flags };
+	unsigned int total = 0;
+
+	/* Sum the entry lengths: the hash interface wants a byte count. */
+	while (nsg--)
+		total += sg[nsg].length;
+
+	desc.tfm = crypto_hash_cast(tfm);
+	crypto_hash_update(&desc, sg, total);
+}
+EXPORT_SYMBOL_GPL(crypto_digest_update);
+
+void crypto_digest_final(struct crypto_tfm *tfm, u8 *out)
+{
+	struct hash_desc desc = { .flags = tfm->crt_flags };
+
+	desc.tfm = crypto_hash_cast(tfm);	/* legacy tfm used via hash API */
+	crypto_hash_final(&desc, out);
+}
+EXPORT_SYMBOL_GPL(crypto_digest_final);
+
+void crypto_digest_digest(struct crypto_tfm *tfm,
+			  struct scatterlist *sg, unsigned int nsg, u8 *out)
+{
+	struct hash_desc desc = { .flags = tfm->crt_flags };
+	unsigned int total = 0;
+
+	/* Sum the entry lengths: the hash interface wants a byte count. */
+	while (nsg--)
+		total += sg[nsg].length;
+
+	desc.tfm = crypto_hash_cast(tfm);
+	crypto_hash_digest(&desc, sg, total, out);
+}
+EXPORT_SYMBOL_GPL(crypto_digest_digest);
+
+static int init(struct hash_desc *desc)
+{
+	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);
+
 	tfm->__crt_alg->cra_digest.dia_init(tfm);
+	return 0;
 }
 
-static void update(struct crypto_tfm *tfm,
-                   struct scatterlist *sg, unsigned int nsg)
+static int update(struct hash_desc *desc,
+		  struct scatterlist *sg, unsigned int nbytes)
 {
-	unsigned int i;
+	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);
 	unsigned int alignmask = crypto_tfm_alg_alignmask(tfm);
 
-	for (i = 0; i < nsg; i++) {
+	if (!nbytes)
+		return 0;
 
-		struct page *pg = sg[i].page;
-		unsigned int offset = sg[i].offset;
-		unsigned int l = sg[i].length;
+	for (;;) {
+		struct page *pg = sg->page;
+		unsigned int offset = sg->offset;
+		unsigned int l = sg->length;
+
+		if (unlikely(l > nbytes))
+			l = nbytes;
+		nbytes -= l;
 
 		do {
 			unsigned int bytes_from_page = min(l, ((unsigned int)
@@ -55,41 +115,60 @@
 			tfm->__crt_alg->cra_digest.dia_update(tfm, p,
 							      bytes_from_page);
 			crypto_kunmap(src, 0);
-			crypto_yield(tfm);
+			crypto_yield(desc->flags);
 			offset = 0;
 			pg++;
 			l -= bytes_from_page;
 		} while (l > 0);
+
+		if (!nbytes)
+			break;
+		sg = sg_next(sg);
 	}
+
+	return 0;
 }
 
-static void final(struct crypto_tfm *tfm, u8 *out)
+static int final(struct hash_desc *desc, u8 *out)
 {
+	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);
 	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
+	struct digest_alg *digest = &tfm->__crt_alg->cra_digest;
+
 	if (unlikely((unsigned long)out & alignmask)) {
-		unsigned int size = crypto_tfm_alg_digestsize(tfm);
-		u8 buffer[size + alignmask];
-		u8 *dst = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
-		tfm->__crt_alg->cra_digest.dia_final(tfm, dst);
-		memcpy(out, dst, size);
+		unsigned long align = alignmask + 1;
+		unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm);
+		u8 *dst = (u8 *)ALIGN(addr, align) +
+			  ALIGN(tfm->__crt_alg->cra_ctxsize, align);
+
+		digest->dia_final(tfm, dst);
+		memcpy(out, dst, digest->dia_digestsize);
 	} else
-		tfm->__crt_alg->cra_digest.dia_final(tfm, out);
+		digest->dia_final(tfm, out);
+
+	return 0;
 }
 
-static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
+static int nosetkey(struct crypto_hash *tfm, const u8 *key, unsigned int keylen)
 {
-	u32 flags;
-	if (tfm->__crt_alg->cra_digest.dia_setkey == NULL)
-		return -ENOSYS;
-	return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen, &flags);
+	crypto_hash_clear_flags(tfm, CRYPTO_TFM_RES_MASK);
+	return -ENOSYS;
 }
 
-static void digest(struct crypto_tfm *tfm,
-                   struct scatterlist *sg, unsigned int nsg, u8 *out)
+static int setkey(struct crypto_hash *hash, const u8 *key, unsigned int keylen)
 {
-	init(tfm);
-	update(tfm, sg, nsg);
-	final(tfm, out);
+	struct crypto_tfm *tfm = crypto_hash_tfm(hash);
+
+	crypto_hash_clear_flags(hash, CRYPTO_TFM_RES_MASK);
+	return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen);
+}
+
+static int digest(struct hash_desc *desc,
+		  struct scatterlist *sg, unsigned int nbytes, u8 *out)
+{
+	init(desc);
+	update(desc, sg, nbytes);
+	return final(desc, out);
 }
 
 int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags)
@@ -99,18 +178,22 @@
 
 int crypto_init_digest_ops(struct crypto_tfm *tfm)
 {
-	struct digest_tfm *ops = &tfm->crt_digest;
+	struct hash_tfm *ops = &tfm->crt_hash;
+	struct digest_alg *dalg = &tfm->__crt_alg->cra_digest;
+
+	if (dalg->dia_digestsize > crypto_tfm_alg_blocksize(tfm))
+		return -EINVAL;
 	
-	ops->dit_init	= init;
-	ops->dit_update	= update;
-	ops->dit_final	= final;
-	ops->dit_digest	= digest;
-	ops->dit_setkey	= setkey;
+	ops->init	= init;
+	ops->update	= update;
+	ops->final	= final;
+	ops->digest	= digest;
+	ops->setkey	= dalg->dia_setkey ? setkey : nosetkey;
+	ops->digestsize	= dalg->dia_digestsize;
 	
-	return crypto_alloc_hmac_block(tfm);
+	return 0;
 }
 
 void crypto_exit_digest_ops(struct crypto_tfm *tfm)
 {
-	crypto_free_hmac_block(tfm);
 }
diff --git a/crypto/ecb.c b/crypto/ecb.c
new file mode 100644
index 0000000..f239aa9
--- /dev/null
+++ b/crypto/ecb.c
@@ -0,0 +1,181 @@
+/*
+ * ECB: Electronic CodeBook mode
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+struct crypto_ecb_ctx {
+	struct crypto_cipher *child;	/* set in crypto_ecb_init_tfm() */
+};
+
+/* Set the key on the child cipher, relaying request and result flags. */
+static int crypto_ecb_setkey(struct crypto_tfm *parent, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(parent);
+	u32 req = crypto_tfm_get_flags(parent) & CRYPTO_TFM_REQ_MASK;
+	int err;
+
+	crypto_cipher_clear_flags(ctx->child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(ctx->child, req);
+	err = crypto_cipher_setkey(ctx->child, key, keylen);
+	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(ctx->child) &
+				     CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+/* Apply fn to every whole block of the walk; shared by encrypt and decrypt. */
+static int crypto_ecb_crypt(struct blkcipher_desc *desc,
+			    struct blkcipher_walk *walk,
+			    struct crypto_cipher *tfm,
+			    void (*fn)(struct crypto_tfm *, u8 *, const u8 *))
+{
+	int bsize = crypto_cipher_blocksize(tfm);
+	int err = blkcipher_walk_virt(desc, walk);
+
+	while (walk->nbytes) {
+		unsigned int left = walk->nbytes;
+		u8 *in = walk->src.virt.addr;
+		u8 *out = walk->dst.virt.addr;
+
+		do {
+			fn(crypto_cipher_tfm(tfm), out, in);
+			in += bsize;
+			out += bsize;
+			left -= bsize;
+		} while (left >= bsize);
+
+		/* Report the unprocessed remainder back to the walker. */
+		err = blkcipher_walk_done(desc, walk, left);
+	}
+
+	return err;
+}
+
+/* blkcipher encrypt hook: ECB using the child cipher's block encryptor. */
+static int crypto_ecb_encrypt(struct blkcipher_desc *desc,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{
+	struct crypto_ecb_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_cipher *cipher = ctx->child;
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return crypto_ecb_crypt(desc, &walk, cipher,
+				crypto_cipher_alg(cipher)->cia_encrypt);
+}
+
+/* blkcipher decrypt hook: ECB using the child cipher's block decryptor. */
+static int crypto_ecb_decrypt(struct blkcipher_desc *desc,
+			      struct scatterlist *dst, struct scatterlist *src,
+			      unsigned int nbytes)
+{
+	struct crypto_ecb_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_cipher *cipher = ctx->child;
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return crypto_ecb_crypt(desc, &walk, cipher,
+				crypto_cipher_alg(cipher)->cia_decrypt);
+}
+
+static int crypto_ecb_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_tfm *cipher;
+
+	/* The spawn for the underlying cipher sits in the instance context. */
+	cipher = crypto_spawn_tfm(crypto_instance_ctx(inst));
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+	ctx->child = crypto_cipher_cast(cipher);
+	return 0;
+}
+
+static void crypto_ecb_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_ecb_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_free_cipher(ctx->child);	/* child was set by init_tfm */
+}
+
+/* Template alloc hook: wrap the named cipher in an ECB blkcipher instance. */
+static struct crypto_instance *crypto_ecb_alloc(void *param, unsigned int len)
+{
+	struct crypto_instance *inst;
+	struct crypto_alg *cipher, *ecb;
+
+	cipher = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_CIPHER,
+				     CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cipher))
+		return ERR_PTR(PTR_ERR(cipher));
+
+	inst = crypto_alloc_instance("ecb", cipher);
+	if (IS_ERR(inst))
+		goto out_put_alg;
+	ecb = &inst->alg;
+
+	ecb->cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
+	ecb->cra_type = &crypto_blkcipher_type;
+	ecb->cra_priority = cipher->cra_priority;
+	ecb->cra_blocksize = cipher->cra_blocksize;
+	ecb->cra_alignmask = cipher->cra_alignmask;
+	ecb->cra_ctxsize = sizeof(struct crypto_ecb_ctx);
+	ecb->cra_init = crypto_ecb_init_tfm;
+	ecb->cra_exit = crypto_ecb_exit_tfm;
+
+	/* Key size limits come from the cipher; the mode hooks are ECB's. */
+	ecb->cra_blkcipher.min_keysize = cipher->cra_cipher.cia_min_keysize;
+	ecb->cra_blkcipher.max_keysize = cipher->cra_cipher.cia_max_keysize;
+	ecb->cra_blkcipher.setkey = crypto_ecb_setkey;
+	ecb->cra_blkcipher.encrypt = crypto_ecb_encrypt;
+	ecb->cra_blkcipher.decrypt = crypto_ecb_decrypt;
+
+out_put_alg:
+	crypto_mod_put(cipher);
+	return inst;
+}
+
+static void crypto_ecb_free(struct crypto_instance *inst)
+{
+	crypto_drop_spawn(crypto_instance_ctx(inst));	/* ctx is the spawn */
+	kfree(inst);
+}
+
+static struct crypto_template crypto_ecb_tmpl = {
+	.name = "ecb",
+	.alloc = crypto_ecb_alloc,	/* builds an instance around a cipher */
+	.free = crypto_ecb_free,	/* drops the spawn, frees the instance */
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_ecb_module_init(void)
+{
+	return crypto_register_template(&crypto_ecb_tmpl);	/* adds "ecb" */
+}
+
+static void __exit crypto_ecb_module_exit(void)
+{
+	crypto_unregister_template(&crypto_ecb_tmpl);	/* drops "ecb" */
+}
+
+module_init(crypto_ecb_module_init);
+module_exit(crypto_ecb_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ECB block cipher algorithm");
diff --git a/crypto/hash.c b/crypto/hash.c
new file mode 100644
index 0000000..cdec23d
--- /dev/null
+++ b/crypto/hash.c
@@ -0,0 +1,61 @@
+/*
+ * Cryptographic Hash operations.
+ * 
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include "internal.h"
+
+static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg)
+{
+	return alg->cra_ctxsize;	/* nothing is added for the hash type */
+}
+
+/* Copy the algorithm's hash operations into the transform. */
+static int crypto_init_hash_ops(struct crypto_tfm *tfm)
+{
+	struct hash_alg *src = &tfm->__crt_alg->cra_hash;
+	struct hash_tfm *ops = &tfm->crt_hash;
+
+	if (crypto_tfm_alg_blocksize(tfm) < src->digestsize)
+		return -EINVAL;
+
+	ops->digestsize = src->digestsize;
+	ops->setkey = src->setkey;
+	ops->init = src->init;
+	ops->update = src->update;
+	ops->final = src->final;
+	ops->digest = src->digest;
+	return 0;
+}
+
+static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute_used__;	/* only referenced under CONFIG_PROC_FS */
+static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	seq_printf(m, "type         : hash\n");
+	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+	seq_printf(m, "digestsize   : %u\n", alg->cra_hash.digestsize);
+}
+
+const struct crypto_type crypto_hash_type = {
+	.ctxsize = crypto_hash_ctxsize,
+	.init = crypto_init_hash_ops,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_hash_show,
+#endif /* CONFIG_PROC_FS */
+};
+EXPORT_SYMBOL_GPL(crypto_hash_type);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic cryptographic hash type");
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 46120de..b521bcd 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -4,121 +4,261 @@
  * HMAC: Keyed-Hashing for Message Authentication (RFC2104).
  *
  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * The HMAC implementation is derived from USAGI.
  * Copyright (c) 2002 Kazunori Miyazawa <miyazawa@linux-ipv6.org> / USAGI
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option) 
+ * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  *
  */
-#include <linux/crypto.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
+
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/scatterlist.h>
-#include "internal.h"
+#include <linux/slab.h>
+#include <linux/string.h>
 
-static void hash_key(struct crypto_tfm *tfm, u8 *key, unsigned int keylen)
+struct hmac_ctx {
+	struct crypto_hash *child;	/* underlying hash, set by hmac_init_tfm() */
+};
+
+static inline void *align_ptr(void *p, unsigned int align)
 {
-	struct scatterlist tmp;
-	
-	sg_set_buf(&tmp, key, keylen);
-	crypto_digest_digest(tfm, &tmp, 1, key);
+	return (void *)ALIGN((unsigned long)p, align);	/* presumably rounds p up to 'align' */
 }
 
-int crypto_alloc_hmac_block(struct crypto_tfm *tfm)
+static inline struct hmac_ctx *hmac_ctx(struct crypto_hash *tfm)
 {
-	int ret = 0;
-
-	BUG_ON(!crypto_tfm_alg_blocksize(tfm));
-	
-	tfm->crt_digest.dit_hmac_block = kmalloc(crypto_tfm_alg_blocksize(tfm),
-	                                         GFP_KERNEL);
-	if (tfm->crt_digest.dit_hmac_block == NULL)
-		ret = -ENOMEM;
-
-	return ret;
-		
+	return align_ptr(crypto_hash_ctx_aligned(tfm) +
+			 crypto_hash_blocksize(tfm) * 2 +	/* skip two block-sized pads */
+			 crypto_hash_digestsize(tfm), sizeof(void *));	/* and one digest */
 }
 
-void crypto_free_hmac_block(struct crypto_tfm *tfm)
+static int hmac_setkey(struct crypto_hash *parent,
+		       const u8 *inkey, unsigned int keylen)
 {
-	kfree(tfm->crt_digest.dit_hmac_block);
-}
-
-void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen)
-{
+	int bs = crypto_hash_blocksize(parent);
+	int ds = crypto_hash_digestsize(parent);
+	char *ipad = crypto_hash_ctx_aligned(parent);	/* layout: ipad, opad, digest, hmac_ctx */
+	char *opad = ipad + bs;
+	char *digest = opad + bs;
+	struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *));
+	struct crypto_hash *tfm = ctx->child;
 	unsigned int i;
-	struct scatterlist tmp;
-	char *ipad = tfm->crt_digest.dit_hmac_block;
-	
-	if (*keylen > crypto_tfm_alg_blocksize(tfm)) {
-		hash_key(tfm, key, *keylen);
-		*keylen = crypto_tfm_alg_digestsize(tfm);
+
+	if (keylen > bs) {	/* over-long key: use its digest as the key instead */
+		struct hash_desc desc;
+		struct scatterlist tmp;
+		int err;
+
+		desc.tfm = tfm;
+		desc.flags = crypto_hash_get_flags(parent);
+		desc.flags &= CRYPTO_TFM_REQ_MAY_SLEEP;	/* keep only MAY_SLEEP */
+		sg_set_buf(&tmp, inkey, keylen);
+
+		err = crypto_hash_digest(&desc, &tmp, keylen, digest);
+		if (err)
+			return err;
+
+		inkey = digest;
+		keylen = ds;
 	}
 
-	memset(ipad, 0, crypto_tfm_alg_blocksize(tfm));
-	memcpy(ipad, key, *keylen);
+	memcpy(ipad, inkey, keylen);
+	memset(ipad + keylen, 0, bs - keylen);	/* zero-pad the key to one block */
+	memcpy(opad, ipad, bs);	/* both pads start from the padded key */
 
-	for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++)
+	for (i = 0; i < bs; i++) {
 		ipad[i] ^= 0x36;
-
-	sg_set_buf(&tmp, ipad, crypto_tfm_alg_blocksize(tfm));
-	
-	crypto_digest_init(tfm);
-	crypto_digest_update(tfm, &tmp, 1);
-}
-
-void crypto_hmac_update(struct crypto_tfm *tfm,
-                        struct scatterlist *sg, unsigned int nsg)
-{
-	crypto_digest_update(tfm, sg, nsg);
-}
-
-void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key,
-                       unsigned int *keylen, u8 *out)
-{
-	unsigned int i;
-	struct scatterlist tmp;
-	char *opad = tfm->crt_digest.dit_hmac_block;
-	
-	if (*keylen > crypto_tfm_alg_blocksize(tfm)) {
-		hash_key(tfm, key, *keylen);
-		*keylen = crypto_tfm_alg_digestsize(tfm);
+		opad[i] ^= 0x5c;
 	}
 
-	crypto_digest_final(tfm, out);
-
-	memset(opad, 0, crypto_tfm_alg_blocksize(tfm));
-	memcpy(opad, key, *keylen);
-		
-	for (i = 0; i < crypto_tfm_alg_blocksize(tfm); i++)
-		opad[i] ^= 0x5c;
-
-	sg_set_buf(&tmp, opad, crypto_tfm_alg_blocksize(tfm));
-
-	crypto_digest_init(tfm);
-	crypto_digest_update(tfm, &tmp, 1);
-	
-	sg_set_buf(&tmp, out, crypto_tfm_alg_digestsize(tfm));
-	
-	crypto_digest_update(tfm, &tmp, 1);
-	crypto_digest_final(tfm, out);
+	return 0;
 }
 
-void crypto_hmac(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen,
-                 struct scatterlist *sg, unsigned int nsg, u8 *out)
+static int hmac_init(struct hash_desc *pdesc)
 {
-	crypto_hmac_init(tfm, key, keylen);
-	crypto_hmac_update(tfm, sg, nsg);
-	crypto_hmac_final(tfm, key, keylen, out);
+	struct crypto_hash *parent = pdesc->tfm;
+	int bs = crypto_hash_blocksize(parent);
+	int ds = crypto_hash_digestsize(parent);
+	char *ipad = crypto_hash_ctx_aligned(parent);	/* pad block written by hmac_setkey() */
+	struct hmac_ctx *ctx = align_ptr(ipad + bs * 2 + ds, sizeof(void *));
+	struct hash_desc desc;
+	struct scatterlist tmp;
+	int err;
+
+	desc.tfm = ctx->child;
+	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;	/* only MAY_SLEEP is passed down */
+	sg_set_buf(&tmp, ipad, bs);
+
+	err = crypto_hash_init(&desc);
+	if (unlikely(err))
+		return err;
+
+	return crypto_hash_update(&desc, &tmp, bs);	/* child hash starts with the ipad block */
 }
 
-EXPORT_SYMBOL_GPL(crypto_hmac_init);
-EXPORT_SYMBOL_GPL(crypto_hmac_update);
-EXPORT_SYMBOL_GPL(crypto_hmac_final);
-EXPORT_SYMBOL_GPL(crypto_hmac);
+static int hmac_update(struct hash_desc *pdesc,
+		       struct scatterlist *sg, unsigned int nbytes)
+{
+	struct hmac_ctx *ctx = hmac_ctx(pdesc->tfm);
+	struct hash_desc desc;
 
+	desc.tfm = ctx->child;
+	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;	/* only MAY_SLEEP is passed down */
+
+	return crypto_hash_update(&desc, sg, nbytes);	/* data is fed unchanged to the child hash */
+}
+
+static int hmac_final(struct hash_desc *pdesc, u8 *out)
+{
+	struct crypto_hash *parent = pdesc->tfm;
+	int bs = crypto_hash_blocksize(parent);
+	int ds = crypto_hash_digestsize(parent);
+	char *opad = crypto_hash_ctx_aligned(parent) + bs;
+	char *inner = opad + bs;
+	struct hmac_ctx *ctx = hmac_ctx(parent);
+	struct scatterlist outer;
+	struct hash_desc desc;
+	int err;
+
+	sg_set_buf(&outer, opad, bs + ds);
+	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+	desc.tfm = ctx->child;
+
+	/* Finish the inner hash right behind opad, then hash opad || inner. */
+	err = crypto_hash_final(&desc, inner);
+	if (unlikely(err))
+		return err;
+	return crypto_hash_digest(&desc, &outer, bs + ds, out);
+}
+
+static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
+		       unsigned int nbytes, u8 *out)
+{
+	struct crypto_hash *parent = pdesc->tfm;
+	int bs = crypto_hash_blocksize(parent);
+	int ds = crypto_hash_digestsize(parent);
+	char *ipad = crypto_hash_ctx_aligned(parent);
+	char *opad = ipad + bs;
+	char *inner = opad + bs;
+	struct hmac_ctx *ctx = hmac_ctx(parent);
+	struct scatterlist in_sg[2];
+	struct scatterlist out_sg[1];
+	struct hash_desc desc;
+	int err;
+
+	desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+	desc.tfm = ctx->child;
+
+	/* Inner pass: ipad, then a zero-length entry whose page is sg. */
+	sg_set_buf(in_sg, ipad, bs);
+	in_sg[1].length = 0;
+	in_sg[1].page = (void *)sg;
+	err = crypto_hash_digest(&desc, in_sg, nbytes + bs, inner);
+	if (unlikely(err))
+		return err;
+	/* Outer pass: opad followed directly by the inner digest. */
+	sg_set_buf(out_sg, opad, bs + ds);
+	return crypto_hash_digest(&desc, out_sg, bs + ds, out);
+}
+
+static int hmac_init_tfm(struct crypto_tfm *tfm)
+{
+	struct hmac_ctx *ctx = hmac_ctx(__crypto_hash_cast(tfm));
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_tfm *child;
+
+	/* Instantiate the underlying hash from the instance's spawn. */
+	child = crypto_spawn_tfm(crypto_instance_ctx(inst));
+	if (IS_ERR(child))
+		return PTR_ERR(child);
+	ctx->child = crypto_hash_cast(child);
+	return 0;
+}
+
+static void hmac_exit_tfm(struct crypto_tfm *tfm)
+{
+	/* Release the child hash stored by hmac_init_tfm(). */
+	crypto_free_hash(hmac_ctx(__crypto_hash_cast(tfm))->child);
+}
+
+static void hmac_free(struct crypto_instance *inst)
+{
+	crypto_drop_spawn(crypto_instance_ctx(inst));	/* spawn lives in the instance context */
+	kfree(inst);	/* then release the instance itself */
+}
+
+static struct crypto_instance *hmac_alloc(void *param, unsigned int len)
+{
+	struct crypto_instance *inst;
+	struct crypto_alg *alg;
+
+	alg = crypto_get_attr_alg(param, len, CRYPTO_ALG_TYPE_HASH,
+				  CRYPTO_ALG_TYPE_HASH_MASK | CRYPTO_ALG_ASYNC);
+	if (IS_ERR(alg))
+		return ERR_PTR(PTR_ERR(alg));	/* same error code, re-typed for the return */
+
+	inst = crypto_alloc_instance("hmac", alg);
+	if (IS_ERR(inst))
+		goto out_put_alg;	/* inst already holds the error pointer to return */
+
+	inst->alg.cra_flags = CRYPTO_ALG_TYPE_HASH;
+	inst->alg.cra_priority = alg->cra_priority;	/* these three are inherited from alg */
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+	inst->alg.cra_type = &crypto_hash_type;
+
+	inst->alg.cra_hash.digestsize =
+		(alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+		CRYPTO_ALG_TYPE_HASH ? alg->cra_hash.digestsize :
+				       alg->cra_digest.dia_digestsize;	/* non-HASH types: digest field */
+
+	inst->alg.cra_ctxsize = sizeof(struct hmac_ctx) +
+				ALIGN(inst->alg.cra_blocksize * 2 +	/* ipad and opad */
+				      inst->alg.cra_hash.digestsize,	/* plus one digest */
+				      sizeof(void *));
+
+	inst->alg.cra_init = hmac_init_tfm;
+	inst->alg.cra_exit = hmac_exit_tfm;
+
+	inst->alg.cra_hash.init = hmac_init;
+	inst->alg.cra_hash.update = hmac_update;
+	inst->alg.cra_hash.final = hmac_final;
+	inst->alg.cra_hash.digest = hmac_digest;
+	inst->alg.cra_hash.setkey = hmac_setkey;
+
+out_put_alg:	/* also reached on success: the lookup reference is always dropped */
+	crypto_mod_put(alg);
+	return inst;
+}
+
+static struct crypto_template hmac_tmpl = {
+	.name = "hmac",	/* same string hmac_alloc() gives crypto_alloc_instance() */
+	.alloc = hmac_alloc,
+	.free = hmac_free,
+	.module = THIS_MODULE,
+};
+
+static int __init hmac_module_init(void)
+{
+	return crypto_register_template(&hmac_tmpl);	/* result is the module init status */
+}
+
+static void __exit hmac_module_exit(void)
+{
+	crypto_unregister_template(&hmac_tmpl);	/* undoes hmac_module_init() */
+}
+
+module_init(hmac_module_init);
+module_exit(hmac_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("HMAC hash algorithm");
diff --git a/crypto/internal.h b/crypto/internal.h
index 959e602..2da6ad4 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -12,19 +12,43 @@
  */
 #ifndef _CRYPTO_INTERNAL_H
 #define _CRYPTO_INTERNAL_H
-#include <linux/crypto.h>
+
+#include <crypto/algapi.h>
+#include <linux/completion.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/notifier.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <asm/kmap_types.h>
 
+/* Crypto notification events; presumably crypto_notify()'s val argument. */
+enum {
+	CRYPTO_MSG_ALG_REQUEST,
+	CRYPTO_MSG_ALG_REGISTER,
+	CRYPTO_MSG_ALG_UNREGISTER,
+	CRYPTO_MSG_TMPL_REGISTER,
+	CRYPTO_MSG_TMPL_UNREGISTER,
+};
+
+struct crypto_instance;
+struct crypto_template;
+
+struct crypto_larval {
+	struct crypto_alg alg;	/* embedded algorithm header */
+	struct crypto_alg *adult;	/* NOTE(review): presumably the real algorithm once known - confirm */
+	struct completion completion;
+	u32 mask;
+};
+
 extern struct list_head crypto_alg_list;
 extern struct rw_semaphore crypto_alg_sem;
+extern struct blocking_notifier_head crypto_chain;
 
 extern enum km_type crypto_km_types[];
 
@@ -43,36 +67,33 @@
 	kunmap_atomic(vaddr, crypto_kmap_type(out));
 }
 
-static inline void crypto_yield(struct crypto_tfm *tfm)
+static inline void crypto_yield(u32 flags)
 {
-	if (tfm->crt_flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+	if (flags & CRYPTO_TFM_REQ_MAY_SLEEP)	/* reschedule only if the MAY_SLEEP flag is set */
 		cond_resched();
 }
 
-#ifdef CONFIG_CRYPTO_HMAC
-int crypto_alloc_hmac_block(struct crypto_tfm *tfm);
-void crypto_free_hmac_block(struct crypto_tfm *tfm);
-#else
-static inline int crypto_alloc_hmac_block(struct crypto_tfm *tfm)
-{
-	return 0;
-}
-
-static inline void crypto_free_hmac_block(struct crypto_tfm *tfm)
-{ }
-#endif
-
 #ifdef CONFIG_PROC_FS
 void __init crypto_init_proc(void);
+void __exit crypto_exit_proc(void);	/* defined in crypto/proc.c */
 #else
 static inline void crypto_init_proc(void)
 { }
+static inline void crypto_exit_proc(void)
+{ }	/* empty stub when CONFIG_PROC_FS is off */
 #endif
 
 static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg,
 						 int flags)
 {
-	return alg->cra_ctxsize;
+	unsigned int len = alg->cra_ctxsize;
+
+	if (alg->cra_alignmask) {	/* extra space only for algorithms with an alignmask */
+		len = ALIGN(len, (unsigned long)alg->cra_alignmask + 1);
+		len += alg->cra_digest.dia_digestsize;	/* NOTE(review): presumably an aligned digest scratch area - confirm */
+	}
+
+	return len;
 }
 
 static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg,
@@ -96,6 +117,10 @@
 	return alg->cra_ctxsize;
 }
 
+struct crypto_alg *crypto_mod_get(struct crypto_alg *alg);
+struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask);
+struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
+
 int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags);
 int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags);
 int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags);
@@ -108,5 +133,52 @@
 void crypto_exit_cipher_ops(struct crypto_tfm *tfm);
 void crypto_exit_compress_ops(struct crypto_tfm *tfm);
 
+void crypto_larval_error(const char *name, u32 type, u32 mask);
+
+void crypto_shoot_alg(struct crypto_alg *alg);
+struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 flags);
+
+int crypto_register_instance(struct crypto_template *tmpl,
+			     struct crypto_instance *inst);
+
+int crypto_register_notifier(struct notifier_block *nb);
+int crypto_unregister_notifier(struct notifier_block *nb);
+
+static inline void crypto_alg_put(struct crypto_alg *alg)
+{
+	if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
+		alg->cra_destroy(alg);	/* count hit zero and a destructor is set */
+}
+
+static inline int crypto_tmpl_get(struct crypto_template *tmpl)
+{
+	return try_module_get(tmpl->module);	/* result of try_module_get() on the owning module */
+}
+
+static inline void crypto_tmpl_put(struct crypto_template *tmpl)
+{
+	module_put(tmpl->module);	/* counterpart of crypto_tmpl_get() */
+}
+
+static inline int crypto_is_larval(struct crypto_alg *alg)
+{
+	return alg->cra_flags & CRYPTO_ALG_LARVAL;	/* non-zero when the LARVAL flag is set */
+}
+
+static inline int crypto_is_dead(struct crypto_alg *alg)
+{
+	return alg->cra_flags & CRYPTO_ALG_DEAD;	/* non-zero when the DEAD flag is set */
+}
+
+static inline int crypto_is_moribund(struct crypto_alg *alg)
+{
+	return alg->cra_flags & (CRYPTO_ALG_DEAD | CRYPTO_ALG_DYING);	/* either flag counts */
+}
+
+static inline int crypto_notify(unsigned long val, void *v)
+{
+	return blocking_notifier_call_chain(&crypto_chain, val, v);	/* run the crypto_chain notifiers */
+}
+
 #endif	/* _CRYPTO_INTERNAL_H */
 
diff --git a/crypto/khazad.c b/crypto/khazad.c
index d4c9d36..9fa24a2 100644
--- a/crypto/khazad.c
+++ b/crypto/khazad.c
@@ -755,19 +755,13 @@
 };
 
 static int khazad_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			 unsigned int key_len, u32 *flags)
+			 unsigned int key_len)
 {
 	struct khazad_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __be32 *key = (const __be32 *)in_key;
 	int r;
 	const u64 *S = T7;
 	u64 K2, K1;
-	
-	if (key_len != 16)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
 
 	/* key is supposed to be 32-bit aligned */
 	K2 = ((u64)be32_to_cpu(key[0]) << 32) | be32_to_cpu(key[1]);
diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c
index d061da2..094397b 100644
--- a/crypto/michael_mic.c
+++ b/crypto/michael_mic.c
@@ -123,14 +123,13 @@
 
 
 static int michael_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int keylen, u32 *flags)
+			  unsigned int keylen)
 {
 	struct michael_mic_ctx *mctx = crypto_tfm_ctx(tfm);
 	const __le32 *data = (const __le32 *)key;
 
 	if (keylen != 8) {
-		if (flags)
-			*flags = CRYPTO_TFM_RES_BAD_KEY_LEN;
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
 
diff --git a/crypto/proc.c b/crypto/proc.c
index c0a5dd7..dabce06 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -12,6 +12,8 @@
  * any later version.
  *
  */
+
+#include <asm/atomic.h>
 #include <linux/init.h>
 #include <linux/crypto.h>
 #include <linux/rwsem.h>
@@ -54,6 +56,7 @@
 	seq_printf(m, "driver       : %s\n", alg->cra_driver_name);
 	seq_printf(m, "module       : %s\n", module_name(alg->cra_module));
 	seq_printf(m, "priority     : %d\n", alg->cra_priority);
+	seq_printf(m, "refcnt       : %d\n", atomic_read(&alg->cra_refcnt));
 	
 	switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_CIPHER:
@@ -75,7 +78,10 @@
 		seq_printf(m, "type         : compression\n");
 		break;
 	default:
-		seq_printf(m, "type         : unknown\n");
+		if (alg->cra_type && alg->cra_type->show)
+			alg->cra_type->show(m, alg);
+		else
+			seq_printf(m, "type         : unknown\n");
 		break;
 	}
 
@@ -110,3 +116,8 @@
 	if (proc)
 		proc->proc_fops = &proc_crypto_ops;
 }
+
+void __exit crypto_exit_proc(void)
+{
+	remove_proc_entry("crypto", NULL);	/* entry named "crypto", NULL parent */
+}
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 2953e2c..35172d3 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -15,9 +15,11 @@
  */
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
+
 #include "internal.h"
 #include "scatterwalk.h"
 
@@ -27,88 +29,77 @@
 	KM_SOFTIRQ0,
 	KM_SOFTIRQ1,
 };
+EXPORT_SYMBOL_GPL(crypto_km_types);
 
-static void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out)
+static inline void memcpy_dir(void *buf, void *sgdata, size_t nbytes, int out)
 {
-	if (out)
-		memcpy(sgdata, buf, nbytes);
-	else
-		memcpy(buf, sgdata, nbytes);
+	void *src = out ? buf : sgdata;	/* out != 0: copy buf -> sgdata */
+	void *dst = out ? sgdata : buf;	/* out == 0: copy sgdata -> buf */
+
+	memcpy(dst, src, nbytes);
 }
 
 void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg)
 {
-	unsigned int rest_of_page;
-
 	walk->sg = sg;
 
-	walk->page = sg->page;
-	walk->len_this_segment = sg->length;
-
 	BUG_ON(!sg->length);
 
-	rest_of_page = PAGE_CACHE_SIZE - (sg->offset & (PAGE_CACHE_SIZE - 1));
-	walk->len_this_page = min(sg->length, rest_of_page);
 	walk->offset = sg->offset;
 }
+EXPORT_SYMBOL_GPL(scatterwalk_start);
 
-void scatterwalk_map(struct scatter_walk *walk, int out)
+void *scatterwalk_map(struct scatter_walk *walk, int out)
 {
-	walk->data = crypto_kmap(walk->page, out) + walk->offset;
+	return crypto_kmap(scatterwalk_page(walk), out) +
+	       offset_in_page(walk->offset);	/* mapping of the current page plus the in-page offset */
 }
-
-static inline void scatterwalk_unmap(struct scatter_walk *walk, int out)
-{
-	/* walk->data may be pointing the first byte of the next page;
-	   however, we know we transfered at least one byte.  So,
-	   walk->data - 1 will be a virtual address in the mapped page. */
-	crypto_kunmap(walk->data - 1, out);
-}
+EXPORT_SYMBOL_GPL(scatterwalk_map);
 
 static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
 				 unsigned int more)
 {
 	if (out)
-		flush_dcache_page(walk->page);
+		flush_dcache_page(scatterwalk_page(walk));	/* page derived from sg->page and offset */
 
 	if (more) {
-		walk->len_this_segment -= walk->len_this_page;
-
-		if (walk->len_this_segment) {
-			walk->page++;
-			walk->len_this_page = min(walk->len_this_segment,
-						  (unsigned)PAGE_CACHE_SIZE);
-			walk->offset = 0;
-		}
-		else
+		walk->offset += PAGE_SIZE - 1;
+		walk->offset &= PAGE_MASK;	/* with the line above: presumably rounds up to a page boundary */
+		if (walk->offset >= walk->sg->offset + walk->sg->length)	/* past this sg entry */
 			scatterwalk_start(walk, sg_next(walk->sg));
 	}
 }
 
 void scatterwalk_done(struct scatter_walk *walk, int out, int more)
 {
-	scatterwalk_unmap(walk, out);
-	if (walk->len_this_page == 0 || !more)
+	if (!offset_in_page(walk->offset) || !more)	/* in-page offset is 0, or no more data */
 		scatterwalk_pagedone(walk, out, more);
 }
+EXPORT_SYMBOL_GPL(scatterwalk_done);
 
-/*
- * Do not call this unless the total length of all of the fragments
- * has been verified as multiple of the block size.
- */
-int scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
-			   size_t nbytes, int out)
+/* Copy nbytes between buf and the walk's pages (out != 0 writes the walk). */
+void scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
+			    size_t nbytes, int out)
 {
-	while (nbytes > walk->len_this_page) {
-		memcpy_dir(buf, walk->data, walk->len_this_page, out);
-		buf += walk->len_this_page;
-		nbytes -= walk->len_this_page;
+	for (;;) {
+		unsigned int len_this_page = scatterwalk_pagelen(walk);
+		u8 *vaddr;
 
-		scatterwalk_unmap(walk, out);
+		if (len_this_page > nbytes)
+			len_this_page = nbytes;
+
+		vaddr = scatterwalk_map(walk, out);
+		memcpy_dir(buf, vaddr, len_this_page, out);
+		scatterwalk_unmap(vaddr, out);
+
+		/* Advance first: pagedone only rounds up, a no-op when page-aligned. */
+		scatterwalk_advance(walk, len_this_page);
+		if (nbytes == len_this_page)
+			break;
+
+		buf += len_this_page;
+		nbytes -= len_this_page;
 		scatterwalk_pagedone(walk, out, 1);
-		scatterwalk_map(walk, out);
 	}
-
-	memcpy_dir(buf, walk->data, nbytes, out);
-	return nbytes;
 }
+EXPORT_SYMBOL_GPL(scatterwalk_copychunks);
diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h
index e79925c..f1592cc 100644
--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
@@ -14,45 +14,42 @@
 
 #ifndef _CRYPTO_SCATTERWALK_H
 #define _CRYPTO_SCATTERWALK_H
+
 #include <linux/mm.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 
-struct scatter_walk {
-	struct scatterlist	*sg;
-	struct page		*page;
-	void			*data;
-	unsigned int		len_this_page;
-	unsigned int		len_this_segment;
-	unsigned int		offset;
-};
+#include "internal.h"
 
-/* Define sg_next is an inline routine now in case we want to change
-   scatterlist to a linked list later. */
 static inline struct scatterlist *sg_next(struct scatterlist *sg)
 {
-	return sg + 1;
+	return (++sg)->length ? sg : (void *)sg->page;	/* zero-length entry: its page field is the next list */
 }
 
-static inline int scatterwalk_samebuf(struct scatter_walk *walk_in,
-				      struct scatter_walk *walk_out)
+static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in,
+						struct scatter_walk *walk_out)
 {
-	return walk_in->page == walk_out->page &&
-	       walk_in->offset == walk_out->offset;
+	return !(((walk_in->sg->page - walk_out->sg->page) << PAGE_SHIFT) +
+		 (int)(walk_in->offset - walk_out->offset));	/* 1 when page delta (in bytes) plus offset delta is 0 */
+}
+
+static inline unsigned int scatterwalk_pagelen(struct scatter_walk *walk)
+{
+	unsigned int in_page = offset_in_page(~walk->offset) + 1;
+	unsigned int in_sg = walk->sg->offset + walk->sg->length - walk->offset;
+	return in_sg < in_page ? in_sg : in_page;	/* smaller of the two */
 }
 
 static inline unsigned int scatterwalk_clamp(struct scatter_walk *walk,
 					     unsigned int nbytes)
 {
-	return nbytes > walk->len_this_page ? walk->len_this_page : nbytes;
+	unsigned int len_this_page = scatterwalk_pagelen(walk);
+	return nbytes > len_this_page ? len_this_page : nbytes;	/* nbytes capped at scatterwalk_pagelen() */
 }
 
 static inline void scatterwalk_advance(struct scatter_walk *walk,
 				       unsigned int nbytes)
 {
-	walk->data += nbytes;
 	walk->offset += nbytes;
-	walk->len_this_page -= nbytes;
-	walk->len_this_segment -= nbytes;
 }
 
 static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk,
@@ -61,9 +58,20 @@
 	return !(walk->offset & alignmask);
 }
 
+static inline struct page *scatterwalk_page(struct scatter_walk *walk)
+{
+	return walk->sg->page + (walk->offset >> PAGE_SHIFT);	/* sg's first page plus whole pages in offset */
+}
+
+static inline void scatterwalk_unmap(void *vaddr, int out)
+{
+	crypto_kunmap(vaddr, out);	/* plain forwarder to crypto_kunmap() */
+}
+
 void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg);
-int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out);
-void scatterwalk_map(struct scatter_walk *walk, int out);
+void scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
+			    size_t nbytes, int out);
+void *scatterwalk_map(struct scatter_walk *walk, int out);
 void scatterwalk_done(struct scatter_walk *walk, int out, int more);
 
 #endif  /* _CRYPTO_SCATTERWALK_H */
diff --git a/crypto/serpent.c b/crypto/serpent.c
index de60cdd..465d091 100644
--- a/crypto/serpent.c
+++ b/crypto/serpent.c
@@ -216,7 +216,7 @@
 
 
 static int serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int keylen, u32 *flags)
+			  unsigned int keylen)
 {
 	struct serpent_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *k = ctx->expkey;
@@ -224,13 +224,6 @@
 	u32 r0,r1,r2,r3,r4;
 	int i;
 
-	if ((keylen < SERPENT_MIN_KEY_SIZE)
-			|| (keylen > SERPENT_MAX_KEY_SIZE))
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-
 	/* Copy key, add padding */
 
 	for (i = 0; i < keylen; ++i)
@@ -497,21 +490,15 @@
 };
 
 static int tnepres_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int keylen, u32 *flags)
+			  unsigned int keylen)
 {
 	u8 rev_key[SERPENT_MAX_KEY_SIZE];
 	int i;
 
-	if ((keylen < SERPENT_MIN_KEY_SIZE)
-	    || (keylen > SERPENT_MAX_KEY_SIZE)) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	} 
-
 	for (i = 0; i < keylen; ++i)
 		rev_key[keylen - i - 1] = key[i];
  
-	return serpent_setkey(tfm, rev_key, keylen, flags);
+	return serpent_setkey(tfm, rev_key, keylen);
 }
 
 static void tnepres_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
diff --git a/crypto/sha1.c b/crypto/sha1.c
index 6c77b68..1bba551 100644
--- a/crypto/sha1.c
+++ b/crypto/sha1.c
@@ -109,6 +109,7 @@
 
 static struct crypto_alg alg = {
 	.cra_name	=	"sha1",
+	.cra_driver_name=	"sha1-generic",
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA1_HMAC_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct sha1_ctx),
@@ -137,3 +138,5 @@
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
+
+MODULE_ALIAS("sha1-generic");
diff --git a/crypto/sha256.c b/crypto/sha256.c
index bc71d85..716195b 100644
--- a/crypto/sha256.c
+++ b/crypto/sha256.c
@@ -309,6 +309,7 @@
 
 static struct crypto_alg alg = {
 	.cra_name	=	"sha256",
+	.cra_driver_name=	"sha256-generic",
 	.cra_flags	=	CRYPTO_ALG_TYPE_DIGEST,
 	.cra_blocksize	=	SHA256_HMAC_BLOCK_SIZE,
 	.cra_ctxsize	=	sizeof(struct sha256_ctx),
@@ -337,3 +338,5 @@
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm");
+
+MODULE_ALIAS("sha256-generic");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index e52f56c..8330742 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -17,6 +17,7 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -54,8 +55,6 @@
 */
 #define ENCRYPT 1
 #define DECRYPT 0
-#define MODE_ECB 1
-#define MODE_CBC 0
 
 static unsigned int IDX[8] = { IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 };
 
@@ -89,9 +88,11 @@
 	unsigned int i, j, k, temp;
 	struct scatterlist sg[8];
 	char result[64];
-	struct crypto_tfm *tfm;
+	struct crypto_hash *tfm;
+	struct hash_desc desc;
 	struct hash_testvec *hash_tv;
 	unsigned int tsize;
+	int ret;
 
 	printk("\ntesting %s\n", algo);
 
@@ -105,30 +106,42 @@
 
 	memcpy(tvmem, template, tsize);
 	hash_tv = (void *)tvmem;
-	tfm = crypto_alloc_tfm(algo, 0);
-	if (tfm == NULL) {
-		printk("failed to load transform for %s\n", algo);
+
+	tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
+		printk("failed to load transform for %s: %ld\n", algo,
+		       PTR_ERR(tfm));
 		return;
 	}
 
+	desc.tfm = tfm;
+	desc.flags = 0;
+
 	for (i = 0; i < tcount; i++) {
 		printk("test %u:\n", i + 1);
 		memset(result, 0, 64);
 
 		sg_set_buf(&sg[0], hash_tv[i].plaintext, hash_tv[i].psize);
 
-		crypto_digest_init(tfm);
-		if (tfm->crt_u.digest.dit_setkey) {
-			crypto_digest_setkey(tfm, hash_tv[i].key,
-					     hash_tv[i].ksize);
+		if (hash_tv[i].ksize) {
+			ret = crypto_hash_setkey(tfm, hash_tv[i].key,
+						 hash_tv[i].ksize);
+			if (ret) {
+				printk("setkey() failed ret=%d\n", ret);
+				goto out;
+			}
 		}
-		crypto_digest_update(tfm, sg, 1);
-		crypto_digest_final(tfm, result);
 
-		hexdump(result, crypto_tfm_alg_digestsize(tfm));
+		ret = crypto_hash_digest(&desc, sg, hash_tv[i].psize, result);
+		if (ret) {
+			printk("digest () failed ret=%d\n", ret);
+			goto out;
+		}
+
+		hexdump(result, crypto_hash_digestsize(tfm));
 		printk("%s\n",
 		       memcmp(result, hash_tv[i].digest,
-			      crypto_tfm_alg_digestsize(tfm)) ?
+			      crypto_hash_digestsize(tfm)) ?
 		       "fail" : "pass");
 	}
 
@@ -154,127 +167,56 @@
 					    hash_tv[i].tap[k]);
 			}
 
-			crypto_digest_digest(tfm, sg, hash_tv[i].np, result);
+			if (hash_tv[i].ksize) {
+				ret = crypto_hash_setkey(tfm, hash_tv[i].key,
+							 hash_tv[i].ksize);
 
-			hexdump(result, crypto_tfm_alg_digestsize(tfm));
-			printk("%s\n",
-			       memcmp(result, hash_tv[i].digest,
-				      crypto_tfm_alg_digestsize(tfm)) ?
-			       "fail" : "pass");
-		}
-	}
-
-	crypto_free_tfm(tfm);
-}
-
-
-#ifdef CONFIG_CRYPTO_HMAC
-
-static void test_hmac(char *algo, struct hmac_testvec *template,
-		      unsigned int tcount)
-{
-	unsigned int i, j, k, temp;
-	struct scatterlist sg[8];
-	char result[64];
-	struct crypto_tfm *tfm;
-	struct hmac_testvec *hmac_tv;
-	unsigned int tsize, klen;
-
-	tfm = crypto_alloc_tfm(algo, 0);
-	if (tfm == NULL) {
-		printk("failed to load transform for %s\n", algo);
-		return;
-	}
-
-	printk("\ntesting hmac_%s\n", algo);
-
-	tsize = sizeof(struct hmac_testvec);
-	tsize *= tcount;
-	if (tsize > TVMEMSIZE) {
-		printk("template (%u) too big for tvmem (%u)\n", tsize,
-		       TVMEMSIZE);
-		goto out;
-	}
-
-	memcpy(tvmem, template, tsize);
-	hmac_tv = (void *)tvmem;
-
-	for (i = 0; i < tcount; i++) {
-		printk("test %u:\n", i + 1);
-		memset(result, 0, sizeof (result));
-
-		klen = hmac_tv[i].ksize;
-		sg_set_buf(&sg[0], hmac_tv[i].plaintext, hmac_tv[i].psize);
-
-		crypto_hmac(tfm, hmac_tv[i].key, &klen, sg, 1, result);
-
-		hexdump(result, crypto_tfm_alg_digestsize(tfm));
-		printk("%s\n",
-		       memcmp(result, hmac_tv[i].digest,
-			      crypto_tfm_alg_digestsize(tfm)) ? "fail" :
-		       "pass");
-	}
-
-	printk("\ntesting hmac_%s across pages\n", algo);
-
-	memset(xbuf, 0, XBUFSIZE);
-
-	j = 0;
-	for (i = 0; i < tcount; i++) {
-		if (hmac_tv[i].np) {
-			j++;
-			printk("test %u:\n",j);
-			memset(result, 0, 64);
-
-			temp = 0;
-			klen = hmac_tv[i].ksize;
-			for (k = 0; k < hmac_tv[i].np; k++) {
-				memcpy(&xbuf[IDX[k]],
-				       hmac_tv[i].plaintext + temp,
-				       hmac_tv[i].tap[k]);
-				temp += hmac_tv[i].tap[k];
-				sg_set_buf(&sg[k], &xbuf[IDX[k]],
-					    hmac_tv[i].tap[k]);
+				if (ret) {
+					printk("setkey() failed ret=%d\n", ret);
+					goto out;
+				}
 			}
 
-			crypto_hmac(tfm, hmac_tv[i].key, &klen, sg,
-				    hmac_tv[i].np, result);
-			hexdump(result, crypto_tfm_alg_digestsize(tfm));
+			ret = crypto_hash_digest(&desc, sg, hash_tv[i].psize,
+						 result);
+			if (ret) {
+				printk("digest () failed ret=%d\n", ret);
+				goto out;
+			}
 
+			hexdump(result, crypto_hash_digestsize(tfm));
 			printk("%s\n",
-			       memcmp(result, hmac_tv[i].digest,
-				      crypto_tfm_alg_digestsize(tfm)) ?
+			       memcmp(result, hash_tv[i].digest,
+				      crypto_hash_digestsize(tfm)) ?
 			       "fail" : "pass");
 		}
 	}
+
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_hash(tfm);
 }
 
-#endif	/* CONFIG_CRYPTO_HMAC */
-
-static void test_cipher(char *algo, int mode, int enc,
+static void test_cipher(char *algo, int enc,
 			struct cipher_testvec *template, unsigned int tcount)
 {
 	unsigned int ret, i, j, k, temp;
 	unsigned int tsize;
+	unsigned int iv_len;
+	unsigned int len;
 	char *q;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
 	char *key;
 	struct cipher_testvec *cipher_tv;
+	struct blkcipher_desc desc;
 	struct scatterlist sg[8];
-	const char *e, *m;
+	const char *e;
 
 	if (enc == ENCRYPT)
 	        e = "encryption";
 	else
 		e = "decryption";
-	if (mode == MODE_ECB)
-		m = "ECB";
-	else
-		m = "CBC";
 
-	printk("\ntesting %s %s %s\n", algo, m, e);
+	printk("\ntesting %s %s\n", algo, e);
 
 	tsize = sizeof (struct cipher_testvec);
 	tsize *= tcount;
@@ -288,15 +230,15 @@
 	memcpy(tvmem, template, tsize);
 	cipher_tv = (void *)tvmem;
 
-	if (mode)
-		tfm = crypto_alloc_tfm(algo, 0);
-	else
-		tfm = crypto_alloc_tfm(algo, CRYPTO_TFM_MODE_CBC);
+	tfm = crypto_alloc_blkcipher(algo, 0, CRYPTO_ALG_ASYNC);
 
-	if (tfm == NULL) {
-		printk("failed to load transform for %s %s\n", algo, m);
+	if (IS_ERR(tfm)) {
+		printk("failed to load transform for %s: %ld\n", algo,
+		       PTR_ERR(tfm));
 		return;
 	}
+	desc.tfm = tfm;
+	desc.flags = 0;
 
 	j = 0;
 	for (i = 0; i < tcount; i++) {
@@ -305,14 +247,17 @@
 			printk("test %u (%d bit key):\n",
 			j, cipher_tv[i].klen * 8);
 
-			tfm->crt_flags = 0;
+			crypto_blkcipher_clear_flags(tfm, ~0);
 			if (cipher_tv[i].wk)
-				tfm->crt_flags |= CRYPTO_TFM_REQ_WEAK_KEY;
+				crypto_blkcipher_set_flags(
+					tfm, CRYPTO_TFM_REQ_WEAK_KEY);
 			key = cipher_tv[i].key;
 
-			ret = crypto_cipher_setkey(tfm, key, cipher_tv[i].klen);
+			ret = crypto_blkcipher_setkey(tfm, key,
+						      cipher_tv[i].klen);
 			if (ret) {
-				printk("setkey() failed flags=%x\n", tfm->crt_flags);
+				printk("setkey() failed flags=%x\n",
+				       crypto_blkcipher_get_flags(tfm));
 
 				if (!cipher_tv[i].fail)
 					goto out;
@@ -321,19 +266,19 @@
 			sg_set_buf(&sg[0], cipher_tv[i].input,
 				   cipher_tv[i].ilen);
 
-			if (!mode) {
-				crypto_cipher_set_iv(tfm, cipher_tv[i].iv,
-					crypto_tfm_alg_ivsize(tfm));
-			}
+			iv_len = crypto_blkcipher_ivsize(tfm);
+			if (iv_len)
+				crypto_blkcipher_set_iv(tfm, cipher_tv[i].iv,
+							iv_len);
 
-			if (enc)
-				ret = crypto_cipher_encrypt(tfm, sg, sg, cipher_tv[i].ilen);
-			else
-				ret = crypto_cipher_decrypt(tfm, sg, sg, cipher_tv[i].ilen);
-
+			len = cipher_tv[i].ilen;
+			ret = enc ?
+				crypto_blkcipher_encrypt(&desc, sg, sg, len) :
+				crypto_blkcipher_decrypt(&desc, sg, sg, len);
 
 			if (ret) {
-				printk("%s () failed flags=%x\n", e, tfm->crt_flags);
+				printk("%s () failed flags=%x\n", e,
+				       desc.flags);
 				goto out;
 			}
 
@@ -346,7 +291,7 @@
 		}
 	}
 
-	printk("\ntesting %s %s %s across pages (chunking)\n", algo, m, e);
+	printk("\ntesting %s %s across pages (chunking)\n", algo, e);
 	memset(xbuf, 0, XBUFSIZE);
 
 	j = 0;
@@ -356,14 +301,17 @@
 			printk("test %u (%d bit key):\n",
 			j, cipher_tv[i].klen * 8);
 
-			tfm->crt_flags = 0;
+			crypto_blkcipher_clear_flags(tfm, ~0);
 			if (cipher_tv[i].wk)
-				tfm->crt_flags |= CRYPTO_TFM_REQ_WEAK_KEY;
+				crypto_blkcipher_set_flags(
+					tfm, CRYPTO_TFM_REQ_WEAK_KEY);
 			key = cipher_tv[i].key;
 
-			ret = crypto_cipher_setkey(tfm, key, cipher_tv[i].klen);
+			ret = crypto_blkcipher_setkey(tfm, key,
+						      cipher_tv[i].klen);
 			if (ret) {
-				printk("setkey() failed flags=%x\n", tfm->crt_flags);
+				printk("setkey() failed flags=%x\n",
+				       crypto_blkcipher_get_flags(tfm));
 
 				if (!cipher_tv[i].fail)
 					goto out;
@@ -379,18 +327,19 @@
 					   cipher_tv[i].tap[k]);
 			}
 
-			if (!mode) {
-				crypto_cipher_set_iv(tfm, cipher_tv[i].iv,
-						crypto_tfm_alg_ivsize(tfm));
-			}
+			iv_len = crypto_blkcipher_ivsize(tfm);
+			if (iv_len)
+				crypto_blkcipher_set_iv(tfm, cipher_tv[i].iv,
+							iv_len);
 
-			if (enc)
-				ret = crypto_cipher_encrypt(tfm, sg, sg, cipher_tv[i].ilen);
-			else
-				ret = crypto_cipher_decrypt(tfm, sg, sg, cipher_tv[i].ilen);
+			len = cipher_tv[i].ilen;
+			ret = enc ?
+				crypto_blkcipher_encrypt(&desc, sg, sg, len) :
+				crypto_blkcipher_decrypt(&desc, sg, sg, len);
 
 			if (ret) {
-				printk("%s () failed flags=%x\n", e, tfm->crt_flags);
+				printk("%s () failed flags=%x\n", e,
+				       desc.flags);
 				goto out;
 			}
 
@@ -409,10 +358,10 @@
 	}
 
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_blkcipher(tfm);
 }
 
-static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p,
+static int test_cipher_jiffies(struct blkcipher_desc *desc, int enc, char *p,
 			       int blen, int sec)
 {
 	struct scatterlist sg[1];
@@ -425,9 +374,9 @@
 	for (start = jiffies, end = start + sec * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
 		if (enc)
-			ret = crypto_cipher_encrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_encrypt(desc, sg, sg, blen);
 		else
-			ret = crypto_cipher_decrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_decrypt(desc, sg, sg, blen);
 
 		if (ret)
 			return ret;
@@ -438,7 +387,7 @@
 	return 0;
 }
 
-static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p,
+static int test_cipher_cycles(struct blkcipher_desc *desc, int enc, char *p,
 			      int blen)
 {
 	struct scatterlist sg[1];
@@ -454,9 +403,9 @@
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
 		if (enc)
-			ret = crypto_cipher_encrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_encrypt(desc, sg, sg, blen);
 		else
-			ret = crypto_cipher_decrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_decrypt(desc, sg, sg, blen);
 
 		if (ret)
 			goto out;
@@ -468,9 +417,9 @@
 
 		start = get_cycles();
 		if (enc)
-			ret = crypto_cipher_encrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_encrypt(desc, sg, sg, blen);
 		else
-			ret = crypto_cipher_decrypt(tfm, sg, sg, blen);
+			ret = crypto_blkcipher_decrypt(desc, sg, sg, blen);
 		end = get_cycles();
 
 		if (ret)
@@ -490,35 +439,32 @@
 	return ret;
 }
 
-static void test_cipher_speed(char *algo, int mode, int enc, unsigned int sec,
+static void test_cipher_speed(char *algo, int enc, unsigned int sec,
 			      struct cipher_testvec *template,
 			      unsigned int tcount, struct cipher_speed *speed)
 {
 	unsigned int ret, i, j, iv_len;
 	unsigned char *key, *p, iv[128];
-	struct crypto_tfm *tfm;
-	const char *e, *m;
+	struct crypto_blkcipher *tfm;
+	struct blkcipher_desc desc;
+	const char *e;
 
 	if (enc == ENCRYPT)
 	        e = "encryption";
 	else
 		e = "decryption";
-	if (mode == MODE_ECB)
-		m = "ECB";
-	else
-		m = "CBC";
 
-	printk("\ntesting speed of %s %s %s\n", algo, m, e);
+	printk("\ntesting speed of %s %s\n", algo, e);
 
-	if (mode)
-		tfm = crypto_alloc_tfm(algo, 0);
-	else
-		tfm = crypto_alloc_tfm(algo, CRYPTO_TFM_MODE_CBC);
+	tfm = crypto_alloc_blkcipher(algo, 0, CRYPTO_ALG_ASYNC);
 
-	if (tfm == NULL) {
-		printk("failed to load transform for %s %s\n", algo, m);
+	if (IS_ERR(tfm)) {
+		printk("failed to load transform for %s: %ld\n", algo,
+		       PTR_ERR(tfm));
 		return;
 	}
+	desc.tfm = tfm;
+	desc.flags = 0;
 
 	for (i = 0; speed[i].klen != 0; i++) {
 		if ((speed[i].blen + speed[i].klen) > TVMEMSIZE) {
@@ -542,125 +488,231 @@
 		}
 		p = (unsigned char *)tvmem + speed[i].klen;
 
-		ret = crypto_cipher_setkey(tfm, key, speed[i].klen);
+		ret = crypto_blkcipher_setkey(tfm, key, speed[i].klen);
 		if (ret) {
-			printk("setkey() failed flags=%x\n", tfm->crt_flags);
+			printk("setkey() failed flags=%x\n",
+			       crypto_blkcipher_get_flags(tfm));
 			goto out;
 		}
 
-		if (!mode) {
-			iv_len = crypto_tfm_alg_ivsize(tfm);
+		iv_len = crypto_blkcipher_ivsize(tfm);
+		if (iv_len) {
 			memset(&iv, 0xff, iv_len);
-			crypto_cipher_set_iv(tfm, iv, iv_len);
+			crypto_blkcipher_set_iv(tfm, iv, iv_len);
 		}
 
 		if (sec)
-			ret = test_cipher_jiffies(tfm, enc, p, speed[i].blen,
+			ret = test_cipher_jiffies(&desc, enc, p, speed[i].blen,
 						  sec);
 		else
-			ret = test_cipher_cycles(tfm, enc, p, speed[i].blen);
+			ret = test_cipher_cycles(&desc, enc, p, speed[i].blen);
 
 		if (ret) {
-			printk("%s() failed flags=%x\n", e, tfm->crt_flags);
+			printk("%s() failed flags=%x\n", e, desc.flags);
 			break;
 		}
 	}
 
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_blkcipher(tfm);
 }
 
-static void test_digest_jiffies(struct crypto_tfm *tfm, char *p, int blen,
-				int plen, char *out, int sec)
+static int test_hash_jiffies_digest(struct hash_desc *desc, char *p, int blen,
+				    char *out, int sec)
 {
 	struct scatterlist sg[1];
 	unsigned long start, end;
-	int bcount, pcount;
+	int bcount;	/* digest calls completed in the time window */
+	int ret;	/* status of the latest crypto_hash_digest() */
 
 	for (start = jiffies, end = start + sec * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
-		crypto_digest_init(tfm);
-		for (pcount = 0; pcount < blen; pcount += plen) {
-			sg_set_buf(sg, p + pcount, plen);
-			crypto_digest_update(tfm, sg, 1);
-		}
-		/* we assume there is enough space in 'out' for the result */
-		crypto_digest_final(tfm, out);
+		sg_set_buf(sg, p, blen);	/* all blen bytes, one entry */
+		ret = crypto_hash_digest(desc, sg, blen, out);
+		if (ret)
+			return ret;	/* failed run: nothing is printed */
 	}
 
 	printk("%6u opers/sec, %9lu bytes/sec\n",
 	       bcount / sec, ((long)bcount * blen) / sec);
 
-	return;
+	return 0;
 }
 
-static void test_digest_cycles(struct crypto_tfm *tfm, char *p, int blen,
-			       int plen, char *out)
+static int test_hash_jiffies(struct hash_desc *desc, char *p, int blen,
+			     int plen, char *out, int sec)
+{
+	struct scatterlist sg[1];
+	unsigned long start, end;
+	int bcount, pcount;	/* passes completed; byte offset into p */
+	int ret;
+
+	if (plen == blen)	/* one update would cover the whole buffer */
+		return test_hash_jiffies_digest(desc, p, blen, out, sec);
+
+	for (start = jiffies, end = start + sec * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		ret = crypto_hash_init(desc);
+		if (ret)
+			return ret;
+		for (pcount = 0; pcount < blen; pcount += plen) {
+			sg_set_buf(sg, p + pcount, plen);
+			ret = crypto_hash_update(desc, sg, plen);
+			if (ret)
+				return ret;
+		}
+		/* 'out' is assumed to have room for the digest (unchecked) */
+		ret = crypto_hash_final(desc, out);
+		if (ret)
+			return ret;	/* failed run: nothing is printed */
+	}
+
+	printk("%6u opers/sec, %9lu bytes/sec\n",
+	       bcount / sec, ((long)bcount * blen) / sec);
+
+	return 0;
+}
+
+static int test_hash_cycles_digest(struct hash_desc *desc, char *p, int blen,
+				   char *out)
 {
 	struct scatterlist sg[1];
 	unsigned long cycles = 0;
-	int i, pcount;
+	int i;
+	int ret;	/* status of the latest crypto_hash_digest() */
 
 	local_bh_disable();
 	local_irq_disable();
 
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
-		crypto_digest_init(tfm);
-		for (pcount = 0; pcount < blen; pcount += plen) {
-			sg_set_buf(sg, p + pcount, plen);
-			crypto_digest_update(tfm, sg, 1);
-		}
-		crypto_digest_final(tfm, out);
+		sg_set_buf(sg, p, blen);
+		ret = crypto_hash_digest(desc, sg, blen, out);
+		if (ret)
+			goto out;
 	}
 
 	/* The real thing. */
 	for (i = 0; i < 8; i++) {
 		cycles_t start, end;
 
-		crypto_digest_init(tfm);
-
 		start = get_cycles();
 
-		for (pcount = 0; pcount < blen; pcount += plen) {
-			sg_set_buf(sg, p + pcount, plen);
-			crypto_digest_update(tfm, sg, 1);
-		}
-		crypto_digest_final(tfm, out);
+		sg_set_buf(sg, p, blen);	/* inside the timed region */
+		ret = crypto_hash_digest(desc, sg, blen, out);
+		if (ret)
+			goto out;
 
 		end = get_cycles();
 
 		cycles += end - start;
 	}
 
+out:	/* error paths land here too, so irqs and bh get re-enabled */
 	local_irq_enable();
 	local_bh_enable();
 
+	if (ret)
+		return ret;	/* failed run: nothing is printed */
+
 	printk("%6lu cycles/operation, %4lu cycles/byte\n",
 	       cycles / 8, cycles / (8 * blen));
 
-	return;
+	return 0;
 }
 
-static void test_digest_speed(char *algo, unsigned int sec,
-			      struct digest_speed *speed)
+static int test_hash_cycles(struct hash_desc *desc, char *p, int blen,
+			    int plen, char *out)
 {
-	struct crypto_tfm *tfm;
+	struct scatterlist sg[1];
+	unsigned long cycles = 0;
+	int i, pcount;	/* pass counter; byte offset into p */
+	int ret;
+
+	if (plen == blen)	/* one update would cover the whole buffer */
+		return test_hash_cycles_digest(desc, p, blen, out);
+
+	local_bh_disable();
+	local_irq_disable();
+
+	/* Warm-up: 4 untimed init/update/final passes. */
+	for (i = 0; i < 4; i++) {
+		ret = crypto_hash_init(desc);
+		if (ret)
+			goto out;
+		for (pcount = 0; pcount < blen; pcount += plen) {
+			sg_set_buf(sg, p + pcount, plen);
+			ret = crypto_hash_update(desc, sg, plen);
+			if (ret)
+				goto out;
+		}
+		ret = crypto_hash_final(desc, out);
+		if (ret)
+			goto out;
+	}
+
+	/* Timed: 8 passes, each bracketed by get_cycles(). */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		start = get_cycles();
+
+		ret = crypto_hash_init(desc);
+		if (ret)
+			goto out;
+		for (pcount = 0; pcount < blen; pcount += plen) {
+			sg_set_buf(sg, p + pcount, plen);
+			ret = crypto_hash_update(desc, sg, plen);
+			if (ret)
+				goto out;
+		}
+		ret = crypto_hash_final(desc, out);
+		if (ret)
+			goto out;
+
+		end = get_cycles();
+
+		cycles += end - start;
+	}
+
+out:	/* error paths land here too, so irqs and bh get re-enabled */
+	local_irq_enable();
+	local_bh_enable();
+
+	if (ret)
+		return ret;	/* failed run: nothing is printed */
+
+	printk("%6lu cycles/operation, %4lu cycles/byte\n",
+	       cycles / 8, cycles / (8 * blen));
+
+	return 0;
+}
+
+static void test_hash_speed(char *algo, unsigned int sec,
+			      struct hash_speed *speed)
+{
+	struct crypto_hash *tfm;
+	struct hash_desc desc;
 	char output[1024];
 	int i;
+	int ret;
 
 	printk("\ntesting speed of %s\n", algo);
 
-	tfm = crypto_alloc_tfm(algo, 0);
+	tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC);
 
-	if (tfm == NULL) {
-		printk("failed to load transform for %s\n", algo);
+	if (IS_ERR(tfm)) {
+		printk("failed to load transform for %s: %ld\n", algo,
+		       PTR_ERR(tfm));
 		return;
 	}
 
-	if (crypto_tfm_alg_digestsize(tfm) > sizeof(output)) {
+	desc.tfm = tfm;
+	desc.flags = 0;
+
+	if (crypto_hash_digestsize(tfm) > sizeof(output)) {
 		printk("digestsize(%u) > outputbuffer(%zu)\n",
-		       crypto_tfm_alg_digestsize(tfm), sizeof(output));
+		       crypto_hash_digestsize(tfm), sizeof(output));
 		goto out;
 	}
 
@@ -677,20 +729,27 @@
 		memset(tvmem, 0xff, speed[i].blen);
 
 		if (sec)
-			test_digest_jiffies(tfm, tvmem, speed[i].blen, speed[i].plen, output, sec);
+			ret = test_hash_jiffies(&desc, tvmem, speed[i].blen,
+						speed[i].plen, output, sec);
 		else
-			test_digest_cycles(tfm, tvmem, speed[i].blen, speed[i].plen, output);
+			ret = test_hash_cycles(&desc, tvmem, speed[i].blen,
+					       speed[i].plen, output);
+
+		if (ret) {
+			printk("hashing failed ret=%d\n", ret);
+			break;
+		}
 	}
 
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_hash(tfm);
 }
 
 static void test_deflate(void)
 {
 	unsigned int i;
 	char result[COMP_BUF_SIZE];
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	struct comp_testvec *tv;
 	unsigned int tsize;
 
@@ -762,105 +821,7 @@
 		       ilen, dlen);
 	}
 out:
-	crypto_free_tfm(tfm);
-}
-
-static void test_crc32c(void)
-{
-#define NUMVEC 6
-#define VECSIZE 40
-
-	int i, j, pass;
-	u32 crc;
-	u8 b, test_vec[NUMVEC][VECSIZE];
-	static u32 vec_results[NUMVEC] = {
-		0x0e2c157f, 0xe980ebf6, 0xde74bded,
-		0xd579c862, 0xba979ad0, 0x2b29d913
-	};
-	static u32 tot_vec_results = 0x24c5d375;
-
-	struct scatterlist sg[NUMVEC];
-	struct crypto_tfm *tfm;
-	char *fmtdata = "testing crc32c initialized to %08x: %s\n";
-#define SEEDTESTVAL 0xedcba987
-	u32 seed;
-
-	printk("\ntesting crc32c\n");
-
-	tfm = crypto_alloc_tfm("crc32c", 0);
-	if (tfm == NULL) {
-		printk("failed to load transform for crc32c\n");
-		return;
-	}
-
-	crypto_digest_init(tfm);
-	crypto_digest_final(tfm, (u8*)&crc);
-	printk(fmtdata, crc, (crc == 0) ? "pass" : "ERROR");
-
-	/*
-	 * stuff test_vec with known values, simple incrementing
-	 * byte values.
-	 */
-	b = 0;
-	for (i = 0; i < NUMVEC; i++) {
-		for (j = 0; j < VECSIZE; j++)
-			test_vec[i][j] = ++b;
-		sg_set_buf(&sg[i], test_vec[i], VECSIZE);
-	}
-
-	seed = SEEDTESTVAL;
-	(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
-	crypto_digest_final(tfm, (u8*)&crc);
-	printk("testing crc32c setkey returns %08x : %s\n", crc, (crc == (SEEDTESTVAL ^ ~(u32)0)) ?
-	       "pass" : "ERROR");
-
-	printk("testing crc32c using update/final:\n");
-
-	pass = 1;		    /* assume all is well */
-
-	for (i = 0; i < NUMVEC; i++) {
-		seed = ~(u32)0;
-		(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
-		crypto_digest_update(tfm, &sg[i], 1);
-		crypto_digest_final(tfm, (u8*)&crc);
-		if (crc == vec_results[i]) {
-			printk(" %08x:OK", crc);
-		} else {
-			printk(" %08x:BAD, wanted %08x\n", crc, vec_results[i]);
-			pass = 0;
-		}
-	}
-
-	printk("\ntesting crc32c using incremental accumulator:\n");
-	crc = 0;
-	for (i = 0; i < NUMVEC; i++) {
-		seed = (crc ^ ~(u32)0);
-		(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
-		crypto_digest_update(tfm, &sg[i], 1);
-		crypto_digest_final(tfm, (u8*)&crc);
-	}
-	if (crc == tot_vec_results) {
-		printk(" %08x:OK", crc);
-	} else {
-		printk(" %08x:BAD, wanted %08x\n", crc, tot_vec_results);
-		pass = 0;
-	}
-
-	printk("\ntesting crc32c using digest:\n");
-	seed = ~(u32)0;
-	(void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32));
-	crypto_digest_digest(tfm, sg, NUMVEC, (u8*)&crc);
-	if (crc == tot_vec_results) {
-		printk(" %08x:OK", crc);
-	} else {
-		printk(" %08x:BAD, wanted %08x\n", crc, tot_vec_results);
-		pass = 0;
-	}
-
-	printk("\n%s\n", pass ? "pass" : "ERROR");
-
-	crypto_free_tfm(tfm);
-	printk("crc32c test complete\n");
+	crypto_free_comp(tfm);
 }
 
 static void test_available(void)
@@ -869,8 +830,8 @@
 
 	while (*name) {
 		printk("alg %s ", *name);
-		printk((crypto_alg_available(*name, 0)) ?
-			"found\n" : "not found\n");
+		printk(crypto_has_alg(*name, 0, CRYPTO_ALG_ASYNC) ?
+		       "found\n" : "not found\n");
 		name++;
 	}
 }
@@ -885,79 +846,119 @@
 		test_hash("sha1", sha1_tv_template, SHA1_TEST_VECTORS);
 
 		//DES
-		test_cipher ("des", MODE_ECB, ENCRYPT, des_enc_tv_template, DES_ENC_TEST_VECTORS);
-		test_cipher ("des", MODE_ECB, DECRYPT, des_dec_tv_template, DES_DEC_TEST_VECTORS);
-		test_cipher ("des", MODE_CBC, ENCRYPT, des_cbc_enc_tv_template, DES_CBC_ENC_TEST_VECTORS);
-		test_cipher ("des", MODE_CBC, DECRYPT, des_cbc_dec_tv_template, DES_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(des)", ENCRYPT, des_enc_tv_template,
+			    DES_ENC_TEST_VECTORS);
+		test_cipher("ecb(des)", DECRYPT, des_dec_tv_template,
+			    DES_DEC_TEST_VECTORS);
+		test_cipher("cbc(des)", ENCRYPT, des_cbc_enc_tv_template,
+			    DES_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(des)", DECRYPT, des_cbc_dec_tv_template,
+			    DES_CBC_DEC_TEST_VECTORS);
 
 		//DES3_EDE
-		test_cipher ("des3_ede", MODE_ECB, ENCRYPT, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS);
-		test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS);
+		test_cipher("ecb(des3_ede)", ENCRYPT, des3_ede_enc_tv_template,
+			    DES3_EDE_ENC_TEST_VECTORS);
+		test_cipher("ecb(des3_ede)", DECRYPT, des3_ede_dec_tv_template,
+			    DES3_EDE_DEC_TEST_VECTORS);
 
 		test_hash("md4", md4_tv_template, MD4_TEST_VECTORS);
 
 		test_hash("sha256", sha256_tv_template, SHA256_TEST_VECTORS);
 
 		//BLOWFISH
-		test_cipher ("blowfish", MODE_ECB, ENCRYPT, bf_enc_tv_template, BF_ENC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_ECB, DECRYPT, bf_dec_tv_template, BF_DEC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_CBC, ENCRYPT, bf_cbc_enc_tv_template, BF_CBC_ENC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_CBC, DECRYPT, bf_cbc_dec_tv_template, BF_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(blowfish)", ENCRYPT, bf_enc_tv_template,
+			    BF_ENC_TEST_VECTORS);
+		test_cipher("ecb(blowfish)", DECRYPT, bf_dec_tv_template,
+			    BF_DEC_TEST_VECTORS);
+		test_cipher("cbc(blowfish)", ENCRYPT, bf_cbc_enc_tv_template,
+			    BF_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(blowfish)", DECRYPT, bf_cbc_dec_tv_template,
+			    BF_CBC_DEC_TEST_VECTORS);
 
 		//TWOFISH
-		test_cipher ("twofish", MODE_ECB, ENCRYPT, tf_enc_tv_template, TF_ENC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_ECB, DECRYPT, tf_dec_tv_template, TF_DEC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_CBC, ENCRYPT, tf_cbc_enc_tv_template, TF_CBC_ENC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_CBC, DECRYPT, tf_cbc_dec_tv_template, TF_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(twofish)", ENCRYPT, tf_enc_tv_template,
+			    TF_ENC_TEST_VECTORS);
+		test_cipher("ecb(twofish)", DECRYPT, tf_dec_tv_template,
+			    TF_DEC_TEST_VECTORS);
+		test_cipher("cbc(twofish)", ENCRYPT, tf_cbc_enc_tv_template,
+			    TF_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(twofish)", DECRYPT, tf_cbc_dec_tv_template,
+			    TF_CBC_DEC_TEST_VECTORS);
 
 		//SERPENT
-		test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS);
-		test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS);
+		test_cipher("ecb(serpent)", ENCRYPT, serpent_enc_tv_template,
+			    SERPENT_ENC_TEST_VECTORS);
+		test_cipher("ecb(serpent)", DECRYPT, serpent_dec_tv_template,
+			    SERPENT_DEC_TEST_VECTORS);
 
 		//TNEPRES
-		test_cipher ("tnepres", MODE_ECB, ENCRYPT, tnepres_enc_tv_template, TNEPRES_ENC_TEST_VECTORS);
-		test_cipher ("tnepres", MODE_ECB, DECRYPT, tnepres_dec_tv_template, TNEPRES_DEC_TEST_VECTORS);
+		test_cipher("ecb(tnepres)", ENCRYPT, tnepres_enc_tv_template,
+			    TNEPRES_ENC_TEST_VECTORS);
+		test_cipher("ecb(tnepres)", DECRYPT, tnepres_dec_tv_template,
+			    TNEPRES_DEC_TEST_VECTORS);
 
 		//AES
-		test_cipher ("aes", MODE_ECB, ENCRYPT, aes_enc_tv_template, AES_ENC_TEST_VECTORS);
-		test_cipher ("aes", MODE_ECB, DECRYPT, aes_dec_tv_template, AES_DEC_TEST_VECTORS);
-		test_cipher ("aes", MODE_CBC, ENCRYPT, aes_cbc_enc_tv_template, AES_CBC_ENC_TEST_VECTORS);
-		test_cipher ("aes", MODE_CBC, DECRYPT, aes_cbc_dec_tv_template, AES_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(aes)", ENCRYPT, aes_enc_tv_template,
+			    AES_ENC_TEST_VECTORS);
+		test_cipher("ecb(aes)", DECRYPT, aes_dec_tv_template,
+			    AES_DEC_TEST_VECTORS);
+		test_cipher("cbc(aes)", ENCRYPT, aes_cbc_enc_tv_template,
+			    AES_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(aes)", DECRYPT, aes_cbc_dec_tv_template,
+			    AES_CBC_DEC_TEST_VECTORS);
 
 		//CAST5
-		test_cipher ("cast5", MODE_ECB, ENCRYPT, cast5_enc_tv_template, CAST5_ENC_TEST_VECTORS);
-		test_cipher ("cast5", MODE_ECB, DECRYPT, cast5_dec_tv_template, CAST5_DEC_TEST_VECTORS);
+		test_cipher("ecb(cast5)", ENCRYPT, cast5_enc_tv_template,
+			    CAST5_ENC_TEST_VECTORS);
+		test_cipher("ecb(cast5)", DECRYPT, cast5_dec_tv_template,
+			    CAST5_DEC_TEST_VECTORS);
 
 		//CAST6
-		test_cipher ("cast6", MODE_ECB, ENCRYPT, cast6_enc_tv_template, CAST6_ENC_TEST_VECTORS);
-		test_cipher ("cast6", MODE_ECB, DECRYPT, cast6_dec_tv_template, CAST6_DEC_TEST_VECTORS);
+		test_cipher("ecb(cast6)", ENCRYPT, cast6_enc_tv_template,
+			    CAST6_ENC_TEST_VECTORS);
+		test_cipher("ecb(cast6)", DECRYPT, cast6_dec_tv_template,
+			    CAST6_DEC_TEST_VECTORS);
 
 		//ARC4
-		test_cipher ("arc4", MODE_ECB, ENCRYPT, arc4_enc_tv_template, ARC4_ENC_TEST_VECTORS);
-		test_cipher ("arc4", MODE_ECB, DECRYPT, arc4_dec_tv_template, ARC4_DEC_TEST_VECTORS);
+		test_cipher("ecb(arc4)", ENCRYPT, arc4_enc_tv_template,
+			    ARC4_ENC_TEST_VECTORS);
+		test_cipher("ecb(arc4)", DECRYPT, arc4_dec_tv_template,
+			    ARC4_DEC_TEST_VECTORS);
 
 		//TEA
-		test_cipher ("tea", MODE_ECB, ENCRYPT, tea_enc_tv_template, TEA_ENC_TEST_VECTORS);
-		test_cipher ("tea", MODE_ECB, DECRYPT, tea_dec_tv_template, TEA_DEC_TEST_VECTORS);
+		test_cipher("ecb(tea)", ENCRYPT, tea_enc_tv_template,
+			    TEA_ENC_TEST_VECTORS);
+		test_cipher("ecb(tea)", DECRYPT, tea_dec_tv_template,
+			    TEA_DEC_TEST_VECTORS);
 
 
 		//XTEA
-		test_cipher ("xtea", MODE_ECB, ENCRYPT, xtea_enc_tv_template, XTEA_ENC_TEST_VECTORS);
-		test_cipher ("xtea", MODE_ECB, DECRYPT, xtea_dec_tv_template, XTEA_DEC_TEST_VECTORS);
+		test_cipher("ecb(xtea)", ENCRYPT, xtea_enc_tv_template,
+			    XTEA_ENC_TEST_VECTORS);
+		test_cipher("ecb(xtea)", DECRYPT, xtea_dec_tv_template,
+			    XTEA_DEC_TEST_VECTORS);
 
 		//KHAZAD
-		test_cipher ("khazad", MODE_ECB, ENCRYPT, khazad_enc_tv_template, KHAZAD_ENC_TEST_VECTORS);
-		test_cipher ("khazad", MODE_ECB, DECRYPT, khazad_dec_tv_template, KHAZAD_DEC_TEST_VECTORS);
+		test_cipher("ecb(khazad)", ENCRYPT, khazad_enc_tv_template,
+			    KHAZAD_ENC_TEST_VECTORS);
+		test_cipher("ecb(khazad)", DECRYPT, khazad_dec_tv_template,
+			    KHAZAD_DEC_TEST_VECTORS);
 
 		//ANUBIS
-		test_cipher ("anubis", MODE_ECB, ENCRYPT, anubis_enc_tv_template, ANUBIS_ENC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_ECB, DECRYPT, anubis_dec_tv_template, ANUBIS_DEC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_CBC, ENCRYPT, anubis_cbc_enc_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_CBC, DECRYPT, anubis_cbc_dec_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS);
+		test_cipher("ecb(anubis)", ENCRYPT, anubis_enc_tv_template,
+			    ANUBIS_ENC_TEST_VECTORS);
+		test_cipher("ecb(anubis)", DECRYPT, anubis_dec_tv_template,
+			    ANUBIS_DEC_TEST_VECTORS);
+		test_cipher("cbc(anubis)", ENCRYPT, anubis_cbc_enc_tv_template,
+			    ANUBIS_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(anubis)", DECRYPT, anubis_cbc_dec_tv_template,
+			    ANUBIS_CBC_ENC_TEST_VECTORS);
 
 		//XETA
-		test_cipher ("xeta", MODE_ECB, ENCRYPT, xeta_enc_tv_template, XETA_ENC_TEST_VECTORS);
-		test_cipher ("xeta", MODE_ECB, DECRYPT, xeta_dec_tv_template, XETA_DEC_TEST_VECTORS);
+		test_cipher("ecb(xeta)", ENCRYPT, xeta_enc_tv_template,
+			    XETA_ENC_TEST_VECTORS);
+		test_cipher("ecb(xeta)", DECRYPT, xeta_dec_tv_template,
+			    XETA_DEC_TEST_VECTORS);
 
 		test_hash("sha384", sha384_tv_template, SHA384_TEST_VECTORS);
 		test_hash("sha512", sha512_tv_template, SHA512_TEST_VECTORS);
@@ -968,12 +969,13 @@
 		test_hash("tgr160", tgr160_tv_template, TGR160_TEST_VECTORS);
 		test_hash("tgr128", tgr128_tv_template, TGR128_TEST_VECTORS);
 		test_deflate();
-		test_crc32c();
-#ifdef CONFIG_CRYPTO_HMAC
-		test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS);
-		test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS);
-		test_hmac("sha256", hmac_sha256_tv_template, HMAC_SHA256_TEST_VECTORS);
-#endif
+		test_hash("crc32c", crc32c_tv_template, CRC32C_TEST_VECTORS);
+		test_hash("hmac(md5)", hmac_md5_tv_template,
+			  HMAC_MD5_TEST_VECTORS);
+		test_hash("hmac(sha1)", hmac_sha1_tv_template,
+			  HMAC_SHA1_TEST_VECTORS);
+		test_hash("hmac(sha256)", hmac_sha256_tv_template,
+			  HMAC_SHA256_TEST_VECTORS);
 
 		test_hash("michael_mic", michael_mic_tv_template, MICHAEL_MIC_TEST_VECTORS);
 		break;
@@ -987,15 +989,21 @@
 		break;
 
 	case 3:
-		test_cipher ("des", MODE_ECB, ENCRYPT, des_enc_tv_template, DES_ENC_TEST_VECTORS);
-		test_cipher ("des", MODE_ECB, DECRYPT, des_dec_tv_template, DES_DEC_TEST_VECTORS);
-		test_cipher ("des", MODE_CBC, ENCRYPT, des_cbc_enc_tv_template, DES_CBC_ENC_TEST_VECTORS);
-		test_cipher ("des", MODE_CBC, DECRYPT, des_cbc_dec_tv_template, DES_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(des)", ENCRYPT, des_enc_tv_template,
+			    DES_ENC_TEST_VECTORS);
+		test_cipher("ecb(des)", DECRYPT, des_dec_tv_template,
+			    DES_DEC_TEST_VECTORS);
+		test_cipher("cbc(des)", ENCRYPT, des_cbc_enc_tv_template,
+			    DES_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(des)", DECRYPT, des_cbc_dec_tv_template,
+			    DES_CBC_DEC_TEST_VECTORS);
 		break;
 
 	case 4:
-		test_cipher ("des3_ede", MODE_ECB, ENCRYPT, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS);
-		test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS);
+		test_cipher("ecb(des3_ede)", ENCRYPT, des3_ede_enc_tv_template,
+			    DES3_EDE_ENC_TEST_VECTORS);
+		test_cipher("ecb(des3_ede)", DECRYPT, des3_ede_dec_tv_template,
+			    DES3_EDE_DEC_TEST_VECTORS);
 		break;
 
 	case 5:
@@ -1007,29 +1015,43 @@
 		break;
 
 	case 7:
-		test_cipher ("blowfish", MODE_ECB, ENCRYPT, bf_enc_tv_template, BF_ENC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_ECB, DECRYPT, bf_dec_tv_template, BF_DEC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_CBC, ENCRYPT, bf_cbc_enc_tv_template, BF_CBC_ENC_TEST_VECTORS);
-		test_cipher ("blowfish", MODE_CBC, DECRYPT, bf_cbc_dec_tv_template, BF_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(blowfish)", ENCRYPT, bf_enc_tv_template,
+			    BF_ENC_TEST_VECTORS);
+		test_cipher("ecb(blowfish)", DECRYPT, bf_dec_tv_template,
+			    BF_DEC_TEST_VECTORS);
+		test_cipher("cbc(blowfish)", ENCRYPT, bf_cbc_enc_tv_template,
+			    BF_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(blowfish)", DECRYPT, bf_cbc_dec_tv_template,
+			    BF_CBC_DEC_TEST_VECTORS);
 		break;
 
 	case 8:
-		test_cipher ("twofish", MODE_ECB, ENCRYPT, tf_enc_tv_template, TF_ENC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_ECB, DECRYPT, tf_dec_tv_template, TF_DEC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_CBC, ENCRYPT, tf_cbc_enc_tv_template, TF_CBC_ENC_TEST_VECTORS);
-		test_cipher ("twofish", MODE_CBC, DECRYPT, tf_cbc_dec_tv_template, TF_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(twofish)", ENCRYPT, tf_enc_tv_template,
+			    TF_ENC_TEST_VECTORS);
+		test_cipher("ecb(twofish)", DECRYPT, tf_dec_tv_template,
+			    TF_DEC_TEST_VECTORS);
+		test_cipher("cbc(twofish)", ENCRYPT, tf_cbc_enc_tv_template,
+			    TF_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(twofish)", DECRYPT, tf_cbc_dec_tv_template,
+			    TF_CBC_DEC_TEST_VECTORS);
 		break;
 
 	case 9:
-		test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS);
-		test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS);
+		test_cipher("ecb(serpent)", ENCRYPT, serpent_enc_tv_template,
+			    SERPENT_ENC_TEST_VECTORS);
+		test_cipher("ecb(serpent)", DECRYPT, serpent_dec_tv_template,
+			    SERPENT_DEC_TEST_VECTORS);
 		break;
 
 	case 10:
-		test_cipher ("aes", MODE_ECB, ENCRYPT, aes_enc_tv_template, AES_ENC_TEST_VECTORS);
-		test_cipher ("aes", MODE_ECB, DECRYPT, aes_dec_tv_template, AES_DEC_TEST_VECTORS);
-		test_cipher ("aes", MODE_CBC, ENCRYPT, aes_cbc_enc_tv_template, AES_CBC_ENC_TEST_VECTORS);
-		test_cipher ("aes", MODE_CBC, DECRYPT, aes_cbc_dec_tv_template, AES_CBC_DEC_TEST_VECTORS);
+		test_cipher("ecb(aes)", ENCRYPT, aes_enc_tv_template,
+			    AES_ENC_TEST_VECTORS);
+		test_cipher("ecb(aes)", DECRYPT, aes_dec_tv_template,
+			    AES_DEC_TEST_VECTORS);
+		test_cipher("cbc(aes)", ENCRYPT, aes_cbc_enc_tv_template,
+			    AES_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(aes)", DECRYPT, aes_cbc_dec_tv_template,
+			    AES_CBC_DEC_TEST_VECTORS);
 		break;
 
 	case 11:
@@ -1045,18 +1067,24 @@
 		break;
 
 	case 14:
-		test_cipher ("cast5", MODE_ECB, ENCRYPT, cast5_enc_tv_template, CAST5_ENC_TEST_VECTORS);
-		test_cipher ("cast5", MODE_ECB, DECRYPT, cast5_dec_tv_template, CAST5_DEC_TEST_VECTORS);
+		test_cipher("ecb(cast5)", ENCRYPT, cast5_enc_tv_template,
+			    CAST5_ENC_TEST_VECTORS);
+		test_cipher("ecb(cast5)", DECRYPT, cast5_dec_tv_template,
+			    CAST5_DEC_TEST_VECTORS);
 		break;
 
 	case 15:
-		test_cipher ("cast6", MODE_ECB, ENCRYPT, cast6_enc_tv_template, CAST6_ENC_TEST_VECTORS);
-		test_cipher ("cast6", MODE_ECB, DECRYPT, cast6_dec_tv_template, CAST6_DEC_TEST_VECTORS);
+		test_cipher("ecb(cast6)", ENCRYPT, cast6_enc_tv_template,
+			    CAST6_ENC_TEST_VECTORS);
+		test_cipher("ecb(cast6)", DECRYPT, cast6_dec_tv_template,
+			    CAST6_DEC_TEST_VECTORS);
 		break;
 
 	case 16:
-		test_cipher ("arc4", MODE_ECB, ENCRYPT, arc4_enc_tv_template, ARC4_ENC_TEST_VECTORS);
-		test_cipher ("arc4", MODE_ECB, DECRYPT, arc4_dec_tv_template, ARC4_DEC_TEST_VECTORS);
+		test_cipher("ecb(arc4)", ENCRYPT, arc4_enc_tv_template,
+			    ARC4_ENC_TEST_VECTORS);
+		test_cipher("ecb(arc4)", DECRYPT, arc4_dec_tv_template,
+			    ARC4_DEC_TEST_VECTORS);
 		break;
 
 	case 17:
@@ -1064,22 +1092,28 @@
 		break;
 
 	case 18:
-		test_crc32c();
+		test_hash("crc32c", crc32c_tv_template, CRC32C_TEST_VECTORS);
 		break;
 
 	case 19:
-		test_cipher ("tea", MODE_ECB, ENCRYPT, tea_enc_tv_template, TEA_ENC_TEST_VECTORS);
-		test_cipher ("tea", MODE_ECB, DECRYPT, tea_dec_tv_template, TEA_DEC_TEST_VECTORS);
+		test_cipher("ecb(tea)", ENCRYPT, tea_enc_tv_template,
+			    TEA_ENC_TEST_VECTORS);
+		test_cipher("ecb(tea)", DECRYPT, tea_dec_tv_template,
+			    TEA_DEC_TEST_VECTORS);
 		break;
 
 	case 20:
-		test_cipher ("xtea", MODE_ECB, ENCRYPT, xtea_enc_tv_template, XTEA_ENC_TEST_VECTORS);
-		test_cipher ("xtea", MODE_ECB, DECRYPT, xtea_dec_tv_template, XTEA_DEC_TEST_VECTORS);
+		test_cipher("ecb(xtea)", ENCRYPT, xtea_enc_tv_template,
+			    XTEA_ENC_TEST_VECTORS);
+		test_cipher("ecb(xtea)", DECRYPT, xtea_dec_tv_template,
+			    XTEA_DEC_TEST_VECTORS);
 		break;
 
 	case 21:
-		test_cipher ("khazad", MODE_ECB, ENCRYPT, khazad_enc_tv_template, KHAZAD_ENC_TEST_VECTORS);
-		test_cipher ("khazad", MODE_ECB, DECRYPT, khazad_dec_tv_template, KHAZAD_DEC_TEST_VECTORS);
+		test_cipher("ecb(khazad)", ENCRYPT, khazad_enc_tv_template,
+			    KHAZAD_ENC_TEST_VECTORS);
+		test_cipher("ecb(khazad)", DECRYPT, khazad_dec_tv_template,
+			    KHAZAD_DEC_TEST_VECTORS);
 		break;
 
 	case 22:
@@ -1095,15 +1129,21 @@
 		break;
 
 	case 25:
-		test_cipher ("tnepres", MODE_ECB, ENCRYPT, tnepres_enc_tv_template, TNEPRES_ENC_TEST_VECTORS);
-		test_cipher ("tnepres", MODE_ECB, DECRYPT, tnepres_dec_tv_template, TNEPRES_DEC_TEST_VECTORS);
+		test_cipher("ecb(tnepres)", ENCRYPT, tnepres_enc_tv_template,
+			    TNEPRES_ENC_TEST_VECTORS);
+		test_cipher("ecb(tnepres)", DECRYPT, tnepres_dec_tv_template,
+			    TNEPRES_DEC_TEST_VECTORS);
 		break;
 
 	case 26:
-		test_cipher ("anubis", MODE_ECB, ENCRYPT, anubis_enc_tv_template, ANUBIS_ENC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_ECB, DECRYPT, anubis_dec_tv_template, ANUBIS_DEC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_CBC, ENCRYPT, anubis_cbc_enc_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS);
-		test_cipher ("anubis", MODE_CBC, DECRYPT, anubis_cbc_dec_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS);
+		test_cipher("ecb(anubis)", ENCRYPT, anubis_enc_tv_template,
+			    ANUBIS_ENC_TEST_VECTORS);
+		test_cipher("ecb(anubis)", DECRYPT, anubis_dec_tv_template,
+			    ANUBIS_DEC_TEST_VECTORS);
+		test_cipher("cbc(anubis)", ENCRYPT, anubis_cbc_enc_tv_template,
+			    ANUBIS_CBC_ENC_TEST_VECTORS);
+		test_cipher("cbc(anubis)", DECRYPT, anubis_cbc_dec_tv_template,
+			    ANUBIS_CBC_ENC_TEST_VECTORS);
 		break;
 
 	case 27:
@@ -1120,85 +1160,88 @@
 		break;
 		
 	case 30:
-		test_cipher ("xeta", MODE_ECB, ENCRYPT, xeta_enc_tv_template, XETA_ENC_TEST_VECTORS);
-		test_cipher ("xeta", MODE_ECB, DECRYPT, xeta_dec_tv_template, XETA_DEC_TEST_VECTORS);
+		test_cipher("ecb(xeta)", ENCRYPT, xeta_enc_tv_template,
+			    XETA_ENC_TEST_VECTORS);
+		test_cipher("ecb(xeta)", DECRYPT, xeta_dec_tv_template,
+			    XETA_DEC_TEST_VECTORS);
 		break;
 
-#ifdef CONFIG_CRYPTO_HMAC
 	case 100:
-		test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS);
+		test_hash("hmac(md5)", hmac_md5_tv_template,
+			  HMAC_MD5_TEST_VECTORS);
 		break;
 
 	case 101:
-		test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS);
+		test_hash("hmac(sha1)", hmac_sha1_tv_template,
+			  HMAC_SHA1_TEST_VECTORS);
 		break;
 
 	case 102:
-		test_hmac("sha256", hmac_sha256_tv_template, HMAC_SHA256_TEST_VECTORS);
+		test_hash("hmac(sha256)", hmac_sha256_tv_template,
+			  HMAC_SHA256_TEST_VECTORS);
 		break;
 
-#endif
 
 	case 200:
-		test_cipher_speed("aes", MODE_ECB, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
 				  aes_speed_template);
-		test_cipher_speed("aes", MODE_ECB, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(aes)", DECRYPT, sec, NULL, 0,
 				  aes_speed_template);
-		test_cipher_speed("aes", MODE_CBC, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(aes)", ENCRYPT, sec, NULL, 0,
 				  aes_speed_template);
-		test_cipher_speed("aes", MODE_CBC, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(aes)", DECRYPT, sec, NULL, 0,
 				  aes_speed_template);
 		break;
 
 	case 201:
-		test_cipher_speed("des3_ede", MODE_ECB, ENCRYPT, sec,
+		test_cipher_speed("ecb(des3_ede)", ENCRYPT, sec,
 				  des3_ede_enc_tv_template,
 				  DES3_EDE_ENC_TEST_VECTORS,
 				  des3_ede_speed_template);
-		test_cipher_speed("des3_ede", MODE_ECB, DECRYPT, sec,
+		test_cipher_speed("ecb(des3_ede)", DECRYPT, sec,
 				  des3_ede_dec_tv_template,
 				  DES3_EDE_DEC_TEST_VECTORS,
 				  des3_ede_speed_template);
-		test_cipher_speed("des3_ede", MODE_CBC, ENCRYPT, sec,
+		test_cipher_speed("cbc(des3_ede)", ENCRYPT, sec,
 				  des3_ede_enc_tv_template,
 				  DES3_EDE_ENC_TEST_VECTORS,
 				  des3_ede_speed_template);
-		test_cipher_speed("des3_ede", MODE_CBC, DECRYPT, sec,
+		test_cipher_speed("cbc(des3_ede)", DECRYPT, sec,
 				  des3_ede_dec_tv_template,
 				  DES3_EDE_DEC_TEST_VECTORS,
 				  des3_ede_speed_template);
 		break;
 
 	case 202:
-		test_cipher_speed("twofish", MODE_ECB, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(twofish)", ENCRYPT, sec, NULL, 0,
 				  twofish_speed_template);
-		test_cipher_speed("twofish", MODE_ECB, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(twofish)", DECRYPT, sec, NULL, 0,
 				  twofish_speed_template);
-		test_cipher_speed("twofish", MODE_CBC, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(twofish)", ENCRYPT, sec, NULL, 0,
 				  twofish_speed_template);
-		test_cipher_speed("twofish", MODE_CBC, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(twofish)", DECRYPT, sec, NULL, 0,
 				  twofish_speed_template);
 		break;
 
 	case 203:
-		test_cipher_speed("blowfish", MODE_ECB, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(blowfish)", ENCRYPT, sec, NULL, 0,
 				  blowfish_speed_template);
-		test_cipher_speed("blowfish", MODE_ECB, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(blowfish)", DECRYPT, sec, NULL, 0,
 				  blowfish_speed_template);
-		test_cipher_speed("blowfish", MODE_CBC, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(blowfish)", ENCRYPT, sec, NULL, 0,
 				  blowfish_speed_template);
-		test_cipher_speed("blowfish", MODE_CBC, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(blowfish)", DECRYPT, sec, NULL, 0,
 				  blowfish_speed_template);
 		break;
 
 	case 204:
-		test_cipher_speed("des", MODE_ECB, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(des)", ENCRYPT, sec, NULL, 0,
 				  des_speed_template);
-		test_cipher_speed("des", MODE_ECB, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("ecb(des)", DECRYPT, sec, NULL, 0,
 				  des_speed_template);
-		test_cipher_speed("des", MODE_CBC, ENCRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(des)", ENCRYPT, sec, NULL, 0,
 				  des_speed_template);
-		test_cipher_speed("des", MODE_CBC, DECRYPT, sec, NULL, 0,
+		test_cipher_speed("cbc(des)", DECRYPT, sec, NULL, 0,
 				  des_speed_template);
 		break;
 
@@ -1206,51 +1249,51 @@
 		/* fall through */
 
 	case 301:
-		test_digest_speed("md4", sec, generic_digest_speed_template);
+		test_hash_speed("md4", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 302:
-		test_digest_speed("md5", sec, generic_digest_speed_template);
+		test_hash_speed("md5", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 303:
-		test_digest_speed("sha1", sec, generic_digest_speed_template);
+		test_hash_speed("sha1", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 304:
-		test_digest_speed("sha256", sec, generic_digest_speed_template);
+		test_hash_speed("sha256", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 305:
-		test_digest_speed("sha384", sec, generic_digest_speed_template);
+		test_hash_speed("sha384", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 306:
-		test_digest_speed("sha512", sec, generic_digest_speed_template);
+		test_hash_speed("sha512", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 307:
-		test_digest_speed("wp256", sec, generic_digest_speed_template);
+		test_hash_speed("wp256", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 308:
-		test_digest_speed("wp384", sec, generic_digest_speed_template);
+		test_hash_speed("wp384", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 309:
-		test_digest_speed("wp512", sec, generic_digest_speed_template);
+		test_hash_speed("wp512", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 310:
-		test_digest_speed("tgr128", sec, generic_digest_speed_template);
+		test_hash_speed("tgr128", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 311:
-		test_digest_speed("tgr160", sec, generic_digest_speed_template);
+		test_hash_speed("tgr160", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 312:
-		test_digest_speed("tgr192", sec, generic_digest_speed_template);
+		test_hash_speed("tgr192", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
 	case 399:
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index 1fac560..a40c441 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -28,7 +28,7 @@
 struct hash_testvec {
 	/* only used with keyed hash algorithms */
 	char key[128] __attribute__ ((__aligned__(4)));
-	char plaintext[128];
+	char plaintext[240];
 	char digest[MAX_DIGEST_SIZE];
 	unsigned char tap[MAX_TAP];
 	unsigned char psize;
@@ -36,16 +36,6 @@
 	unsigned char ksize;
 };
 
-struct hmac_testvec {
-	char key[128];
-	char plaintext[128];
-	char digest[MAX_DIGEST_SIZE];
-	unsigned char tap[MAX_TAP];
-	unsigned char ksize;
-	unsigned char psize;
-	unsigned char np;
-};
-
 struct cipher_testvec {
 	char key[MAX_KEYLEN] __attribute__ ((__aligned__(4)));
 	char iv[MAX_IVLEN];
@@ -65,7 +55,7 @@
 	unsigned int blen;
 };
 
-struct digest_speed {
+struct hash_speed {
 	unsigned int blen;	/* buffer length */
 	unsigned int plen;	/* per-update length */
 };
@@ -697,14 +687,13 @@
 	},
 };
 
-#ifdef CONFIG_CRYPTO_HMAC
 /*
  * HMAC-MD5 test vectors from RFC2202
  * (These need to be fixed to not use strlen).
  */
 #define HMAC_MD5_TEST_VECTORS	7
 
-static struct hmac_testvec hmac_md5_tv_template[] =
+static struct hash_testvec hmac_md5_tv_template[] =
 {
 	{
 		.key	= { [0 ... 15] =  0x0b },
@@ -768,7 +757,7 @@
  */
 #define HMAC_SHA1_TEST_VECTORS	7
 
-static struct hmac_testvec hmac_sha1_tv_template[] = {
+static struct hash_testvec hmac_sha1_tv_template[] = {
 	{
 		.key	= { [0 ... 19] = 0x0b },
 		.ksize	= 20,
@@ -833,7 +822,7 @@
  */
 #define HMAC_SHA256_TEST_VECTORS	10
 
-static struct hmac_testvec hmac_sha256_tv_template[] = {
+static struct hash_testvec hmac_sha256_tv_template[] = {
 	{
 		.key	= { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
 			    0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
@@ -944,8 +933,6 @@
 	},
 };
 
-#endif	/* CONFIG_CRYPTO_HMAC */
-
 /*
  * DES test vectors.
  */
@@ -2897,6 +2884,183 @@
 };
 
 /*
+ * CRC32C test vectors
+ */
+#define CRC32C_TEST_VECTORS 14
+
+static struct hash_testvec crc32c_tv_template[] = {	/* CRC32C vectors: unkeyed, keyed, chained, and multi-chunk */
+	{	/* no key, empty input: all-zero digest */
+		.psize = 0,
+		.digest = { 0x00, 0x00, 0x00, 0x00 }
+	},
+	{	/* keyed, empty input: digest is the bitwise complement of .key */
+		.key = { 0x87, 0xa9, 0xcb, 0xed },
+		.ksize = 4,
+		.psize = 0,
+		.digest = { 0x78, 0x56, 0x34, 0x12 },
+	},
+	{	/* all-ones key, bytes 0x01..0x28; also the first link of the chain below */
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+			       0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+			       0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+			       0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+			       0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28 },
+		.psize = 40,
+		.digest = { 0x7f, 0x15, 0x2c, 0x0e }
+	},
+	{	/* all-ones key, bytes 0x29..0x50 */
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+			       0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+			       0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+			       0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+			       0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50 },
+		.psize = 40,
+		.digest = { 0xf6, 0xeb, 0x80, 0xe9 }
+	},
+	{	/* all-ones key, bytes 0x51..0x78 */
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+			       0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60,
+			       0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+			       0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
+			       0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78 },
+		.psize = 40,
+		.digest = { 0xed, 0xbd, 0x74, 0xde }
+	},
+	{	/* all-ones key, bytes 0x79..0xa0 */
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+			       0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,
+			       0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90,
+			       0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+			       0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0 },
+		.psize = 40,
+		.digest = { 0x62, 0xc8, 0x79, 0xd5 }
+	},
+	{	/* all-ones key, bytes 0xa1..0xc8 */
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8,
+			       0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+			       0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
+			       0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+			       0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8 },
+		.psize = 40,
+		.digest = { 0xd0, 0x9a, 0x97, 0xba }
+	},
+	{	/* all-ones key, bytes 0xc9..0xf0 */
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+			       0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8,
+			       0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+			       0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8,
+			       0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0 },
+		.psize = 40,
+		.digest = { 0x13, 0xd9, 0x29, 0x2b }
+	},
+	{	/* chain link 2: .key is the complement of the 0x01..0x28 digest (7f 15 2c 0e) */
+		.key = { 0x80, 0xea, 0xd3, 0xf1 },
+		.ksize = 4,
+		.plaintext = { 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+			       0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+			       0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+			       0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+			       0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50 },
+		.psize = 40,
+		.digest = { 0x0c, 0xb5, 0xe2, 0xa2 }
+	},
+	{	/* chain link 3: .key is the complement of the preceding vector's digest */
+		.key = { 0xf3, 0x4a, 0x1d, 0x5d },
+		.ksize = 4,
+		.plaintext = { 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+			       0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60,
+			       0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+			       0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
+			       0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78 },
+		.psize = 40,
+		.digest = { 0xd1, 0x7f, 0xfb, 0xa6 }
+	},
+	{	/* chain link 4: .key is the complement of the preceding vector's digest */
+		.key = { 0x2e, 0x80, 0x04, 0x59 },
+		.ksize = 4,
+		.plaintext = { 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+			       0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,
+			       0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90,
+			       0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+			       0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0 },
+		.psize = 40,
+		.digest = { 0x59, 0x33, 0xe6, 0x7a }
+	},
+	{	/* chain link 5: .key is the complement of the preceding vector's digest */
+		.key = { 0xa6, 0xcc, 0x19, 0x85 },
+		.ksize = 4,
+		.plaintext = { 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8,
+			       0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+			       0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
+			       0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+			       0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8 },
+		.psize = 40,
+		.digest = { 0xbe, 0x03, 0x01, 0xd2 }
+	},
+	{	/* chain link 6 (last): digest equals that of the 240-byte vector below */
+		.key = { 0x41, 0xfc, 0xfe, 0x2d },
+		.ksize = 4,
+		.plaintext = { 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+			       0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8,
+			       0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+			       0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8,
+			       0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0 },
+		.psize = 40,
+		.digest = { 0x75, 0xd3, 0xc5, 0x24 }
+	},
+	{	/* all-ones key, all 240 bytes 0x01..0xf0 in one vector */
+		.key = { 0xff, 0xff, 0xff, 0xff },
+		.ksize = 4,
+		.plaintext = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+			       0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+			       0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+			       0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+			       0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+			       0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+			       0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+			       0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+			       0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+			       0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+			       0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+			       0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60,
+			       0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+			       0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
+			       0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+			       0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+			       0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,
+			       0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90,
+			       0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+			       0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0,
+			       0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8,
+			       0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+			       0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
+			       0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+			       0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+			       0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+			       0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8,
+			       0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+			       0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8,
+			       0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0 },
+		.psize = 240,
+		.digest = { 0x75, 0xd3, 0xc5, 0x24 },	/* same value as the last chain link above */
+		.np = 2,	/* NOTE(review): presumably the number of .tap entries in use - confirm in tcrypt.c */
+		.tap = { 31, 209 }	/* 31 + 209 == .psize; presumably per-chunk lengths - TODO confirm */
+	},
+};
+
+/*
  * Cipher speed tests
  */
 static struct cipher_speed aes_speed_template[] = {
@@ -2983,7 +3147,7 @@
 /*
  * Digest speed tests
  */
-static struct digest_speed generic_digest_speed_template[] = {
+static struct hash_speed generic_hash_speed_template[] = {
 	{ .blen = 16, 	.plen = 16, },
 	{ .blen = 64,	.plen = 16, },
 	{ .blen = 64,	.plen = 64, },
diff --git a/crypto/tea.c b/crypto/tea.c
index 5367adc..1c54e26 100644
--- a/crypto/tea.c
+++ b/crypto/tea.c
@@ -46,16 +46,10 @@
 };
 
 static int tea_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-		      unsigned int key_len, u32 *flags)
+		      unsigned int key_len)
 {
 	struct tea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
-	
-	if (key_len != 16)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
 
 	ctx->KEY[0] = le32_to_cpu(key[0]);
 	ctx->KEY[1] = le32_to_cpu(key[1]);
@@ -125,16 +119,10 @@
 }
 
 static int xtea_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct xtea_ctx *ctx = crypto_tfm_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
-	
-	if (key_len != 16)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
 
 	ctx->KEY[0] = le32_to_cpu(key[0]);
 	ctx->KEY[1] = le32_to_cpu(key[1]);
diff --git a/crypto/twofish.c b/crypto/twofish.c
index ec24882..4979a2b 100644
--- a/crypto/twofish.c
+++ b/crypto/twofish.c
@@ -39,6 +39,7 @@
  */
 
 #include <asm/byteorder.h>
+#include <crypto/twofish.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -46,534 +47,6 @@
 #include <linux/crypto.h>
 #include <linux/bitops.h>
 
-
-/* The large precomputed tables for the Twofish cipher (twofish.c)
- * Taken from the same source as twofish.c
- * Marc Mutz <Marc@Mutz.com>
- */
-
-/* These two tables are the q0 and q1 permutations, exactly as described in
- * the Twofish paper. */
-
-static const u8 q0[256] = {
-   0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
-   0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
-   0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
-   0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
-   0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
-   0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
-   0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
-   0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
-   0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
-   0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
-   0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
-   0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
-   0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
-   0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
-   0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
-   0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
-   0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
-   0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
-   0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
-   0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
-   0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
-   0x4A, 0x5E, 0xC1, 0xE0
-};
-
-static const u8 q1[256] = {
-   0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
-   0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
-   0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
-   0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
-   0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
-   0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
-   0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
-   0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
-   0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
-   0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
-   0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
-   0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
-   0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
-   0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
-   0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
-   0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
-   0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
-   0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
-   0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
-   0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
-   0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
-   0x55, 0x09, 0xBE, 0x91
-};
-
-/* These MDS tables are actually tables of MDS composed with q0 and q1,
- * because it is only ever used that way and we can save some time by
- * precomputing.  Of course the main saving comes from precomputing the
- * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
- * things up in these tables we reduce the matrix multiply to four lookups
- * and three XORs.  Semi-formally, the definition of these tables is:
- * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
- * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
- * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
- * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
- * by Schneier et al, and I'm casually glossing over the byte/word
- * conversion issues. */
-
-static const u32 mds[4][256] = {
-   {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
-    0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
-    0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
-    0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
-    0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
-    0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
-    0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
-    0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
-    0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
-    0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
-    0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
-    0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
-    0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
-    0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
-    0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
-    0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
-    0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
-    0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
-    0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
-    0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
-    0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
-    0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
-    0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
-    0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
-    0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
-    0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
-    0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
-    0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
-    0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
-    0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
-    0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
-    0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
-    0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
-    0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
-    0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
-    0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
-    0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
-    0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
-    0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
-    0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
-    0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
-    0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
-    0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
-
-   {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
-    0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
-    0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
-    0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
-    0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
-    0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
-    0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
-    0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
-    0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
-    0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
-    0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
-    0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
-    0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
-    0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
-    0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
-    0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
-    0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
-    0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
-    0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
-    0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
-    0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
-    0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
-    0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
-    0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
-    0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
-    0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
-    0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
-    0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
-    0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
-    0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
-    0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
-    0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
-    0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
-    0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
-    0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
-    0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
-    0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
-    0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
-    0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
-    0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
-    0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
-    0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
-    0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
-
-   {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
-    0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
-    0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
-    0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
-    0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
-    0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
-    0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
-    0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
-    0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
-    0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
-    0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
-    0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
-    0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
-    0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
-    0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
-    0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
-    0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
-    0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
-    0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
-    0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
-    0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
-    0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
-    0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
-    0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
-    0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
-    0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
-    0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
-    0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
-    0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
-    0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
-    0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
-    0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
-    0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
-    0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
-    0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
-    0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
-    0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
-    0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
-    0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
-    0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
-    0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
-    0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
-    0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
-
-   {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
-    0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
-    0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
-    0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
-    0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
-    0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
-    0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
-    0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
-    0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
-    0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
-    0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
-    0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
-    0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
-    0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
-    0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
-    0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
-    0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
-    0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
-    0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
-    0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
-    0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
-    0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
-    0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
-    0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
-    0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
-    0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
-    0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
-    0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
-    0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
-    0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
-    0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
-    0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
-    0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
-    0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
-    0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
-    0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
-    0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
-    0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
-    0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
-    0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
-    0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
-    0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
-    0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
-};
-
-/* The exp_to_poly and poly_to_exp tables are used to perform efficient
- * operations in GF(2^8) represented as GF(2)[x]/w(x) where
- * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
- * definition of the RS matrix in the key schedule.  Elements of that field
- * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
- * which can be represented naturally by bytes (just substitute x=2).  In that
- * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
- * multiplication is inefficient without hardware support.  To multiply
- * faster, I make use of the fact x is a generator for the nonzero elements,
- * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
- * some n in 0..254.  Note that that caret is exponentiation in GF(2^8),
- * *not* polynomial notation.  So if I want to compute pq where p and q are
- * in GF(2^8), I can just say:
- *    1. if p=0 or q=0 then pq=0
- *    2. otherwise, find m and n such that p=x^m and q=x^n
- *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
- * The translations in steps 2 and 3 are looked up in the tables
- * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
- * in action, look at the CALC_S macro.  As additional wrinkles, note that
- * one of my operands is always a constant, so the poly_to_exp lookup on it
- * is done in advance; I included the original values in the comments so
- * readers can have some chance of recognizing that this *is* the RS matrix
- * from the Twofish paper.  I've only included the table entries I actually
- * need; I never do a lookup on a variable input of zero and the biggest
- * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll
- * never sum to more than 491.	I'm repeating part of the exp_to_poly table
- * so that I don't have to do mod-255 reduction in the exponent arithmetic.
- * Since I know my constant operands are never zero, I only have to worry
- * about zero values in the variable operand, and I do it with a simple
- * conditional branch.	I know conditionals are expensive, but I couldn't
- * see a non-horrible way of avoiding them, and I did manage to group the
- * statements so that each if covers four group multiplications. */
-
-static const u8 poly_to_exp[255] = {
-   0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
-   0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
-   0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
-   0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
-   0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
-   0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
-   0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
-   0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
-   0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
-   0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
-   0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
-   0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
-   0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
-   0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
-   0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
-   0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
-   0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
-   0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
-   0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
-   0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
-   0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
-   0x85, 0xC8, 0xA1
-};
-
-static const u8 exp_to_poly[492] = {
-   0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
-   0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
-   0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
-   0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
-   0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
-   0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
-   0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
-   0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
-   0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
-   0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
-   0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
-   0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
-   0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
-   0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
-   0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
-   0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
-   0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
-   0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
-   0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
-   0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
-   0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
-   0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
-   0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
-   0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
-   0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
-   0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
-   0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
-   0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
-   0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
-   0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
-   0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
-   0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
-   0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
-   0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
-   0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
-   0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
-   0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
-   0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
-   0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
-   0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
-   0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB
-};
-
-
-/* The table constants are indices of
- * S-box entries, preprocessed through q0 and q1. */
-static const u8 calc_sb_tbl[512] = {
-    0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
-    0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
-    0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
-    0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
-    0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
-    0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
-    0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
-    0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
-    0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
-    0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
-    0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
-    0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
-    0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
-    0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
-    0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
-    0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
-    0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
-    0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
-    0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
-    0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
-    0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
-    0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
-    0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
-    0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
-    0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
-    0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
-    0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
-    0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
-    0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
-    0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
-    0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
-    0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
-    0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
-    0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
-    0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
-    0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
-    0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
-    0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
-    0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
-    0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
-    0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
-    0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
-    0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
-    0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
-    0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
-    0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
-    0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
-    0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
-    0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
-    0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
-    0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
-    0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
-    0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
-    0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
-    0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
-    0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
-    0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
-    0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
-    0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
-    0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
-    0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
-    0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
-    0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
-    0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
-};
-
-/* Macro to perform one column of the RS matrix multiplication.  The
- * parameters a, b, c, and d are the four bytes of output; i is the index
- * of the key bytes, and w, x, y, and z, are the column of constants from
- * the RS matrix, preprocessed through the poly_to_exp table. */
-
-#define CALC_S(a, b, c, d, i, w, x, y, z) \
-   if (key[i]) { \
-      tmp = poly_to_exp[key[i] - 1]; \
-      (a) ^= exp_to_poly[tmp + (w)]; \
-      (b) ^= exp_to_poly[tmp + (x)]; \
-      (c) ^= exp_to_poly[tmp + (y)]; \
-      (d) ^= exp_to_poly[tmp + (z)]; \
-   }
-
-/* Macros to calculate the key-dependent S-boxes for a 128-bit key using
- * the S vector from CALC_S.  CALC_SB_2 computes a single entry in all
- * four S-boxes, where i is the index of the entry to compute, and a and b
- * are the index numbers preprocessed through the q0 and q1 tables
- * respectively. */
-
-#define CALC_SB_2(i, a, b) \
-   ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
-   ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
-   ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
-   ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]
-
-/* Macro exactly like CALC_SB_2, but for 192-bit keys. */
-
-#define CALC_SB192_2(i, a, b) \
-   ctx->s[0][i] = mds[0][q0[q0[(b) ^ sa] ^ se] ^ si]; \
-   ctx->s[1][i] = mds[1][q0[q1[(b) ^ sb] ^ sf] ^ sj]; \
-   ctx->s[2][i] = mds[2][q1[q0[(a) ^ sc] ^ sg] ^ sk]; \
-   ctx->s[3][i] = mds[3][q1[q1[(a) ^ sd] ^ sh] ^ sl];
-
-/* Macro exactly like CALC_SB_2, but for 256-bit keys. */
-
-#define CALC_SB256_2(i, a, b) \
-   ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
-   ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
-   ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
-   ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp];
-
-/* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
- * last two stages of the h() function for a given index (either 2i or 2i+1).
- * a, b, c, and d are the four bytes going into the last two stages.  For
- * 128-bit keys, this is the entire h() function and a and c are the index
- * preprocessed through q0 and q1 respectively; for longer keys they are the
- * output of previous stages.  j is the index of the first key byte to use.
- * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
- * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
- * rotations.  Its parameters are: a, the array to write the results into,
- * j, the index of the first output entry, k and l, the preprocessed indices
- * for index 2i, and m and n, the preprocessed indices for index 2i+1.
- * CALC_K192_2 expands CALC_K_2 to handle 192-bit keys, by doing an
- * additional lookup-and-XOR stage.  The parameters a, b, c and d are the
- * four bytes going into the last three stages.  For 192-bit keys, c = d
- * are the index preprocessed through q0, and a = b are the index
- * preprocessed through q1; j is the index of the first key byte to use.
- * CALC_K192 is identical to CALC_K but for using the CALC_K192_2 macro
- * instead of CALC_K_2.
- * CALC_K256_2 expands CALC_K192_2 to handle 256-bit keys, by doing an
- * additional lookup-and-XOR stage.  The parameters a and b are the index
- * preprocessed through q0 and q1 respectively; j is the index of the first
- * key byte to use.  CALC_K256 is identical to CALC_K but for using the
- * CALC_K256_2 macro instead of CALC_K_2. */
-
-#define CALC_K_2(a, b, c, d, j) \
-     mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \
-   ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \
-   ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \
-   ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]]
-
-#define CALC_K(a, j, k, l, m, n) \
-   x = CALC_K_2 (k, l, k, l, 0); \
-   y = CALC_K_2 (m, n, m, n, 4); \
-   y = rol32(y, 8); \
-   x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = rol32(y, 9)
-
-#define CALC_K192_2(a, b, c, d, j) \
-   CALC_K_2 (q0[a ^ key[(j) + 16]], \
-	     q1[b ^ key[(j) + 17]], \
-	     q0[c ^ key[(j) + 18]], \
-	     q1[d ^ key[(j) + 19]], j)
-
-#define CALC_K192(a, j, k, l, m, n) \
-   x = CALC_K192_2 (l, l, k, k, 0); \
-   y = CALC_K192_2 (n, n, m, m, 4); \
-   y = rol32(y, 8); \
-   x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = rol32(y, 9)
-
-#define CALC_K256_2(a, b, j) \
-   CALC_K192_2 (q1[b ^ key[(j) + 24]], \
-	        q1[a ^ key[(j) + 25]], \
-	        q0[a ^ key[(j) + 26]], \
-	        q0[b ^ key[(j) + 27]], j)
-
-#define CALC_K256(a, j, k, l, m, n) \
-   x = CALC_K256_2 (k, l, 0); \
-   y = CALC_K256_2 (m, n, 4); \
-   y = rol32(y, 8); \
-   x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = rol32(y, 9)
-
-
 /* Macros to compute the g() function in the encryption and decryption
  * rounds.  G1 is the straight g() function; G2 includes the 8-bit
  * rotation for the high 32-bit word. */
@@ -630,176 +103,7 @@
    x ^= ctx->w[m]; \
    dst[n] = cpu_to_le32(x)
 
-#define TF_MIN_KEY_SIZE 16
-#define TF_MAX_KEY_SIZE 32
-#define TF_BLOCK_SIZE 16
 
-/* Structure for an expanded Twofish key.  s contains the key-dependent
- * S-boxes composed with the MDS matrix; w contains the eight "whitening"
- * subkeys, K[0] through K[7].	k holds the remaining, "round" subkeys.  Note
- * that k[i] corresponds to what the Twofish paper calls K[i+8]. */
-struct twofish_ctx {
-   u32 s[4][256], w[8], k[32];
-};
-
-/* Perform the key setup. */
-static int twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			  unsigned int key_len, u32 *flags)
-{
-	
-	struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	int i, j, k;
-
-	/* Temporaries for CALC_K. */
-	u32 x, y;
-
-	/* The S vector used to key the S-boxes, split up into individual bytes.
-	 * 128-bit keys use only sa through sh; 256-bit use all of them. */
-	u8 sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
-	u8 si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
-
-	/* Temporary for CALC_S. */
-	u8 tmp;
-
-	/* Check key length. */
-	if (key_len != 16 && key_len != 24 && key_len != 32)
-	{
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL; /* unsupported key length */
-	}
-
-	/* Compute the first two words of the S vector.  The magic numbers are
-	 * the entries of the RS matrix, preprocessed through poly_to_exp. The
-	 * numbers in the comments are the original (polynomial form) matrix
-	 * entries. */
-	CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-	CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-	CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-	CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-	CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-	CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-	CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-	CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-	CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-	CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-	CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-	CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-	CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-	CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-	CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-	CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-
-	if (key_len == 24 || key_len == 32) { /* 192- or 256-bit key */
-		/* Calculate the third word of the S vector */
-		CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-		CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-		CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-		CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-		CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-		CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-		CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-		CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-	}
-
-	if (key_len == 32) { /* 256-bit key */
-		/* Calculate the fourth word of the S vector */
-		CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
-		CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
-		CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
-		CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
-		CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
-		CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
-		CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
-		CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
-
-		/* Compute the S-boxes. */
-		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
-			CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
-		}
-
-		/* Calculate whitening and round subkeys.  The constants are
-		 * indices of subkeys, preprocessed through q0 and q1. */
-		CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-		CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-		CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-		CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-		CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-		CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-		CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-		CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-		CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-		CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-		CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71);
-		CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-		CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F);
-		CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-		CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-		CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F);
-		CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-		CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-		CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
-		CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
-	} else if (key_len == 24) { /* 192-bit key */
-		/* Compute the S-boxes. */
-		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
-		        CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
-		}
-
-		/* Calculate whitening and round subkeys.  The constants are
-		 * indices of subkeys, preprocessed through q0 and q1. */
-		CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-		CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-		CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-		CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-		CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-		CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-		CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-		CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-		CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-		CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-		CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71);
-		CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-		CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F);
-		CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-		CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-		CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F);
-		CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-		CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-		CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00);
-		CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
-	} else { /* 128-bit key */
-		/* Compute the S-boxes. */
-		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
-			CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
-		}
-
-		/* Calculate whitening and round subkeys.  The constants are
-		 * indices of subkeys, preprocessed through q0 and q1. */
-		CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-		CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-		CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-		CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-		CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-		CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-		CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-		CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-		CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-		CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-		CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71);
-		CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-		CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F);
-		CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-		CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-		CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F);
-		CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-		CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-		CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
-		CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
-	}
-
-	return 0;
-}
 
 /* Encrypt one block.  in and out may be the same. */
 static void twofish_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
@@ -877,6 +181,8 @@
 
 static struct crypto_alg alg = {
 	.cra_name           =   "twofish",
+	.cra_driver_name    =   "twofish-generic",
+	.cra_priority       =   100,
 	.cra_flags          =   CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize      =   TF_BLOCK_SIZE,
 	.cra_ctxsize        =   sizeof(struct twofish_ctx),
diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c
new file mode 100644
index 0000000..b4b9c0c
--- /dev/null
+++ b/crypto/twofish_common.c
@@ -0,0 +1,744 @@
+/*
+ * Common Twofish algorithm parts shared between the c and assembler
+ * implementations
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/bitops.h>
+#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+
+/* The large precomputed tables for the Twofish cipher (twofish.c)
+ * Taken from the same source as twofish.c
+ * Marc Mutz <Marc@Mutz.com>
+ */
+
+/* These two tables are the q0 and q1 permutations, exactly as described in
+ * the Twofish paper. */
+
+static const u8 q0[256] = {
+	0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
+	0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
+	0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
+	0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
+	0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
+	0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
+	0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
+	0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
+	0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
+	0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
+	0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
+	0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
+	0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
+	0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
+	0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
+	0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
+	0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
+	0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
+	0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
+	0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
+	0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
+	0x4A, 0x5E, 0xC1, 0xE0
+};
+
+static const u8 q1[256] = {
+	0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
+	0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
+	0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
+	0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
+	0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
+	0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
+	0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
+	0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
+	0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
+	0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
+	0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
+	0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
+	0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
+	0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
+	0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
+	0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
+	0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
+	0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
+	0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
+	0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
+	0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
+	0x55, 0x09, 0xBE, 0x91
+};
+
+/* These MDS tables are actually tables of MDS composed with q0 and q1,
+ * because it is only ever used that way and we can save some time by
+ * precomputing.  Of course the main saving comes from precomputing the
+ * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
+ * things up in these tables we reduce the matrix multiply to four lookups
+ * and three XORs.  Semi-formally, the definition of these tables is:
+ * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
+ * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
+ * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
+ * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
+ * by Schneier et al, and I'm casually glossing over the byte/word
+ * conversion issues. */
+
+static const u32 mds[4][256] = {
+	{
+	0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
+	0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
+	0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
+	0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
+	0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
+	0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
+	0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
+	0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
+	0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
+	0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
+	0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
+	0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
+	0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
+	0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
+	0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
+	0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
+	0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
+	0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
+	0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
+	0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
+	0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
+	0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
+	0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
+	0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
+	0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
+	0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
+	0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
+	0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
+	0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
+	0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
+	0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
+	0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
+	0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
+	0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
+	0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
+	0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
+	0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
+	0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
+	0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
+	0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
+	0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
+	0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
+	0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
+
+	{
+	0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
+	0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
+	0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
+	0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
+	0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
+	0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
+	0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
+	0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
+	0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
+	0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
+	0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
+	0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
+	0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
+	0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
+	0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
+	0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
+	0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
+	0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
+	0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
+	0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
+	0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
+	0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
+	0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
+	0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
+	0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
+	0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
+	0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
+	0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
+	0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
+	0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
+	0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
+	0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
+	0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
+	0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
+	0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
+	0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
+	0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
+	0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
+	0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
+	0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
+	0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
+	0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
+	0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
+
+	{
+	0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
+	0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
+	0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
+	0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
+	0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
+	0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
+	0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
+	0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
+	0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
+	0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
+	0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
+	0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
+	0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
+	0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
+	0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
+	0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
+	0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
+	0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
+	0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
+	0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
+	0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
+	0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
+	0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
+	0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
+	0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
+	0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
+	0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
+	0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
+	0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
+	0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
+	0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
+	0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
+	0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
+	0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
+	0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
+	0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
+	0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
+	0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
+	0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
+	0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
+	0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
+	0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
+	0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
+
+	{
+	0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
+	0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
+	0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
+	0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
+	0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
+	0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
+	0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
+	0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
+	0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
+	0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
+	0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
+	0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
+	0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
+	0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
+	0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
+	0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
+	0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
+	0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
+	0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
+	0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
+	0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
+	0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
+	0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
+	0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
+	0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
+	0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
+	0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
+	0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
+	0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
+	0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
+	0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
+	0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
+	0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
+	0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
+	0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
+	0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
+	0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
+	0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
+	0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
+	0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
+	0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
+	0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
+	0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
+};
+
+/* The exp_to_poly and poly_to_exp tables are used to perform efficient
+ * operations in GF(2^8) represented as GF(2)[x]/w(x) where
+ * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
+ * definition of the RS matrix in the key schedule.  Elements of that field
+ * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
+ * which can be represented naturally by bytes (just substitute x=2).  In that
+ * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
+ * multiplication is inefficient without hardware support.  To multiply
+ * faster, I use the fact that x is a generator for the nonzero elements,
+ * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
+ * some n in 0..254.  Note that that caret is exponentiation in GF(2^8),
+ * *not* polynomial notation.  So if I want to compute pq where p and q are
+ * in GF(2^8), I can just say:
+ *    1. if p=0 or q=0 then pq=0
+ *    2. otherwise, find m and n such that p=x^m and q=x^n
+ *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
+ * The translations in steps 2 and 3 are looked up in the tables
+ * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
+ * in action, look at the CALC_S macro.  As additional wrinkles, note that
+ * one of my operands is always a constant, so the poly_to_exp lookup on it
+ * is done in advance; I included the original values in the comments so
+ * readers can have some chance of recognizing that this *is* the RS matrix
+ * from the Twofish paper.  I've only included the table entries I actually
+ * need; I never do a lookup on a variable input of zero and the biggest
+ * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll
+ * never sum to more than 491.  I'm repeating part of the exp_to_poly table
+ * so that I don't have to do mod-255 reduction in the exponent arithmetic.
+ * Since I know my constant operands are never zero, I only have to worry
+ * about zero values in the variable operand, and I do it with a simple
+ * conditional branch.  I know conditionals are expensive, but I couldn't
+ * see a non-horrible way of avoiding them, and I did manage to group the
+ * statements so that each if covers four group multiplications. */
+
+static const u8 poly_to_exp[255] = {
+	0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
+	0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
+	0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
+	0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
+	0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
+	0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
+	0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
+	0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
+	0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
+	0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
+	0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
+	0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
+	0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
+	0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
+	0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
+	0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
+	0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
+	0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
+	0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
+	0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
+	0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
+	0x85, 0xC8, 0xA1
+};
+
+static const u8 exp_to_poly[492] = { /* entries 255..491 repeat 0..236 */
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
+	0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
+	0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
+	0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
+	0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
+	0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
+	0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
+	0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
+	0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
+	0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
+	0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
+	0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
+	0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
+	0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
+	0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
+	0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
+	0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
+	0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
+	0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
+	0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
+	0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
+	0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
+	0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
+	0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
+	0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
+	0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
+	0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
+	0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
+	0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
+	0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
+	0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
+	0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
+	0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
+	0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
+	0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
+	0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
+	0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
+	0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
+	0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
+	0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
+	0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB
+};
+
+
+/* S-box entry indices preprocessed through q0 and q1: twofish_setkey passes
+ * calc_sb_tbl[2i] and calc_sb_tbl[2i+1] as a and b when computing entry i. */
+static const u8 calc_sb_tbl[512] = {
+	0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
+	0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
+	0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
+	0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
+	0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
+	0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
+	0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
+	0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
+	0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
+	0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
+	0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
+	0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
+	0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
+	0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
+	0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
+	0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
+	0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
+	0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
+	0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
+	0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
+	0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
+	0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
+	0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
+	0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
+	0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
+	0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
+	0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
+	0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
+	0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
+	0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
+	0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
+	0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
+	0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
+	0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
+	0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
+	0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
+	0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
+	0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
+	0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
+	0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
+	0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
+	0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
+	0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
+	0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
+	0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
+	0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
+	0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
+	0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
+	0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
+	0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
+	0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
+	0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
+	0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
+	0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
+	0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
+	0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
+	0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
+	0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
+	0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
+	0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
+	0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
+	0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
+	0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
+	0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
+};
+
+/* Macro to perform one column of the RS matrix multiplication.  a, b, c,
+ * and d are the four bytes of output; i is the index of the key byte; w, x, y,
+ * and z are the column of constants from the RS matrix, preprocessed through
+ * poly_to_exp.  A zero key byte is skipped; key and tmp must be in scope. */
+
+#define CALC_S(a, b, c, d, i, w, x, y, z) \
+   if (key[i]) { \
+      tmp = poly_to_exp[key[i] - 1]; \
+      (a) ^= exp_to_poly[tmp + (w)]; \
+      (b) ^= exp_to_poly[tmp + (x)]; \
+      (c) ^= exp_to_poly[tmp + (y)]; \
+      (d) ^= exp_to_poly[tmp + (z)]; \
+   }
+
+/* Macros to calculate the key-dependent S-boxes for a 128-bit key using the
+ * S vector from CALC_S.  CALC_SB_2 computes a single entry in all four
+ * S-boxes, where i is the index of the entry to compute, and a and b are the
+ * index numbers preprocessed through the q0 and q1 tables respectively.
+ * Expects ctx and the S-vector bytes (sa, sb, ...) in the caller's scope. */
+
+#define CALC_SB_2(i, a, b) \
+   ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
+   ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
+   ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
+   ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]
+
+/* Like CALC_SB_2, but for 192-bit keys: one more lookup stage (si..sl). */
+
+#define CALC_SB192_2(i, a, b) \
+   ctx->s[0][i] = mds[0][q0[q0[(b) ^ sa] ^ se] ^ si]; \
+   ctx->s[1][i] = mds[1][q0[q1[(b) ^ sb] ^ sf] ^ sj]; \
+   ctx->s[2][i] = mds[2][q1[q0[(a) ^ sc] ^ sg] ^ sk]; \
+   ctx->s[3][i] = mds[3][q1[q1[(a) ^ sd] ^ sh] ^ sl]
+
+/* Like CALC_SB192_2, but for 256-bit keys: a further stage (sm..sp). */
+
+#define CALC_SB256_2(i, a, b) \
+   ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
+   ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
+   ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
+   ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp]
+
+/* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
+ * last two stages of the h() function for a given index (either 2i or 2i+1).
+ * a, b, c, and d are the four bytes going into the last two stages.  For
+ * 128-bit keys, this is the entire h() function, a = c being the index
+ * preprocessed through q0 and b = d through q1; for longer keys they are the
+ * output of previous stages.  j is the index of the first key byte to use.
+ * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
+ * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
+ * rotations.  Its parameters are: a, the array to write the results into,
+ * j, the index of the first output entry, k and l, the preprocessed indices
+ * for index 2i, and m and n, the preprocessed indices for index 2i+1.
+ * CALC_K192_2 expands CALC_K_2 to handle 192-bit keys, by doing an
+ * additional lookup-and-XOR stage.  The parameters a, b, c and d are the
+ * four bytes going into the last three stages.  For 192-bit keys, c = d
+ * are the index preprocessed through q0, and a = b are the index
+ * preprocessed through q1; j is the index of the first key byte to use.
+ * CALC_K192 is identical to CALC_K but for using the CALC_K192_2 macro
+ * instead of CALC_K_2.
+ * CALC_K256_2 expands CALC_K192_2 to handle 256-bit keys, by doing an
+ * additional lookup-and-XOR stage.  The parameters a and b are the index
+ * preprocessed through q0 and q1 respectively; j is the index of the first
+ * key byte to use.  CALC_K256 is identical to CALC_K but for using the
+ * CALC_K256_2 macro instead of CALC_K_2. */
+
+#define CALC_K_2(a, b, c, d, j) \
+     mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \
+   ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \
+   ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \
+   ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]]
+
+#define CALC_K(a, j, k, l, m, n) \
+   x = CALC_K_2 (k, l, k, l, 0); \
+   y = CALC_K_2 (m, n, m, n, 4); \
+   y = rol32(y, 8); \
+   x += y; y += x; ctx->a[j] = x; \
+   ctx->a[(j) + 1] = rol32(y, 9)
+
+#define CALC_K192_2(a, b, c, d, j) \
+   CALC_K_2 (q0[a ^ key[(j) + 16]], \
+	     q1[b ^ key[(j) + 17]], \
+	     q0[c ^ key[(j) + 18]], \
+	     q1[d ^ key[(j) + 19]], j)
+
+#define CALC_K192(a, j, k, l, m, n) \
+   x = CALC_K192_2 (l, l, k, k, 0); \
+   y = CALC_K192_2 (n, n, m, m, 4); \
+   y = rol32(y, 8); \
+   x += y; y += x; ctx->a[j] = x; \
+   ctx->a[(j) + 1] = rol32(y, 9)
+
+#define CALC_K256_2(a, b, j) \
+   CALC_K192_2 (q1[b ^ key[(j) + 24]], \
+	        q1[a ^ key[(j) + 25]], \
+	        q0[a ^ key[(j) + 26]], \
+	        q0[b ^ key[(j) + 27]], j)
+
+#define CALC_K256(a, j, k, l, m, n) \
+   x = CALC_K256_2 (k, l, 0); \
+   y = CALC_K256_2 (m, n, 4); \
+   y = rol32(y, 8); \
+   x += y; y += x; ctx->a[j] = x; \
+   ctx->a[(j) + 1] = rol32(y, 9)
+
+/* Perform the key setup: fill ctx->s, ctx->w and ctx->k from key.  key_len
+ * must be 16, 24 or 32 bytes; anything else sets CRYPTO_TFM_RES_BAD_KEY_LEN
+ * in the tfm flags and returns -EINVAL.  Returns 0 on success. */
+int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len)
+{
+	struct twofish_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int i, j, k;
+
+	/* Temporaries for CALC_K. */
+	u32 x, y;
+
+	/* The S vector used to key the S-boxes, split up into individual bytes.
+	 * 128-bit keys use sa..sh, 192-bit sa..sl, 256-bit all sixteen. */
+	u8 sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
+	u8 si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
+
+	/* Temporary for CALC_S. */
+	u8 tmp;
+
+	/* Only 128-, 192- and 256-bit keys are supported.  A bare "% 8" test
+	 * would let an 8-byte key through to the 16-byte reads below. */
+	if (key_len != 16 && key_len != 24 && key_len != 32) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL; /* unsupported key length */
+	}
+
+	/* Compute the first two words of the S vector.  The magic numbers are
+	 * the entries of the RS matrix, preprocessed through poly_to_exp. The
+	 * numbers in the comments are the original (polynomial form) matrix
+	 * entries. */
+	CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+	CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+	CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+	CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+	CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+	CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+	CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+	CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+	CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+	CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+	CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+	CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+	CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+	CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+	CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+	CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+
+	if (key_len == 24 || key_len == 32) { /* 192- or 256-bit key */
+		/* Calculate the third word of the S vector */
+		CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+		CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+		CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+		CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+		CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+		CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+		CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+		CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+	}
+
+	if (key_len == 32) { /* 256-bit key */
+		/* Calculate the fourth word of the S vector */
+		CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
+		CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
+		CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
+		CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
+		CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
+		CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
+		CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
+		CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
+
+		/* Compute the S-boxes. */
+		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
+			CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+		}
+
+		/* Calculate whitening and round subkeys.  The constants are
+		 * indices of subkeys, preprocessed through q0 and q1. */
+		CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
+		CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
+		CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
+		CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
+		CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
+		CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
+		CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
+		CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
+		CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
+		CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
+		CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71);
+		CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
+		CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F);
+		CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
+		CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
+		CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F);
+		CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
+		CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
+		CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
+		CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+	} else if (key_len == 24) { /* 192-bit key */
+		/* Compute the S-boxes. */
+		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
+		        CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+		}
+
+		/* Calculate whitening and round subkeys.  The constants are
+		 * indices of subkeys, preprocessed through q0 and q1. */
+		CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
+		CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
+		CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
+		CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
+		CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
+		CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
+		CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
+		CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
+		CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
+		CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
+		CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71);
+		CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
+		CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F);
+		CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
+		CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
+		CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F);
+		CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
+		CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
+		CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00);
+		CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+	} else { /* 128-bit key */
+		/* Compute the S-boxes. */
+		for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
+			CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
+		}
+
+		/* Calculate whitening and round subkeys.  The constants are
+		 * indices of subkeys, preprocessed through q0 and q1. */
+		CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3);
+		CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
+		CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
+		CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
+		CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
+		CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B);
+		CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
+		CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
+		CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
+		CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD);
+		CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71);
+		CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
+		CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F);
+		CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B);
+		CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA);
+		CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F);
+		CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
+		CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
+		CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
+		CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(twofish_setkey);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Twofish cipher common functions");
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index b1b5104..3fa80f0 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -32,7 +32,6 @@
  *
  */
 
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index 2a7e349..d6d6658 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c
@@ -268,7 +268,7 @@
 	/* Match it to a port node */
 	index = (ap == ap->host->ports[0]) ? 0 : 1;
 	for (np = np->child; np != NULL; np = np->sibling) {
-		u32 *reg = (u32 *)get_property(np, "reg", NULL);
+		const u32 *reg = get_property(np, "reg", NULL);
 		if (!reg)
 			continue;
 		if (index == *reg)
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index dd96123..41e052f 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1912,7 +1912,7 @@
 				skb->tail = skb->data + skb->len;
 #ifdef USE_CHECKSUM_HW
 				if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) {
-					skb->ip_summed = CHECKSUM_HW;
+					skb->ip_summed = CHECKSUM_COMPLETE;
 					skb->csum = TCP_CKSUM(skb->data,
 							he_vcc->pdu_len);
 				}
@@ -1928,7 +1928,9 @@
 #ifdef notdef
 		ATM_SKB(skb)->vcc = vcc;
 #endif
+		spin_unlock(&he_dev->global_lock);
 		vcc->push(vcc, skb);
+		spin_lock(&he_dev->global_lock);
 
 		atomic_inc(&vcc->stats->rx);
 
diff --git a/drivers/base/hypervisor.c b/drivers/base/hypervisor.c
index 0c85e9d..7080b41 100644
--- a/drivers/base/hypervisor.c
+++ b/drivers/base/hypervisor.c
@@ -1,8 +1,9 @@
 /*
  * hypervisor.c - /sys/hypervisor subsystem.
  *
- * This file is released under the GPLv2
+ * Copyright (C) IBM Corp. 2006
  *
+ * This file is released under the GPLv2
  */
 
 #include <linux/kobject.h>
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 4cd23c3..a360215 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -7115,7 +7115,7 @@
 	{
 		.vendor 	= PCI_VENDOR_ID_MYLEX,
 		.device		= PCI_DEVICE_ID_MYLEX_DAC960_GEM,
-		.subvendor	= PCI_ANY_ID,
+		.subvendor	= PCI_VENDOR_ID_MYLEX,
 		.subdevice	= PCI_ANY_ID,
 		.driver_data	= (unsigned long) &DAC960_GEM_privdata,
 	},
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index afdff32..05f79d7 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -251,10 +251,6 @@
 	stk->pool = NULL;
 }
 
-/* scsi_device_types comes from scsi.h */
-#define DEVICETYPE(n) (n<0 || n>MAX_SCSI_DEVICE_CODE) ? \
-	"Unknown" : scsi_device_types[n]
-
 #if 0
 static int xmargin=8;
 static int amargin=60;
@@ -389,7 +385,7 @@
 	   time anyway (the scsi layer's inquiries will show that info) */
 	if (hostno != -1)
 		printk("cciss%d: %s device c%db%dt%dl%d added.\n", 
-			ctlr, DEVICETYPE(sd->devtype), hostno, 
+			ctlr, scsi_device_type(sd->devtype), hostno,
 			sd->bus, sd->target, sd->lun);
 	return 0;
 }
@@ -407,7 +403,7 @@
 		ccissscsi[ctlr].dev[i] = ccissscsi[ctlr].dev[i+1];
 	ccissscsi[ctlr].ndevices--;
 	printk("cciss%d: %s device c%db%dt%dl%d removed.\n",
-		ctlr, DEVICETYPE(sd.devtype), hostno, 
+		ctlr, scsi_device_type(sd.devtype), hostno,
 			sd.bus, sd.target, sd.lun);
 }
 
@@ -458,7 +454,7 @@
 		if (found == 0) { /* device no longer present. */ 
 			changes++;
 			/* printk("cciss%d: %s device c%db%dt%dl%d removed.\n",
-				ctlr, DEVICETYPE(csd->devtype), hostno, 
+				ctlr, scsi_device_type(csd->devtype), hostno,
 					csd->bus, csd->target, csd->lun); */
 			cciss_scsi_remove_entry(ctlr, hostno, i);
 			/* note, i not incremented */
@@ -468,7 +464,7 @@
 			printk("cciss%d: device c%db%dt%dl%d type changed "
 				"(device type now %s).\n",
 				ctlr, hostno, csd->bus, csd->target, csd->lun,
-					DEVICETYPE(csd->devtype));
+					scsi_device_type(csd->devtype));
 			csd->devtype = sd[j].devtype;
 			i++;	/* so just move along. */
 		} else 		/* device is same as it ever was, */
@@ -1098,7 +1094,7 @@
 			if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
 				printk(KERN_INFO "cciss%d: %s ignored, "
 					"too many devices.\n", cntl_num,
-					DEVICETYPE(devtype));
+					scsi_device_type(devtype));
 				break;
 			}
 			memcpy(&currentsd[ncurrent].scsi3addr[0], 
diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c
index 3d4261c..4053503 100644
--- a/drivers/block/cryptoloop.c
+++ b/drivers/block/cryptoloop.c
@@ -40,11 +40,13 @@
 cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info)
 {
 	int err = -EINVAL;
+	int cipher_len;
+	int mode_len;
 	char cms[LO_NAME_SIZE];			/* cipher-mode string */
 	char *cipher;
 	char *mode;
 	char *cmsp = cms;			/* c-m string pointer */
-	struct crypto_tfm *tfm = NULL;
+	struct crypto_blkcipher *tfm;
 
 	/* encryption breaks for non sector aligned offsets */
 
@@ -53,20 +55,39 @@
 
 	strncpy(cms, info->lo_crypt_name, LO_NAME_SIZE);
 	cms[LO_NAME_SIZE - 1] = 0;
-	cipher = strsep(&cmsp, "-");
-	mode = strsep(&cmsp, "-");
 
-	if (mode == NULL || strcmp(mode, "cbc") == 0)
-		tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_CBC |
-					       CRYPTO_TFM_REQ_MAY_SLEEP);
-	else if (strcmp(mode, "ecb") == 0)
-		tfm = crypto_alloc_tfm(cipher, CRYPTO_TFM_MODE_ECB |
-					       CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (tfm == NULL)
+	cipher = cmsp;
+	cipher_len = strcspn(cmsp, "-");
+
+	mode = cmsp + cipher_len;
+	mode_len = 0;
+	if (*mode) {
+		mode++;
+		mode_len = strcspn(mode, "-");
+	}
+
+	if (!mode_len) {
+		mode = "cbc";
+		mode_len = 3;
+	}
+
+	if (cipher_len + mode_len + 3 > LO_NAME_SIZE)
 		return -EINVAL;
 
-	err = tfm->crt_u.cipher.cit_setkey(tfm, info->lo_encrypt_key,
-					   info->lo_encrypt_key_size);
+	memmove(cms, mode, mode_len);
+	cmsp = cms + mode_len;
+	*cmsp++ = '(';
+	memcpy(cmsp, info->lo_crypt_name, cipher_len);
+	cmsp += cipher_len;
+	*cmsp++ = ')';
+	*cmsp = 0;
+
+	tfm = crypto_alloc_blkcipher(cms, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	err = crypto_blkcipher_setkey(tfm, info->lo_encrypt_key,
+				      info->lo_encrypt_key_size);
 	
 	if (err != 0)
 		goto out_free_tfm;
@@ -75,99 +96,49 @@
 	return 0;
 
  out_free_tfm:
-	crypto_free_tfm(tfm);
+	crypto_free_blkcipher(tfm);
 
  out:
 	return err;
 }
 
 
-typedef int (*encdec_ecb_t)(struct crypto_tfm *tfm,
+typedef int (*encdec_cbc_t)(struct blkcipher_desc *desc,
 			struct scatterlist *sg_out,
 			struct scatterlist *sg_in,
 			unsigned int nsg);
 
-
 static int
-cryptoloop_transfer_ecb(struct loop_device *lo, int cmd,
-			struct page *raw_page, unsigned raw_off,
-			struct page *loop_page, unsigned loop_off,
-			int size, sector_t IV)
+cryptoloop_transfer(struct loop_device *lo, int cmd,
+		    struct page *raw_page, unsigned raw_off,
+		    struct page *loop_page, unsigned loop_off,
+		    int size, sector_t IV)
 {
-	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
-	struct scatterlist sg_out = { NULL, };
-	struct scatterlist sg_in = { NULL, };
-
-	encdec_ecb_t encdecfunc;
-	struct page *in_page, *out_page;
-	unsigned in_offs, out_offs;
-
-	if (cmd == READ) {
-		in_page = raw_page;
-		in_offs = raw_off;
-		out_page = loop_page;
-		out_offs = loop_off;
-		encdecfunc = tfm->crt_u.cipher.cit_decrypt;
-	} else {
-		in_page = loop_page;
-		in_offs = loop_off;
-		out_page = raw_page;
-		out_offs = raw_off;
-		encdecfunc = tfm->crt_u.cipher.cit_encrypt;
-	}
-
-	while (size > 0) {
-		const int sz = min(size, LOOP_IV_SECTOR_SIZE);
-
-		sg_in.page = in_page;
-		sg_in.offset = in_offs;
-		sg_in.length = sz;
-
-		sg_out.page = out_page;
-		sg_out.offset = out_offs;
-		sg_out.length = sz;
-
-		encdecfunc(tfm, &sg_out, &sg_in, sz);
-
-		size -= sz;
-		in_offs += sz;
-		out_offs += sz;
-	}
-
-	return 0;
-}
-
-typedef int (*encdec_cbc_t)(struct crypto_tfm *tfm,
-			struct scatterlist *sg_out,
-			struct scatterlist *sg_in,
-			unsigned int nsg, u8 *iv);
-
-static int
-cryptoloop_transfer_cbc(struct loop_device *lo, int cmd,
-			struct page *raw_page, unsigned raw_off,
-			struct page *loop_page, unsigned loop_off,
-			int size, sector_t IV)
-{
-	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
+	struct crypto_blkcipher *tfm = lo->key_data;
+	struct blkcipher_desc desc = {
+		.tfm = tfm,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP,
+	};
 	struct scatterlist sg_out = { NULL, };
 	struct scatterlist sg_in = { NULL, };
 
 	encdec_cbc_t encdecfunc;
 	struct page *in_page, *out_page;
 	unsigned in_offs, out_offs;
+	int err;
 
 	if (cmd == READ) {
 		in_page = raw_page;
 		in_offs = raw_off;
 		out_page = loop_page;
 		out_offs = loop_off;
-		encdecfunc = tfm->crt_u.cipher.cit_decrypt_iv;
+		encdecfunc = crypto_blkcipher_crt(tfm)->decrypt;
 	} else {
 		in_page = loop_page;
 		in_offs = loop_off;
 		out_page = raw_page;
 		out_offs = raw_off;
-		encdecfunc = tfm->crt_u.cipher.cit_encrypt_iv;
+		encdecfunc = crypto_blkcipher_crt(tfm)->encrypt;
 	}
 
 	while (size > 0) {
@@ -183,7 +154,10 @@
 		sg_out.offset = out_offs;
 		sg_out.length = sz;
 
-		encdecfunc(tfm, &sg_out, &sg_in, sz, (u8 *)iv);
+		desc.info = iv;
+		err = encdecfunc(&desc, &sg_out, &sg_in, sz);
+		if (err)
+			return err;
 
 		IV++;
 		size -= sz;
@@ -195,32 +169,6 @@
 }
 
 static int
-cryptoloop_transfer(struct loop_device *lo, int cmd,
-		    struct page *raw_page, unsigned raw_off,
-		    struct page *loop_page, unsigned loop_off,
-		    int size, sector_t IV)
-{
-	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
-	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB)
-	{
-		lo->transfer = cryptoloop_transfer_ecb;
-		return cryptoloop_transfer_ecb(lo, cmd, raw_page, raw_off,
-					       loop_page, loop_off, size, IV);
-	}	
-	if(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_CBC)
-	{	
-		lo->transfer = cryptoloop_transfer_cbc;
-		return cryptoloop_transfer_cbc(lo, cmd, raw_page, raw_off,
-					       loop_page, loop_off, size, IV);
-	}
-	
-	/*  This is not supposed to happen */
-
-	printk( KERN_ERR "cryptoloop: unsupported cipher mode in cryptoloop_transfer!\n");
-	return -EINVAL;
-}
-
-static int
 cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg)
 {
 	return -EINVAL;
@@ -229,9 +177,9 @@
 static int
 cryptoloop_release(struct loop_device *lo)
 {
-	struct crypto_tfm *tfm = (struct crypto_tfm *) lo->key_data;
+	struct crypto_blkcipher *tfm = lo->key_data;
 	if (tfm != NULL) {
-		crypto_free_tfm(tfm);
+		crypto_free_blkcipher(tfm);
 		lo->key_data = NULL;
 		return 0;
 	}
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index c40e487..52ea94b 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -495,6 +495,21 @@
 	  When not in use, each legacy PTY occupies 12 bytes on 32-bit
 	  architectures and 24 bytes on 64-bit architectures.
 
+config BRIQ_PANEL
+	tristate 'Total Impact briQ front panel driver'
+	depends on PPC_CHRP
+	---help---
+	  The briQ is a small footprint CHRP computer with a frontpanel VFD, a
+	  tristate led and two switches. It is the size of a CDROM drive.
+
+	  If you have one and want anything showing on the VFD then you
+	  must answer Y here.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called briq_panel.
+
+	  It's safe to say N here.
+
 config PRINTER
 	tristate "Parallel printer support"
 	depends on PARPORT
@@ -596,6 +611,13 @@
 	  console. This driver allows each pSeries partition to have a console
 	  which is accessed via the HMC.
 
+config HVC_ISERIES
+	bool "iSeries Hypervisor Virtual Console support"
+	depends on PPC_ISERIES && !VIOCONS
+	select HVC_DRIVER
+	help
+	  iSeries machines support a hypervisor virtual console.
+
 config HVC_RTAS
 	bool "IBM RTAS Console support"
 	depends on PPC_RTAS
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 6e0f446..8c6dfc62 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -42,6 +42,7 @@
 obj-$(CONFIG_SX)		+= sx.o generic_serial.o
 obj-$(CONFIG_RIO)		+= rio/ generic_serial.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvc_vio.o hvsi.o
+obj-$(CONFIG_HVC_ISERIES)	+= hvc_iseries.o
 obj-$(CONFIG_HVC_RTAS)		+= hvc_rtas.o
 obj-$(CONFIG_HVC_DRIVER)	+= hvc_console.o
 obj-$(CONFIG_RAW_DRIVER)	+= raw.o
@@ -51,6 +52,7 @@
 obj-$(CONFIG_VIOTAPE)		+= viotape.o
 obj-$(CONFIG_HVCS)		+= hvcs.o
 obj-$(CONFIG_SGI_MBCS)		+= mbcs.o
+obj-$(CONFIG_BRIQ_PANEL)	+= briq_panel.o
 
 obj-$(CONFIG_PRINTER)		+= lp.o
 obj-$(CONFIG_TIPAR)		+= tipar.o
diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 3c623b6..8b3317f 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -117,7 +117,7 @@
 };
 
 struct agp_bridge_data {
-	struct agp_version *version;
+	const struct agp_version *version;
 	struct agp_bridge_driver *driver;
 	struct vm_operations_struct *vm_ops;
 	void *previous_size;
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 509adc4..d59e037 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -44,7 +44,7 @@
  * past 0.99 at all due to some boolean logic error. */
 #define AGPGART_VERSION_MAJOR 0
 #define AGPGART_VERSION_MINOR 101
-static struct agp_version agp_current_version =
+static const struct agp_version agp_current_version =
 {
 	.major = AGPGART_VERSION_MAJOR,
 	.minor = AGPGART_VERSION_MINOR,
diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
index b788b0a..30f730f 100644
--- a/drivers/char/agp/efficeon-agp.c
+++ b/drivers/char/agp/efficeon-agp.c
@@ -337,13 +337,6 @@
 	.agp_destroy_page	= agp_generic_destroy_page,
 };
 
-
-static int agp_efficeon_resume(struct pci_dev *pdev)
-{
-	printk(KERN_DEBUG PFX "agp_efficeon_resume()\n");
-	return efficeon_configure();
-}
-
 static int __devinit agp_efficeon_probe(struct pci_dev *pdev,
 				     const struct pci_device_id *ent)
 {
@@ -414,11 +407,18 @@
 	agp_put_bridge(bridge);
 }
 
+#ifdef CONFIG_PM
 static int agp_efficeon_suspend(struct pci_dev *dev, pm_message_t state)
 {
 	return 0;
 }
 
+static int agp_efficeon_resume(struct pci_dev *pdev)
+{
+	printk(KERN_DEBUG PFX "agp_efficeon_resume()\n");
+	return efficeon_configure();
+}
+#endif
 
 static struct pci_device_id agp_efficeon_pci_table[] = {
 	{
@@ -439,8 +439,10 @@
 	.id_table	= agp_efficeon_pci_table,
 	.probe		= agp_efficeon_probe,
 	.remove		= agp_efficeon_remove,
+#ifdef CONFIG_PM
 	.suspend	= agp_efficeon_suspend,
 	.resume		= agp_efficeon_resume,
+#endif
 };
 
 static int __init agp_efficeon_init(void)
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index d9c5a91..0f2ed2a 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -151,35 +151,12 @@
 	client->segments = seg;
 }
 
-/* Originally taken from linux/mm/mmap.c from the array
- * protection_map.
- * The original really should be exported to modules, or
- * some routine which does the conversion for you
- */
-
-static const pgprot_t my_protect_map[16] =
-{
-	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
-	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
-};
-
 static pgprot_t agp_convert_mmap_flags(int prot)
 {
-#define _trans(x,bit1,bit2) \
-((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
-
 	unsigned long prot_bits;
-	pgprot_t temp;
 
-	prot_bits = _trans(prot, PROT_READ, VM_READ) |
-	    _trans(prot, PROT_WRITE, VM_WRITE) |
-	    _trans(prot, PROT_EXEC, VM_EXEC);
-
-	prot_bits |= VM_SHARED;
-
-	temp = my_protect_map[prot_bits & 0x0000000f];
-
-	return temp;
+	prot_bits = calc_vm_prot_bits(prot) | VM_SHARED;
+	return vm_get_page_prot(prot_bits);
 }
 
 static int agp_create_segment(struct agp_client *client, struct agp_region *region)
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index cc5ea34..0dcdb36 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -568,25 +568,34 @@
 		*bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
 		goto done;
 
+	} else if (*requested_mode & AGPSTAT3_4X) {
+		*bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+		*bridge_agpstat |= AGPSTAT3_4X;
+		goto done;
+
 	} else {
 
 		/*
-		 * If we didn't specify AGPx8, we can only do x4.
-		 * If the hardware can't do x4, we're up shit creek, and never
-		 *  should have got this far.
+		 * If we didn't specify an AGP mode, we see if both
+		 * the graphics card, and the bridge can do x8, and use if so.
+		 * If not, we fall back to x4 mode.
 		 */
-		*bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
-		if ((*bridge_agpstat & AGPSTAT3_4X) && (*vga_agpstat & AGPSTAT3_4X))
-			*bridge_agpstat |= AGPSTAT3_4X;
-		else {
-			printk(KERN_INFO PFX "Badness. Don't know which AGP mode to set. "
-							"[bridge_agpstat:%x vga_agpstat:%x fell back to:- bridge_agpstat:%x vga_agpstat:%x]\n",
-							origbridge, origvga, *bridge_agpstat, *vga_agpstat);
-			if (!(*bridge_agpstat & AGPSTAT3_4X))
-				printk(KERN_INFO PFX "Bridge couldn't do AGP x4.\n");
-			if (!(*vga_agpstat & AGPSTAT3_4X))
-				printk(KERN_INFO PFX "Graphic card couldn't do AGP x4.\n");
-			return;
+		if ((*bridge_agpstat & AGPSTAT3_8X) && (*vga_agpstat & AGPSTAT3_8X)) {
+			printk(KERN_INFO PFX "No AGP mode specified. Setting to highest mode supported by bridge & card (x8).\n");
+			*bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
+			*vga_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
+		} else {
+			printk(KERN_INFO PFX "Fell back to AGPx4 mode because");
+			if (!(*bridge_agpstat & AGPSTAT3_8X)) {
+				printk("bridge couldn't do x8. bridge_agpstat:%x (orig=%x)\n", *bridge_agpstat, origbridge);
+				*bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+				*bridge_agpstat |= AGPSTAT3_4X;
+			}
+			if (!(*vga_agpstat & AGPSTAT3_8X)) {
+				printk("graphics card couldn't do x8. vga_agpstat:%x (orig=%x)\n", *vga_agpstat, origvga);
+				*vga_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+				*vga_agpstat |= AGPSTAT3_4X;
+			}
 		}
 	}
 
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 61ac380..d1ede7d 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -2,14 +2,6 @@
  * Intel AGPGART routines.
  */
 
-/*
- * Intel(R) 855GM/852GM and 865G support added by David Dawes
- * <dawes@tungstengraphics.com>.
- *
- * Intel(R) 915G/915GM support added by Alan Hourihane
- * <alanh@tungstengraphics.com>.
- */
-
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/init.h>
@@ -17,6 +9,21 @@
 #include <linux/agp_backend.h>
 #include "agp.h"
 
+#define PCI_DEVICE_ID_INTEL_82946GZ_HB      0x2970
+#define PCI_DEVICE_ID_INTEL_82946GZ_IG      0x2972
+#define PCI_DEVICE_ID_INTEL_82965G_1_HB     0x2980
+#define PCI_DEVICE_ID_INTEL_82965G_1_IG     0x2982
+#define PCI_DEVICE_ID_INTEL_82965Q_HB       0x2990
+#define PCI_DEVICE_ID_INTEL_82965Q_IG       0x2992
+#define PCI_DEVICE_ID_INTEL_82965G_HB       0x29A0
+#define PCI_DEVICE_ID_INTEL_82965G_IG       0x29A2
+
+#define IS_I965 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82946GZ_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_1_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965Q_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_HB)
+
+
 /* Intel 815 register */
 #define INTEL_815_APCONT	0x51
 #define INTEL_815_ATTBASE_MASK	~0x1FFFFFFF
@@ -40,6 +47,8 @@
 #define I915_GMCH_GMS_STOLEN_48M	(0x6 << 4)
 #define I915_GMCH_GMS_STOLEN_64M	(0x7 << 4)
 
+/* Intel 965G registers */
+#define I965_MSAC 0x62
 
 /* Intel 7505 registers */
 #define INTEL_I7505_APSIZE	0x74
@@ -354,6 +363,7 @@
 	/* The 64M mode still requires a 128k gatt */
 	{64, 16384, 5},
 	{256, 65536, 6},
+	{512, 131072, 7},
 };
 
 static struct _intel_i830_private {
@@ -377,7 +387,11 @@
 	/* We obtain the size of the GTT, which is also stored (for some
 	 * reason) at the top of stolen memory. Then we add 4KB to that
 	 * for the video BIOS popup, which is also stored in there. */
-	size = agp_bridge->driver->fetch_size() + 4;
+
+	if (IS_I965)
+		size = 512 + 4;
+	else
+		size = agp_bridge->driver->fetch_size() + 4;
 
 	if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82830_HB ||
 	    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82845G_HB) {
@@ -423,7 +437,7 @@
 			if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
+			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965 )
 				gtt_entries = MB(48) - KB(size);
 			else
 				gtt_entries = 0;
@@ -433,7 +447,7 @@
 			if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
 			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
+			    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965)
 				gtt_entries = MB(64) - KB(size);
 			else
 				gtt_entries = 0;
@@ -791,6 +805,77 @@
 
 	return 0;
 }
+/* Read the aperture size selected in MSAC and record it in agp_bridge. */
+static int intel_i965_fetch_size(void)
+{
+	struct aper_size_info_fixed *sizes;
+	u32 idx;
+	u8 msac;
+
+#define I965_512MB_ADDRESS_MASK (3<<1)
+
+	sizes = A_SIZE_FIX(agp_bridge->driver->aperture_sizes);
+	pci_read_config_byte(intel_i830_private.i830_dev, I965_MSAC, &msac);
+	/* idx selects an entry of the driver's aperture_sizes table. */
+	switch (msac & I965_512MB_ADDRESS_MASK) {
+	case 0x00:
+		idx = 0;	/* 128MB */
+		break;
+	case 0x06:
+		idx = 3;	/* 512MB */
+		break;
+	case 0x02:
+	default:
+		idx = 2;	/* 256MB */
+		break;
+	}
+
+	agp_bridge->current_size = (void *)(sizes + idx);
+	agp_bridge->previous_size = agp_bridge->current_size;
+	return sizes[idx].size;
+}
+
+/*
+ * The intel i965 automatically initializes the agp aperture during POST.
+ * Use the memory already set aside for it in the GTT.
+ *
+ * Maps the GTT and the MMIO registers located through I915_MMADDR and stores
+ * the value read from I810_PGETBL_CTL (low 12 bits cleared) as the GATT bus
+ * address.  Returns 0 on success or -ENOMEM if either mapping fails.
+ */
+static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge)
+{
+	u32 temp;
+
+	agp_bridge->gatt_table_real = NULL;
+
+	pci_read_config_dword(intel_i830_private.i830_dev, I915_MMADDR, &temp);
+	temp &= 0xfff00000;
+
+	/* The GTT is mapped 512KB above the MMIO base and is 512KB long. */
+	intel_i830_private.gtt = ioremap((temp + (512 * 1024)) , 512 * 1024);
+	if (!intel_i830_private.gtt)
+		return -ENOMEM;
+
+	intel_i830_private.registers = ioremap(temp,128 * 4096);
+	if (!intel_i830_private.registers) {
+		/* Do not leak the GTT mapping made just above. */
+		iounmap(intel_i830_private.gtt);
+		intel_i830_private.gtt = NULL;
+		return -ENOMEM;
+	}
+
+	temp = readl(intel_i830_private.registers+I810_PGETBL_CTL) & 0xfffff000;
+	global_cache_flush();   /* FIXME: ? */
+
+	/* we have to call this as early as possible after the MMIO base address is known */
+	intel_i830_init_gtt_entries();
+
+	agp_bridge->gatt_table = NULL;
+	agp_bridge->gatt_bus_addr = temp;
+	return 0;
+}
+
 
 static int intel_fetch_size(void)
 {
@@ -1307,7 +1392,7 @@
 	.owner			= THIS_MODULE,
 	.aperture_sizes		= intel_i830_sizes,
 	.size_type		= FIXED_APER_SIZE,
-	.num_aperture_sizes	= 3,
+	.num_aperture_sizes	= 4,
 	.needs_scratch_page	= TRUE,
 	.configure		= intel_i830_configure,
 	.fetch_size		= intel_i830_fetch_size,
@@ -1469,7 +1554,7 @@
 	.owner			= THIS_MODULE,
 	.aperture_sizes		= intel_i830_sizes,
 	.size_type		= FIXED_APER_SIZE,
-	.num_aperture_sizes	= 3,
+	.num_aperture_sizes	= 4,
 	.needs_scratch_page	= TRUE,
 	.configure		= intel_i915_configure,
 	.fetch_size		= intel_i915_fetch_size,
@@ -1489,6 +1574,29 @@
 	.agp_destroy_page	= agp_generic_destroy_page,
 };
 
+static struct agp_bridge_driver intel_i965_driver = {
+       .owner                  = THIS_MODULE,
+       .aperture_sizes         = intel_i830_sizes,
+       .size_type              = FIXED_APER_SIZE,
+       .num_aperture_sizes     = 4,
+       .needs_scratch_page     = TRUE,
+       .configure              = intel_i915_configure,
+       .fetch_size             = intel_i965_fetch_size,
+       .cleanup                = intel_i915_cleanup,
+       .tlb_flush              = intel_i810_tlbflush,
+       .mask_memory            = intel_i810_mask_memory,
+       .masks                  = intel_i810_masks,
+       .agp_enable             = intel_i810_agp_enable,
+       .cache_flush            = global_cache_flush,
+       .create_gatt_table      = intel_i965_create_gatt_table,
+       .free_gatt_table        = intel_i830_free_gatt_table,
+       .insert_memory          = intel_i915_insert_entries,
+       .remove_memory          = intel_i915_remove_entries,
+       .alloc_by_type          = intel_i830_alloc_by_type,
+       .free_by_type           = intel_i810_free_by_type,
+       .agp_alloc_page         = agp_generic_alloc_page,
+       .agp_destroy_page       = agp_generic_destroy_page,
+};
 
 static struct agp_bridge_driver intel_7505_driver = {
 	.owner			= THIS_MODULE,
@@ -1684,6 +1792,35 @@
 			bridge->driver = &intel_845_driver;
 		name = "945GM";
 		break;
+	case PCI_DEVICE_ID_INTEL_82946GZ_HB:
+		if (find_i830(PCI_DEVICE_ID_INTEL_82946GZ_IG))
+			bridge->driver = &intel_i965_driver;
+		else
+			bridge->driver = &intel_845_driver;
+		name = "946GZ";
+		break;
+	case PCI_DEVICE_ID_INTEL_82965G_1_HB:
+		if (find_i830(PCI_DEVICE_ID_INTEL_82965G_1_IG))
+			bridge->driver = &intel_i965_driver;
+		else
+			bridge->driver = &intel_845_driver;
+		name = "965G";
+		break;
+	case PCI_DEVICE_ID_INTEL_82965Q_HB:
+		if (find_i830(PCI_DEVICE_ID_INTEL_82965Q_IG))
+			bridge->driver = &intel_i965_driver;
+		else
+			bridge->driver = &intel_845_driver;
+		name = "965Q";
+		break;
+	case PCI_DEVICE_ID_INTEL_82965G_HB:
+		if (find_i830(PCI_DEVICE_ID_INTEL_82965G_IG))
+			bridge->driver = &intel_i965_driver;
+		else
+			bridge->driver = &intel_845_driver;
+		name = "965G";
+		break;
+
 	case PCI_DEVICE_ID_INTEL_7505_0:
 		bridge->driver = &intel_7505_driver;
 		name = "E7505";
@@ -1766,6 +1903,7 @@
 	agp_put_bridge(bridge);
 }
 
+#ifdef CONFIG_PM
 static int agp_intel_resume(struct pci_dev *pdev)
 {
 	struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
@@ -1786,9 +1924,12 @@
 		intel_i830_configure();
 	else if (bridge->driver == &intel_810_driver)
 		intel_i810_configure();
+	else if (bridge->driver == &intel_i965_driver)
+		intel_i915_configure();
 
 	return 0;
 }
+#endif
 
 static struct pci_device_id agp_intel_pci_table[] = {
 #define ID(x)						\
@@ -1825,6 +1966,10 @@
 	ID(PCI_DEVICE_ID_INTEL_82915GM_HB),
 	ID(PCI_DEVICE_ID_INTEL_82945G_HB),
 	ID(PCI_DEVICE_ID_INTEL_82945GM_HB),
+	ID(PCI_DEVICE_ID_INTEL_82946GZ_HB),
+	ID(PCI_DEVICE_ID_INTEL_82965G_1_HB),
+	ID(PCI_DEVICE_ID_INTEL_82965Q_HB),
+	ID(PCI_DEVICE_ID_INTEL_82965G_HB),
 	{ }
 };
 
@@ -1835,7 +1980,9 @@
 	.id_table	= agp_intel_pci_table,
 	.probe		= agp_intel_probe,
 	.remove		= __devexit_p(agp_intel_remove),
+#ifdef CONFIG_PM
 	.resume		= agp_intel_resume,
+#endif
 };
 
 static int __init agp_intel_init(void)
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index 1de1b12..91b71e7 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -601,8 +601,8 @@
 		uninorth_node = of_find_node_by_name(NULL, "u3");
 	}
 	if (uninorth_node) {
-		int *revprop = (int *)
-			get_property(uninorth_node, "device-rev", NULL);
+		const int *revprop = get_property(uninorth_node,
+				"device-rev", NULL);
 		if (revprop != NULL)
 			uninorth_rev = *revprop & 0x3f;
 		of_node_put(uninorth_node);
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index b8ec25d..c149ac9 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -9,7 +9,7 @@
 #include <linux/agp_backend.h>
 #include "agp.h"
 
-static struct pci_device_id agp_via_pci_table[];
+static const struct pci_device_id agp_via_pci_table[];
 
 #define VIA_GARTCTRL	0x80
 #define VIA_APSIZE	0x84
@@ -485,7 +485,7 @@
 #endif /* CONFIG_PM */
 
 /* must be the same order as name table above */
-static struct pci_device_id agp_via_pci_table[] = {
+static const struct pci_device_id agp_via_pci_table[] = {
 #define ID(x) \
 	{						\
 	.class		= (PCI_CLASS_BRIDGE_HOST << 8),	\
diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c
new file mode 100644
index 0000000..b8c2225
--- /dev/null
+++ b/drivers/char/briq_panel.c
@@ -0,0 +1,271 @@
+/*
+ * Drivers for the Total Impact PPC based computer "BRIQ"
+ * by Dr. Karsten Jeppesen
+ *
+ */
+
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/timer.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+
+#define		BRIQ_PANEL_MINOR	156
+#define		BRIQ_PANEL_VFD_IOPORT	0x0390
+#define		BRIQ_PANEL_LED_IOPORT	0x0398
+#define		BRIQ_PANEL_VER		"1.1 (04/20/2002)"
+#define		BRIQ_PANEL_MSG0		"Loading Linux"
+
+static int		vfd_is_open;
+static unsigned char	vfd[40];
+static int		vfd_cursor;
+static unsigned char	ledpb, led;
+
+/* Push the 40-byte vfd[] shadow buffer to the display, 20 bytes per line. */
+static void update_vfd(void)
+{
+	const unsigned char *p = vfd;
+	const unsigned char *eol = vfd + 20;
+
+	outb(0x02, BRIQ_PANEL_VFD_IOPORT);	/* cursor home */
+	while (p < eol)
+		outb(*p++, BRIQ_PANEL_VFD_IOPORT + 1);
+
+	outb(0xc0, BRIQ_PANEL_VFD_IOPORT);	/* cursor to next line */
+	eol += 20;
+	while (p < eol)
+		outb(*p++, BRIQ_PANEL_VFD_IOPORT + 1);
+}
+
+/* Select the LED value from state ('R', 'G', 'Y' or 'X') and write it out. */
+static void set_led(char state)
+{
+	switch (state) {
+	case 'R': led = 0x01; break;
+	case 'G': led = 0x02; break;
+	case 'Y': led = 0x03; break;
+	case 'X': led = 0x00; break;
+	default: break;	/* any other state: keep the current value */
+	}
+	outb(led, BRIQ_PANEL_LED_IOPORT);
+}
+
+/* Open the panel; returns -EBUSY while vfd_is_open is already set. */
+static int briq_panel_open(struct inode *ino, struct file *filep)
+{
+	/* NOTE(review): test and set are not atomic, two openers may race */
+	if (vfd_is_open)
+		return -EBUSY;
+	vfd_is_open = 1;
+	return 0;
+}
+
+static int briq_panel_release(struct inode *ino, struct file *filep)
+{
+	if (!vfd_is_open)
+		return -ENODEV;
+
+	vfd_is_open = 0;
+
+	return 0;
+}
+
+/*
+ * Report a front panel button release to user space.
+ *
+ * Samples the button bits (0x04 upper, 0x08 lower) from the LED port and
+ * compares them with the previous sample kept in ledpb.  Returns 1 after
+ * copying ' ' (upper button) or '\r' (lower button) to buf, 0 when there is
+ * nothing to report or count is 0, -ENODEV if the device is not open and
+ * -EFAULT if buf cannot be written.  ppos is not used.
+ */
+static ssize_t briq_panel_read(struct file *file, char __user *buf, size_t count,
+			 loff_t *ppos)
+{
+	unsigned short c, prev;
+	unsigned char cp;
+
+	if (!vfd_is_open)
+		return -ENODEV;
+	/* Never store a byte into a zero-length user buffer. */
+	if (count == 0)
+		return 0;
+
+	c = (inb(BRIQ_PANEL_LED_IOPORT) & 0x000c) | (ledpb & 0x0003);
+	set_led(' ');
+
+	prev = ledpb;
+	ledpb = c;
+
+	if (!(prev & 0x0004) && (c & 0x0004))
+		cp = ' ';		/* upper button released */
+	else if (!(prev & 0x0008) && (c & 0x0008))
+		cp = '\r';		/* lower button released */
+	else
+		return 0;
+
+	return copy_to_user(buf, &cp, 1) ? -EFAULT : 1;
+}
+
+/* Move line two of vfd[] up to line one, blank line two, cursor to index 20. */
+static void scroll_vfd( void )
+{
+	unsigned char *top = vfd;
+	unsigned char *bottom = vfd + 20;
+
+	memcpy(top, bottom, 20);
+	memset(bottom, ' ', 20);
+	vfd_cursor = 20;
+}
+
+/*
+ * Interpret len bytes from buf: ESC (27) passes the next byte to set_led(),
+ * form feed (12) clears vfd[], newline (10) moves the cursor to the next
+ * multiple of 20, anything else is stored at the cursor.  Returns len, -EBUSY
+ * if the device is not open, or -EFAULT if a byte cannot be fetched.
+ */
+static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_t len,
+			  loff_t *ppos)
+{
+	const char __user *src = buf;
+	size_t left;
+	int esc = 0;
+
+	if (!vfd_is_open)
+		return -EBUSY;
+
+	for (left = len; left; left--, src++) {
+		char c;
+
+		if (get_user(c, src))
+			return -EFAULT;
+
+		if (esc) {
+			/* byte after ESC selects the LED state */
+			set_led(c);
+			esc = 0;
+		} else if (c == 27) {
+			esc = 1;
+		} else if (c == 12) {
+			/* do a form feed */
+			memset(vfd, ' ', 40);
+			vfd_cursor = 0;
+		} else if (c == 10) {
+			if (vfd_cursor < 20)
+				vfd_cursor = 20;
+			else if (vfd_cursor < 40)
+				vfd_cursor = 40;
+			else if (vfd_cursor < 60)
+				vfd_cursor = 60;
+			if (vfd_cursor > 59)
+				scroll_vfd();
+		} else {
+			/* just a character */
+			if (vfd_cursor > 39)
+				scroll_vfd();
+			vfd[vfd_cursor++] = c;
+		}
+	}
+	update_vfd();
+
+	return len;
+}
+
+static struct file_operations briq_panel_fops = {
+	.owner		= THIS_MODULE,
+	.read		= briq_panel_read,
+	.write		= briq_panel_write,
+	.open		= briq_panel_open,
+	.release	= briq_panel_release,
+};
+
+static struct miscdevice briq_panel_miscdev = {
+	.minor	= BRIQ_PANEL_MINOR,
+	.name	= "briq_panel",
+	.fops	= &briq_panel_fops,
+};
+
+/*
+ * Module init.  Returns -ENODEV unless the device tree "model" property
+ * starts with "TotalImpact,BRIQ-1", then claims both I/O port ranges and
+ * resets the display.  The misc device is registered last so that it cannot
+ * be opened before the hardware and vfd[] are set up, and a registration
+ * failure is returned to the caller unchanged.
+ */
+static int __init briq_panel_init(void)
+{
+	struct device_node *root = find_path_device("/");
+	const char *machine;
+	int rc = -EBUSY;
+
+	machine = root ? get_property(root, "model", NULL) : NULL;
+	if (!machine || strncmp(machine, "TotalImpact,BRIQ-1", 18) != 0)
+		return -ENODEV;
+
+	printk(KERN_INFO
+		"briq_panel: v%s Dr. Karsten Jeppesen (kj@totalimpact.com)\n",
+		BRIQ_PANEL_VER);
+
+	if (!request_region(BRIQ_PANEL_VFD_IOPORT, 4, "BRIQ Front Panel"))
+		return rc;
+
+	if (!request_region(BRIQ_PANEL_LED_IOPORT, 2, "BRIQ Front Panel"))
+		goto out_vfd;
+	ledpb = inb(BRIQ_PANEL_LED_IOPORT) & 0x000c;
+
+	outb(0x38, BRIQ_PANEL_VFD_IOPORT);	/* Function set */
+	outb(0x01, BRIQ_PANEL_VFD_IOPORT);	/* Clear display */
+	outb(0x0c, BRIQ_PANEL_VFD_IOPORT);	/* Display on */
+	outb(0x06, BRIQ_PANEL_VFD_IOPORT);	/* Entry normal */
+	memset(vfd, ' ', sizeof(vfd));
+#ifndef MODULE
+	/* Built in: show a boot banner on the first line. */
+	memcpy(vfd, "Loading ...", 11);
+#endif /* !MODULE */
+
+	update_vfd();
+
+	/* Report the real reason if registration fails. */
+	rc = misc_register(&briq_panel_miscdev);
+	if (rc < 0)
+		goto out_led;
+
+	return 0;
+
+out_led:
+	release_region(BRIQ_PANEL_LED_IOPORT, 2);
+out_vfd:
+	release_region(BRIQ_PANEL_VFD_IOPORT, 4);
+	return rc;
+}
+
+static void __exit briq_panel_exit(void)
+{
+	misc_deregister(&briq_panel_miscdev);
+	release_region(BRIQ_PANEL_VFD_IOPORT, 4);
+	release_region(BRIQ_PANEL_LED_IOPORT, 2);
+}
+
+module_init(briq_panel_init);
+module_exit(briq_panel_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Karsten Jeppesen <karsten@jeppesens.com>");
+MODULE_DESCRIPTION("Driver for the Total Impact briQ front panel");
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 613d67f..a76d2c4 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -80,7 +80,8 @@
 	struct tty_struct *tty;
 	unsigned int count;
 	int do_wakeup;
-	char outbuf[N_OUTBUF] __ALIGNED__;
+	char *outbuf;
+	int outbuf_size;
 	int n_outbuf;
 	uint32_t vtermno;
 	struct hv_ops *ops;
@@ -319,10 +320,8 @@
 	struct kobject *kobjp;
 
 	/* Auto increments kobject reference if found. */
-	if (!(hp = hvc_get_by_index(tty->index))) {
-		printk(KERN_WARNING "hvc_console: tty open failed, no vty associated with tty.\n");
+	if (!(hp = hvc_get_by_index(tty->index)))
 		return -ENODEV;
-	}
 
 	spin_lock_irqsave(&hp->lock, flags);
 	/* Check and then increment for fast path open. */
@@ -505,7 +504,7 @@
 	if (hp->n_outbuf > 0)
 		hvc_push(hp);
 
-	while (count > 0 && (rsize = N_OUTBUF - hp->n_outbuf) > 0) {
+	while (count > 0 && (rsize = hp->outbuf_size - hp->n_outbuf) > 0) {
 		if (rsize > count)
 			rsize = count;
 		memcpy(hp->outbuf + hp->n_outbuf, buf, rsize);
@@ -538,7 +537,7 @@
 	if (!hp)
 		return -1;
 
-	return N_OUTBUF - hp->n_outbuf;
+	return hp->outbuf_size - hp->n_outbuf;
 }
 
 static int hvc_chars_in_buffer(struct tty_struct *tty)
@@ -729,12 +728,13 @@
 };
 
 struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
-					struct hv_ops *ops)
+					struct hv_ops *ops, int outbuf_size)
 {
 	struct hvc_struct *hp;
 	int i;
 
-	hp = kmalloc(sizeof(*hp), GFP_KERNEL);
+	hp = kmalloc(ALIGN(sizeof(*hp), sizeof(long)) + outbuf_size,
+			GFP_KERNEL);
 	if (!hp)
 		return ERR_PTR(-ENOMEM);
 
@@ -743,6 +743,8 @@
 	hp->vtermno = vtermno;
 	hp->irq = irq;
 	hp->ops = ops;
+	hp->outbuf_size = outbuf_size;
+	hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))];
 
 	kobject_init(&hp->kobj);
 	hp->kobj.ktype = &hvc_kobj_type;
diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h
index 96b7401..8c59818 100644
--- a/drivers/char/hvc_console.h
+++ b/drivers/char/hvc_console.h
@@ -56,7 +56,7 @@
 
 /* register a vterm for hvc tty operation (module_init or hotplug add) */
 extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq,
-						 struct hv_ops *ops);
+				struct hv_ops *ops, int outbuf_size);
 /* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */
 extern int __devexit hvc_remove(struct hvc_struct *hp);
 
diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c
new file mode 100644
index 0000000..4747729
--- /dev/null
+++ b/drivers/char/hvc_iseries.c
@@ -0,0 +1,594 @@
+/*
+ * iSeries vio driver interface to hvc_console.c
+ *
+ * This code is based heavily on hvc_vio.c and viocons.c
+ *
+ * Copyright (C) 2006 Stephen Rothwell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/console.h>
+
+#include <asm/hvconsole.h>
+#include <asm/vio.h>
+#include <asm/prom.h>
+#include <asm/iseries/vio.h>
+#include <asm/iseries/hv_call.h>
+#include <asm/iseries/hv_lp_config.h>
+#include <asm/iseries/hv_lp_event.h>
+
+#include "hvc_console.h"
+
+#define VTTY_PORTS 10
+
+static DEFINE_SPINLOCK(consolelock);
+static DEFINE_SPINLOCK(consoleloglock);
+
+static const char hvc_driver_name[] = "hvc_console";
+
+#define IN_BUF_SIZE	200
+
+/*
+ * Our port information.
+ */
+static struct port_info {
+	HvLpIndex lp;
+	u64 seq;	/* sequence number of last HV send */
+	u64 ack;	/* last ack from HV */
+	struct hvc_struct *hp;
+	int in_start;
+	int in_end;
+	unsigned char in_buf[IN_BUF_SIZE];
+} port_info[VTTY_PORTS] = {
+	[ 0 ... VTTY_PORTS - 1 ] = {
+		.lp = HvLpIndexInvalid
+	}
+};
+
+#define viochar_is_console(pi)	((pi) == &port_info[0])
+
+static struct vio_device_id hvc_driver_table[] __devinitdata = {
+	{"serial", "IBM,iSeries-vty"},
+	{ "", "" }
+};
+MODULE_DEVICE_TABLE(vio, hvc_driver_table);
+
+/* Format a message, append '\r' and pass it to HvCall_writeLogBuffer(). */
+static void hvlog(char *fmt, ...)
+{
+	int i;
+	unsigned long flags;
+	va_list args;
+	static char buf[256];	/* static: only used with consoleloglock held */
+	spin_lock_irqsave(&consoleloglock, flags);
+	va_start(args, fmt);
+	i = vscnprintf(buf, sizeof(buf) - 1, fmt, args);
+	va_end(args);
+	buf[i++] = '\r';	/* fits: vscnprintf() was limited to sizeof(buf) - 1 */
+	HvCall_writeLogBuffer(buf, i);
+	spin_unlock_irqrestore(&consoleloglock, flags);
+}
+
+/*
+ * Initialize the common fields in a charLpEvent
+ */
+static void init_data_event(struct viocharlpevent *viochar, HvLpIndex lp)
+{
+	struct HvLpEvent *hev = &viochar->event;
+
+	memset(viochar, 0, sizeof(struct viocharlpevent));
+
+	hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DEFERRED_ACK |
+		HV_LP_EVENT_INT;
+	hev->xType = HvLpEvent_Type_VirtualIo;
+	hev->xSubtype = viomajorsubtype_chario | viochardata;
+	hev->xSourceLp = HvLpConfig_getLpIndex();
+	hev->xTargetLp = lp;
+	hev->xSizeMinus1 = sizeof(struct viocharlpevent);
+	hev->xSourceInstanceId = viopath_sourceinst(lp);
+	hev->xTargetInstanceId = viopath_targetinst(lp);
+}
+
+/*
+ * hv_ops get_chars: copy up to count buffered input bytes for port vtermno
+ * into buf.  Returns the number of bytes copied (0 if none are pending) or
+ * -EINVAL for a port number outside port_info[].
+ */
+static int get_chars(uint32_t vtermno, char *buf, int count)
+{
+	struct port_info *port;
+	unsigned long flags;
+	int avail, copied = 0;
+
+	if (vtermno >= VTTY_PORTS)
+		return -EINVAL;
+	if (count == 0)
+		return 0;
+
+	port = &port_info[vtermno];
+	spin_lock_irqsave(&consolelock, flags);
+	if (port->in_end != 0) {
+		avail = port->in_end - port->in_start;
+		copied = (avail > count) ? count : avail;
+		memcpy(buf, &port->in_buf[port->in_start], copied);
+		port->in_start += copied;
+		/* Buffer drained: rewind so new data starts at offset 0. */
+		if (port->in_start == port->in_end)
+			port->in_start = port->in_end = 0;
+	}
+	spin_unlock_irqrestore(&consolelock, flags);
+	return copied;
+}
+
+/*
+ * hv_ops put_chars: send up to count bytes of buf as events of at most
+ * VIOCHAR_MAX_DATA bytes and return how many were consumed.  consolelock is
+ * held with interrupts off throughout, so consoleloglock is nested with plain
+ * spin_lock(); a second irqsave into "flags" would clobber the saved state.
+ */
+static int put_chars(uint32_t vtermno, const char *buf, int count)
+{
+	struct viocharlpevent *viochar;
+	struct port_info *pi;
+	HvLpEvent_Rc hvrc;
+	unsigned long flags;
+	int len, sent = 0;
+
+	if (vtermno >= VTTY_PORTS)
+		return -EINVAL;
+
+	pi = &port_info[vtermno];
+	spin_lock_irqsave(&consolelock, flags);
+
+	if (viochar_is_console(pi) && !viopath_isactive(pi->lp)) {
+		spin_lock(&consoleloglock);
+		HvCall_writeLogBuffer(buf, count);
+		spin_unlock(&consoleloglock);
+		sent = count;
+		goto done;
+	}
+
+	viochar = vio_get_event_buffer(viomajorsubtype_chario);
+	if (viochar == NULL) {
+		hvlog("\n\rviocons: Can't get viochar buffer.");
+		goto done;
+	}
+
+	while ((count > 0) && ((pi->seq - pi->ack) < VIOCHAR_WINDOW)) {
+		len = (count > VIOCHAR_MAX_DATA) ? VIOCHAR_MAX_DATA : count;
+		if (viochar_is_console(pi)) {
+			spin_lock(&consoleloglock);
+			HvCall_writeLogBuffer(buf, len);
+			spin_unlock(&consoleloglock);
+		}
+
+		init_data_event(viochar, pi->lp);
+		viochar->len = len;
+		viochar->event.xCorrelationToken = pi->seq++;
+		viochar->event.xSizeMinus1 =
+			offsetof(struct viocharlpevent, data) + len;
+		memcpy(viochar->data, buf, len);
+
+		hvrc = HvCallEvent_signalLpEvent(&viochar->event);
+		if (hvrc)
+			hvlog("\n\rerror sending event! return code %d\n\r",
+				(int)hvrc);
+		sent += len;
+		count -= len;
+		buf += len;
+	}
+
+	vio_free_event_buffer(viomajorsubtype_chario, viochar);
+done:
+	spin_unlock_irqrestore(&consolelock, flags);
+	return sent;
+}
+
+static struct hv_ops hvc_get_put_ops = {
+	.get_chars = get_chars,
+	.put_chars = put_chars,
+};
+
+static int __devinit hvc_vio_probe(struct vio_dev *vdev,
+			const struct vio_device_id *id)
+{
+	struct hvc_struct *hp;
+	struct port_info *pi;
+
+	/* probed with invalid parameters. */
+	if (!vdev || !id)
+		return -EPERM;
+
+	if (vdev->unit_address >= VTTY_PORTS)
+		return -ENODEV;
+
+	pi = &port_info[vdev->unit_address];
+
+	hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops,
+			VIOCHAR_MAX_DATA);
+	if (IS_ERR(hp))
+		return PTR_ERR(hp);
+	pi->hp = hp;
+	dev_set_drvdata(&vdev->dev, pi);
+
+	return 0;
+}
+
+static int __devexit hvc_vio_remove(struct vio_dev *vdev)
+{
+	struct port_info *pi = dev_get_drvdata(&vdev->dev);
+	struct hvc_struct *hp = pi->hp;
+
+	return hvc_remove(hp);
+}
+
+static struct vio_driver hvc_vio_driver = {
+	.id_table	= hvc_driver_table,
+	.probe		= hvc_vio_probe,
+	.remove		= hvc_vio_remove,
+	.driver		= {
+		.name	= hvc_driver_name,
+		.owner	= THIS_MODULE,
+	}
+};
+
+/*
+ * Handle an open charLpEvent: an ack records the result of an earlier open,
+ * anything else is an open request that is accepted or rejected and acked.
+ */
+static void hvc_open_event(struct HvLpEvent *event)
+{
+	unsigned long flags;
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+	u8 port = cevent->virtual_device;
+	struct port_info *pi;
+	int reject = 0;
+
+	if (hvlpevent_is_ack(event)) {
+		if (port >= VTTY_PORTS)
+			return;
+
+		spin_lock_irqsave(&consolelock, flags);
+		pi = &port_info[port];
+		if (event->xRc == HvLpEvent_Rc_Good) {
+			pi->seq = pi->ack = 0;
+			/*
+			 * This line allows connections from the primary
+			 * partition but once one is connected from the
+			 * primary partition nothing short of a reboot
+			 * of linux will allow access from the hosting
+			 * partition again without a required iSeries fix.
+			 */
+			pi->lp = event->xTargetLp;
+		}
+		spin_unlock_irqrestore(&consolelock, flags);
+		if (event->xRc != HvLpEvent_Rc_Good)
+			printk(KERN_WARNING
+			       "hvc: handle_open_event: event->xRc == (%d).\n",
+			       event->xRc);
+
+		if (event->xCorrelationToken != 0) {
+			atomic_t *aptr= (atomic_t *)event->xCorrelationToken;
+			atomic_set(aptr, 1);
+		} else
+			printk(KERN_WARNING
+			       "hvc: weird...got open ack without atomic\n");
+		return;
+	}
+
+	/* This had better require an ack, otherwise complain */
+	if (!hvlpevent_need_ack(event)) {
+		printk(KERN_WARNING "hvc: viocharopen without ack bit!\n");
+		return;
+	}
+
+	spin_lock_irqsave(&consolelock, flags);
+	/* Make sure this is a good virtual tty */
+	if (port >= VTTY_PORTS) {
+		event->xRc = HvLpEvent_Rc_SubtypeError;
+		cevent->subtype_result_code = viorc_openRejected;
+		/*
+		 * Flag state here since we can't printk while holding
+		 * the consolelock spinlock.
+		 */
+		reject = 1;
+	} else {
+		pi = &port_info[port];
+		if ((pi->lp != HvLpIndexInvalid) &&
+				(pi->lp != event->xSourceLp)) {
+			/*
+			 * If this tty is already connected to a different
+			 * partition, fail.
+			 */
+			event->xRc = HvLpEvent_Rc_SubtypeError;
+			cevent->subtype_result_code = viorc_openRejected;
+			reject = 2;
+		} else {
+			pi->lp = event->xSourceLp;
+			event->xRc = HvLpEvent_Rc_Good;
+			cevent->subtype_result_code = viorc_good;
+			pi->seq = pi->ack = 0;
+		}
+	}
+	spin_unlock_irqrestore(&consolelock, flags);
+
+	if (reject == 1)
+		printk(KERN_WARNING "hvc: open rejected: bad virtual tty.\n");
+	else if (reject == 2)
+		printk(KERN_WARNING "hvc: open rejected: console in exclusive "
+				"use by another partition.\n");
+
+	/* Return the acknowledgement */
+	HvCallEvent_ackLpEvent(event);
+}
+
+/*
+ * Handle a close charLpEvent.  This should ONLY be an Interrupt because the
+ * virtual console should never actually issue a close event to the hypervisor
+ * because the virtual console never goes away.  A close event coming from the
+ * hypervisor simply means that there are no client consoles connected to the
+ * virtual console.
+ */
+static void hvc_close_event(struct HvLpEvent *event)
+{
+	unsigned long flags;
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+	u8 port = cevent->virtual_device;
+
+	if (!hvlpevent_is_int(event)) {
+		printk(KERN_WARNING
+			"hvc: got unexpected close acknowlegement\n");
+		return;
+	}
+
+	if (port >= VTTY_PORTS) {
+		printk(KERN_WARNING
+			"hvc: close message from invalid virtual device.\n");
+		return;
+	}
+
+	/* For closes, just mark the console partition invalid */
+	spin_lock_irqsave(&consolelock, flags);
+
+	if (port_info[port].lp == event->xSourceLp)
+		port_info[port].lp = HvLpIndexInvalid;
+
+	spin_unlock_irqrestore(&consolelock, flags);
+}
+
+/*
+ * Handle a data charLpEvent: append the payload to the port's input buffer
+ * and warn when some or all of it had to be dropped for lack of room.
+ */
+static void hvc_data_event(struct HvLpEvent *event)
+{
+	unsigned long flags;
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+	struct port_info *pi;
+	int n, dropped;
+	u8 port = cevent->virtual_device;
+
+	if (port >= VTTY_PORTS) {
+		printk(KERN_WARNING "hvc: data on invalid virtual device %d\n",
+				port);
+		return;
+	}
+	if (cevent->len == 0)
+		return;
+
+	/*
+	 * Change 05/01/2003 - Ryan Arnold: drop data events from any
+	 * partition other than the one that currently owns the port.
+	 * This will work without an iSeries fix.
+	 */
+	pi = &port_info[port];
+	if (pi->lp != event->xSourceLp)
+		return;
+
+	spin_lock_irqsave(&consolelock, flags);
+	n = IN_BUF_SIZE - pi->in_end;
+	if (n > cevent->len)
+		n = cevent->len;
+	if (n > 0) {
+		memcpy(&pi->in_buf[pi->in_end], cevent->data, n);
+		pi->in_end += n;
+	}
+	dropped = cevent->len - n;
+	spin_unlock_irqrestore(&consolelock, flags);
+	if (dropped)
+		printk(KERN_WARNING "hvc: input buffer overflow\n");
+}
+
+static void hvc_ack_event(struct HvLpEvent *event)
+{
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+	unsigned long flags;
+	u8 port = cevent->virtual_device;
+
+	if (port >= VTTY_PORTS) {
+		printk(KERN_WARNING "hvc: ack on invalid virtual device\n");
+		return;
+	}
+	/* Latch the event's correlation token as this port's ack value. */
+	spin_lock_irqsave(&consolelock, flags);
+	port_info[port].ack = event->xCorrelationToken;
+	spin_unlock_irqrestore(&consolelock, flags);
+}
+
+static void hvc_config_event(struct HvLpEvent *event)
+{
+	struct viocharlpevent *cevent = (struct viocharlpevent *)event;
+
+	if (cevent->data[0] == 0x01)
+		printk(KERN_INFO "hvc: window resized to %d: %d: %d: %d\n",
+		       cevent->data[1], cevent->data[2],
+		       cevent->data[3], cevent->data[4]);
+	else
+		printk(KERN_WARNING "hvc: unknown config event\n");
+}
+
+static void hvc_handle_event(struct HvLpEvent *event)
+{
+	int charminor;
+
+	if (event == NULL)
+		return;
+
+	charminor = event->xSubtype & VIOMINOR_SUBTYPE_MASK;
+	switch (charminor) {
+	case viocharopen:
+		hvc_open_event(event);
+		break;
+	case viocharclose:
+		hvc_close_event(event);
+		break;
+	case viochardata:
+		hvc_data_event(event);
+		break;
+	case viocharack:
+		hvc_ack_event(event);
+		break;
+	case viocharconfig:
+		hvc_config_event(event);
+		break;
+	default:
+		if (hvlpevent_is_int(event) && hvlpevent_need_ack(event)) {
+			event->xRc = HvLpEvent_Rc_InvalidSubtype;
+			HvCallEvent_ackLpEvent(event);
+		}
+	}
+}
+
+static int send_open(HvLpIndex remoteLp, void *sem)
+{
+	return HvCallEvent_signalLpEventFast(remoteLp,
+			HvLpEvent_Type_VirtualIo,
+			viomajorsubtype_chario | viocharopen,
+			HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
+			viopath_sourceinst(remoteLp),
+			viopath_targetinst(remoteLp),
+			(u64)(unsigned long)sem, VIOVERSION << 16,
+			0, 0, 0, 0);
+}
+
+static int hvc_vio_init(void)
+{
+	atomic_t wait_flag;
+	int rc;
+
+	/* +2 for fudge */
+	rc = viopath_open(HvLpConfig_getPrimaryLpIndex(),
+			viomajorsubtype_chario, VIOCHAR_WINDOW + 2);
+	if (rc)
+		printk(KERN_WARNING "hvc: error opening to primary %d\n", rc);
+
+	if (viopath_hostLp == HvLpIndexInvalid)
+		vio_set_hostlp();
+
+	/*
+	 * And if the primary is not the same as the hosting LP, open to the
+	 * hosting lp
+	 */
+	if ((viopath_hostLp != HvLpIndexInvalid) &&
+	    (viopath_hostLp != HvLpConfig_getPrimaryLpIndex())) {
+		printk(KERN_INFO "hvc: open path to hosting (%d)\n",
+				viopath_hostLp);
+		rc = viopath_open(viopath_hostLp, viomajorsubtype_chario,
+				VIOCHAR_WINDOW + 2);	/* +2 for fudge */
+		if (rc)
+			printk(KERN_WARNING
+				"error opening to partition %d: %d\n",
+				viopath_hostLp, rc);
+	}
+
+	if (vio_setHandler(viomajorsubtype_chario, hvc_handle_event) < 0)
+		printk(KERN_WARNING
+			"hvc: error seting handler for console events!\n");
+
+	/*
+	 * First, try to open the console to the hosting lp.
+	 * Wait on a semaphore for the response.
+	 */
+	atomic_set(&wait_flag, 0);
+	if ((viopath_isactive(viopath_hostLp)) &&
+	    (send_open(viopath_hostLp, &wait_flag) == 0)) {
+		printk(KERN_INFO "hvc: hosting partition %d\n", viopath_hostLp);
+		while (atomic_read(&wait_flag) == 0)
+			mb();
+		atomic_set(&wait_flag, 0);
+	}
+
+	/*
+	 * If we don't have an active console, try the primary
+	 */
+	if ((!viopath_isactive(port_info[0].lp)) &&
+	    (viopath_isactive(HvLpConfig_getPrimaryLpIndex())) &&
+	    (send_open(HvLpConfig_getPrimaryLpIndex(), &wait_flag) == 0)) {
+		printk(KERN_INFO "hvc: opening console to primary partition\n");
+		while (atomic_read(&wait_flag) == 0)
+			mb();
+	}
+
+	/* Register as a vio device to receive callbacks */
+	rc = vio_register_driver(&hvc_vio_driver);
+
+	return rc;
+}
+module_init(hvc_vio_init); /* after drivers/char/hvc_console.c */
+
+static void hvc_vio_exit(void)
+{
+	vio_unregister_driver(&hvc_vio_driver);
+}
+module_exit(hvc_vio_exit);
+
+/* the device tree order defines our numbering */
+static int hvc_find_vtys(void)
+{
+	struct device_node *vty;
+	int num_found = 0;
+
+	for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL;
+			vty = of_find_node_by_name(vty, "vty")) {
+		uint32_t *vtermno;
+
+		/* Static space is limited; release our node ref on exit. */
+		if ((num_found >= MAX_NR_HVC_CONSOLES) ||
+				(num_found >= VTTY_PORTS)) {
+			of_node_put(vty);
+			break;
+		}
+
+		vtermno = (uint32_t *)get_property(vty, "reg", NULL);
+		if (!vtermno)
+			continue;
+
+		if (!device_is_compatible(vty, "IBM,iSeries-vty"))
+			continue;
+
+		if (num_found == 0)
+			add_preferred_console("hvc", 0, NULL);
+		hvc_instantiate(*vtermno, num_found, &hvc_get_put_ops);
+		++num_found;
+	}
+
+	return num_found;
+}
+console_initcall(hvc_find_vtys);
diff --git a/drivers/char/hvc_rtas.c b/drivers/char/hvc_rtas.c
index 57106e02..4b97eaf 100644
--- a/drivers/char/hvc_rtas.c
+++ b/drivers/char/hvc_rtas.c
@@ -94,7 +94,7 @@
 
 	/* Allocate an hvc_struct for the console device we instantiated
 	 * earlier.  Save off hp so that we can return it on exit */
-	hp = hvc_alloc(hvc_rtas_cookie, NO_IRQ, &hvc_rtas_get_put_ops);
+	hp = hvc_alloc(hvc_rtas_cookie, NO_IRQ, &hvc_rtas_get_put_ops, 16);
 	if (IS_ERR(hp))
 		return PTR_ERR(hp);
 
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c
index 9add81ce..cc95941 100644
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -90,7 +90,8 @@
 	if (!vdev || !id)
 		return -EPERM;
 
-	hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops);
+	hp = hvc_alloc(vdev->unit_address, vdev->irq, &hvc_get_put_ops,
+			MAX_VIO_PUT_CHARS);
 	if (IS_ERR(hp))
 		return PTR_ERR(hp);
 	dev_set_drvdata(&vdev->dev, hp);
@@ -140,7 +141,7 @@
 
 	for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL;
 			vty = of_find_node_by_name(vty, "vty")) {
-		uint32_t *vtermno;
+		const uint32_t *vtermno;
 
 		/* We have statically defined space for only a certain number
 		 * of console adapters.
@@ -148,7 +149,7 @@
 		if (num_found >= MAX_NR_HVC_CONSOLES)
 			break;
 
-		vtermno = (uint32_t *)get_property(vty, "reg", NULL);
+		vtermno = get_property(vty, "reg", NULL);
 		if (!vtermno)
 			continue;
 
diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c
index 017f755..a89a95f 100644
--- a/drivers/char/hvsi.c
+++ b/drivers/char/hvsi.c
@@ -1274,11 +1274,10 @@
 			vty != NULL;
 			vty = of_find_compatible_node(vty, "serial", "hvterm-protocol")) {
 		struct hvsi_struct *hp;
-		uint32_t *vtermno;
-		uint32_t *irq;
+		const uint32_t *vtermno, *irq;
 
-		vtermno = (uint32_t *)get_property(vty, "reg", NULL);
-		irq = (uint32_t *)get_property(vty, "interrupts", NULL);
+		vtermno = get_property(vty, "reg", NULL);
+		irq = get_property(vty, "interrupts", NULL);
 		if (!vtermno || !irq)
 			continue;
 
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 84dfc42..8c09997 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -3488,7 +3488,7 @@
  */
 	EBRDENABLE(brdp);
 	sigsp = (cdkecpsig_t __iomem *) EBRDGETMEMPTR(brdp, CDK_SIGADDR);
-	memcpy(&sig, sigsp, sizeof(cdkecpsig_t));
+	memcpy_fromio(&sig, sigsp, sizeof(cdkecpsig_t));
 	EBRDDISABLE(brdp);
 
 	if (sig.magic != cpu_to_le32(ECP_MAGIC))
diff --git a/drivers/char/tpm/tpm_atmel.h b/drivers/char/tpm/tpm_atmel.h
index 2e68eeb..aefd683 100644
--- a/drivers/char/tpm/tpm_atmel.h
+++ b/drivers/char/tpm/tpm_atmel.h
@@ -37,7 +37,7 @@
 {
 	struct device_node *dn;
 	unsigned long address, size;
-	unsigned int *reg;
+	const unsigned int *reg;
 	int reglen;
 	int naddrc;
 	int nsizec;
@@ -52,7 +52,7 @@
 		return NULL;
 	}
 
-	reg = (unsigned int *) get_property(dn, "reg", &reglen);
+	reg = get_property(dn, "reg", &reglen);
 	naddrc = prom_n_addr_cells(dn);
 	nsizec = prom_n_size_cells(dn);
 
diff --git a/drivers/char/viocons.c b/drivers/char/viocons.c
index 766f786..f3efeaf 100644
--- a/drivers/char/viocons.c
+++ b/drivers/char/viocons.c
@@ -43,7 +43,6 @@
 #include <linux/sysrq.h>
 
 #include <asm/iseries/vio.h>
-
 #include <asm/iseries/hv_lp_event.h>
 #include <asm/iseries/hv_call_event.h>
 #include <asm/iseries/hv_lp_config.h>
@@ -67,35 +66,6 @@
 extern int sysrq_enabled;
 #endif
 
-/*
- * The structure of the events that flow between us and OS/400.  You can't
- * mess with this unless the OS/400 side changes too
- */
-struct viocharlpevent {
-	struct HvLpEvent event;
-	u32 reserved;
-	u16 version;
-	u16 subtype_result_code;
-	u8 virtual_device;
-	u8 len;
-	u8 data[VIOCHAR_MAX_DATA];
-};
-
-#define VIOCHAR_WINDOW		10
-#define VIOCHAR_HIGHWATERMARK	3
-
-enum viocharsubtype {
-	viocharopen = 0x0001,
-	viocharclose = 0x0002,
-	viochardata = 0x0003,
-	viocharack = 0x0004,
-	viocharconfig = 0x0005
-};
-
-enum viochar_rc {
-	viochar_rc_ebusy = 1
-};
-
 #define VIOCHAR_NUM_BUF		16
 
 /*
@@ -1183,6 +1153,7 @@
 		port_info[i].magic = VIOTTY_MAGIC;
 	}
 	HvCall_setLogBufferFormatAndCodepage(HvCall_LogBuffer_ASCII, 437);
+	add_preferred_console("viocons", 0, NULL);
 	register_console(&viocons_early);
 	return 0;
 }
diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c
index b72b204..73c78bf 100644
--- a/drivers/char/viotape.c
+++ b/drivers/char/viotape.c
@@ -940,7 +940,6 @@
 
 static int viotape_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 {
-	char tapename[32];
 	int i = vdev->unit_address;
 	int j;
 
@@ -956,10 +955,9 @@
 			"iseries!vt%d", i);
 	class_device_create(tape_class, NULL, MKDEV(VIOTAPE_MAJOR, i | 0x80),
 			NULL, "iseries!nvt%d", i);
-	sprintf(tapename, "iseries/vt%d", i);
-	printk(VIOTAPE_KERN_INFO "tape %s is iSeries "
+	printk(VIOTAPE_KERN_INFO "tape iseries/vt%d is iSeries "
 			"resource %10.10s type %4.4s, model %3.3s\n",
-			tapename, viotape_unitinfo[i].rsrcname,
+			i, viotape_unitinfo[i].rsrcname,
 			viotape_unitinfo[i].type, viotape_unitinfo[i].model);
 	return 0;
 }
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b3df613..d35a9f0 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -32,7 +32,7 @@
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg)
 
 /**
- * The "cpufreq driver" - the arch- or hardware-dependend low
+ * The "cpufreq driver" - the arch- or hardware-dependent low
  * level driver of CPUFreq support, and its spinlock. This lock
  * also protects the cpufreq_cpu_data array.
  */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 52cf1f0..bf8aa45 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -55,6 +55,10 @@
 	struct cpufreq_policy *cur_policy;
  	struct work_struct work;
 	unsigned int enable;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int freq_lo;
+	unsigned int freq_lo_jiffies;
+	unsigned int freq_hi_jiffies;
 };
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
 
@@ -72,15 +76,15 @@
 
 static struct workqueue_struct	*kondemand_wq;
 
-struct dbs_tuners {
+static struct dbs_tuners {
 	unsigned int sampling_rate;
 	unsigned int up_threshold;
 	unsigned int ignore_nice;
-};
-
-static struct dbs_tuners dbs_tuners_ins = {
+	unsigned int powersave_bias;
+} dbs_tuners_ins = {
 	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
 	.ignore_nice = 0,
+	.powersave_bias = 0,
 };
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
@@ -96,6 +100,70 @@
 	return retval;
 }
 
+/*
+ * Find right freq to be set now with powersave_bias on.
+ * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
+ * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
+ */
+static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+					  unsigned int freq_next,
+					  unsigned int relation)
+{
+	unsigned int freq_req, freq_reduc, freq_avg;
+	unsigned int freq_hi, freq_lo;
+	unsigned int index = 0;
+	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
+	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);
+
+	if (!dbs_info->freq_table) {
+		dbs_info->freq_lo = 0;
+		dbs_info->freq_lo_jiffies = 0;
+		return freq_next;
+	}
+
+	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
+			relation, &index);
+	freq_req = dbs_info->freq_table[index].frequency;
+	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
+	freq_avg = freq_req - freq_reduc;
+
+	/* Find freq bounds for freq_avg in freq_table */
+	index = 0;
+	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+			CPUFREQ_RELATION_H, &index);
+	freq_lo = dbs_info->freq_table[index].frequency;
+	index = 0;
+	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+			CPUFREQ_RELATION_L, &index);
+	freq_hi = dbs_info->freq_table[index].frequency;
+
+	/* Find out how long we have to be in hi and lo freqs */
+	if (freq_hi == freq_lo) {
+		dbs_info->freq_lo = 0;
+		dbs_info->freq_lo_jiffies = 0;
+		return freq_lo;
+	}
+	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
+	jiffies_hi += ((freq_hi - freq_lo) / 2);
+	jiffies_hi /= (freq_hi - freq_lo);
+	jiffies_lo = jiffies_total - jiffies_hi;
+	dbs_info->freq_lo = freq_lo;
+	dbs_info->freq_lo_jiffies = jiffies_lo;
+	dbs_info->freq_hi_jiffies = jiffies_hi;
+	return freq_hi;
+}
+
+static void ondemand_powersave_bias_init(void)
+{
+	int i;
+	for_each_online_cpu(i) {
+		struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i);
+		dbs_info->freq_table = cpufreq_frequency_get_table(i);
+		dbs_info->freq_lo = 0;
+	}
+}
+
 /************************** sysfs interface ************************/
 static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
 {
@@ -124,6 +192,7 @@
 show_one(sampling_rate, sampling_rate);
 show_one(up_threshold, up_threshold);
 show_one(ignore_nice_load, ignore_nice);
+show_one(powersave_bias, powersave_bias);
 
 static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
 		const char *buf, size_t count)
@@ -198,6 +267,27 @@
 	return count;
 }
 
+static ssize_t store_powersave_bias(struct cpufreq_policy *unused,
+		const char *buf, size_t count)
+{
+	unsigned int input;
+	int ret;
+	ret = sscanf(buf, "%u", &input);
+
+	if (ret != 1)
+		return -EINVAL;
+
+	if (input > 1000)
+		input = 1000;
+
+	mutex_lock(&dbs_mutex);
+	dbs_tuners_ins.powersave_bias = input;
+	ondemand_powersave_bias_init();
+	mutex_unlock(&dbs_mutex);
+
+	return count;
+}
+
 #define define_one_rw(_name) \
 static struct freq_attr _name = \
 __ATTR(_name, 0644, show_##_name, store_##_name)
@@ -205,6 +295,7 @@
 define_one_rw(sampling_rate);
 define_one_rw(up_threshold);
 define_one_rw(ignore_nice_load);
+define_one_rw(powersave_bias);
 
 static struct attribute * dbs_attributes[] = {
 	&sampling_rate_max.attr,
@@ -212,6 +303,7 @@
 	&sampling_rate.attr,
 	&up_threshold.attr,
 	&ignore_nice_load.attr,
+	&powersave_bias.attr,
 	NULL
 };
 
@@ -234,6 +326,7 @@
 	if (!this_dbs_info->enable)
 		return;
 
+	this_dbs_info->freq_lo = 0;
 	policy = this_dbs_info->cur_policy;
 	cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
 	total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
@@ -274,11 +367,18 @@
 	/* Check for frequency increase */
 	if (load > dbs_tuners_ins.up_threshold) {
 		/* if we are already at full speed then break out early */
-		if (policy->cur == policy->max)
-			return;
+		if (!dbs_tuners_ins.powersave_bias) {
+			if (policy->cur == policy->max)
+				return;
 
-		__cpufreq_driver_target(policy, policy->max,
-			CPUFREQ_RELATION_H);
+			__cpufreq_driver_target(policy, policy->max,
+				CPUFREQ_RELATION_H);
+		} else {
+			int freq = powersave_bias_target(policy, policy->max,
+					CPUFREQ_RELATION_H);
+			__cpufreq_driver_target(policy, freq,
+				CPUFREQ_RELATION_L);
+		}
 		return;
 	}
 
@@ -293,37 +393,64 @@
 	 * policy. To be safe, we focus 10 points under the threshold.
 	 */
 	if (load < (dbs_tuners_ins.up_threshold - 10)) {
-		unsigned int freq_next;
-		freq_next = (policy->cur * load) /
+		unsigned int freq_next = (policy->cur * load) /
 			(dbs_tuners_ins.up_threshold - 10);
-
-		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+		if (!dbs_tuners_ins.powersave_bias) {
+			__cpufreq_driver_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+		} else {
+			int freq = powersave_bias_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+			__cpufreq_driver_target(policy, freq,
+				CPUFREQ_RELATION_L);
+		}
 	}
 }
 
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
 static void do_dbs_timer(void *data)
 {
 	unsigned int cpu = smp_processor_id();
 	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+	/* We want all CPUs to sample on nearly the same jiffy */
+	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	delay -= jiffies % delay;
 
 	if (!dbs_info->enable)
 		return;
-
-	lock_cpu_hotplug();
-	dbs_check_cpu(dbs_info);
-	unlock_cpu_hotplug();
-	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
+	/* Common NORMAL_SAMPLE setup */
+	INIT_WORK(&dbs_info->work, do_dbs_timer, (void *)DBS_NORMAL_SAMPLE);
+	if (!dbs_tuners_ins.powersave_bias ||
+	    (unsigned long) data == DBS_NORMAL_SAMPLE) {
+		lock_cpu_hotplug();
+		dbs_check_cpu(dbs_info);
+		unlock_cpu_hotplug();
+		if (dbs_info->freq_lo) {
+			/* Setup timer for SUB_SAMPLE */
+			INIT_WORK(&dbs_info->work, do_dbs_timer,
+					(void *)DBS_SUB_SAMPLE);
+			delay = dbs_info->freq_hi_jiffies;
+		}
+	} else {
+		__cpufreq_driver_target(dbs_info->cur_policy,
+	                        	dbs_info->freq_lo,
+	                        	CPUFREQ_RELATION_H);
+	}
+	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_init(unsigned int cpu)
 {
 	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+	/* We want all CPUs to sample on nearly the same jiffy */
+	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	delay -= jiffies % delay;
 
-	INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
-	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
-	return;
+	ondemand_powersave_bias_init();
+	INIT_WORK(&dbs_info->work, do_dbs_timer, NULL);
+	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 25eee53..c2ecc59 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -350,12 +350,10 @@
 	}
 
 	register_hotcpu_notifier(&cpufreq_stat_cpu_notifier);
-	lock_cpu_hotplug();
 	for_each_online_cpu(cpu) {
 		cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE,
 			(void *)(long)cpu);
 	}
-	unlock_cpu_hotplug();
 	return 0;
 }
 static void
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4263935..adb5541 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -2,22 +2,53 @@
 
 config CRYPTO_DEV_PADLOCK
 	tristate "Support for VIA PadLock ACE"
-	depends on CRYPTO && X86_32
+	depends on X86_32
+	select CRYPTO_ALGAPI
+	default m
 	help
 	  Some VIA processors come with an integrated crypto engine
 	  (so called VIA PadLock ACE, Advanced Cryptography Engine)
-	  that provides instructions for very fast {en,de}cryption 
-	  with some algorithms.
+	  that provides instructions for very fast cryptographic
+	  operations with supported algorithms.
 	  
 	  The instructions are used only when the CPU supports them.
-	  Otherwise software encryption is used. If you are unsure,
-	  say Y.
+	  Otherwise software encryption is used.
+
+	  Selecting M for this option will compile a helper module
+	  padlock.ko that should autoload all of the algorithms
+	  configured below. Don't worry if your hardware does not
+	  support some or all of them. In that case padlock.ko will
+	  simply write a single line into the kernel log reporting
+	  the failure, but everything else will keep working fine.
+
+	  If you are unsure, say M. The compiled module will be
+	  called padlock.ko
 
 config CRYPTO_DEV_PADLOCK_AES
-	bool "Support for AES in VIA PadLock"
+	tristate "PadLock driver for AES algorithm"
 	depends on CRYPTO_DEV_PADLOCK
-	default y
+	select CRYPTO_BLKCIPHER
+	default m
 	help
 	  Use VIA PadLock for AES algorithm.
 
+	  Available in VIA C3 and newer CPUs.
+
+	  If unsure say M. The compiled module will be
+	  called padlock-aes.ko
+
+config CRYPTO_DEV_PADLOCK_SHA
+	tristate "PadLock driver for SHA1 and SHA256 algorithms"
+	depends on CRYPTO_DEV_PADLOCK
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	default m
+	help
+	  Use VIA PadLock for SHA1/SHA256 algorithms.
+
+	  Available in VIA C7 and newer processors.
+
+	  If unsure say M. The compiled module will be
+	  called padlock-sha.ko
+
 endmenu
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 45426ca..4c3d0ec 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -1,7 +1,3 @@
-
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK) += padlock.o
-
-padlock-objs-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
-
-padlock-objs := padlock-generic.o $(padlock-objs-y)
-
+obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
+obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index b643d71..d4501dc 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -43,11 +43,11 @@
  * ---------------------------------------------------------------------------
  */
 
+#include <crypto/algapi.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/crypto.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <asm/byteorder.h>
@@ -59,6 +59,17 @@
 #define AES_EXTENDED_KEY_SIZE	64	/* in uint32_t units */
 #define AES_EXTENDED_KEY_SIZE_B	(AES_EXTENDED_KEY_SIZE * sizeof(uint32_t))
 
+/* Control word. */
+struct cword {
+	unsigned int __attribute__ ((__packed__))
+		rounds:4,
+		algo:3,
+		keygen:1,
+		interm:1,
+		encdec:1,
+		ksize:2;
+} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
+
 /* Whenever making any changes to the following
  * structure *make sure* you keep E, d_data
  * and cword aligned on 16 Bytes boundaries!!! */
@@ -286,9 +297,9 @@
 	return 0;
 }
 
-static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
+static inline struct aes_ctx *aes_ctx_common(void *ctx)
 {
-	unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm);
+	unsigned long addr = (unsigned long)ctx;
 	unsigned long align = PADLOCK_ALIGNMENT;
 
 	if (align <= crypto_tfm_ctx_alignment())
@@ -296,16 +307,27 @@
 	return (struct aes_ctx *)ALIGN(addr, align);
 }
 
+static inline struct aes_ctx *aes_ctx(struct crypto_tfm *tfm)
+{
+	return aes_ctx_common(crypto_tfm_ctx(tfm));
+}
+
+static inline struct aes_ctx *blk_aes_ctx(struct crypto_blkcipher *tfm)
+{
+	return aes_ctx_common(crypto_blkcipher_ctx(tfm));
+}
+
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-		       unsigned int key_len, u32 *flags)
+		       unsigned int key_len)
 {
 	struct aes_ctx *ctx = aes_ctx(tfm);
 	const __le32 *key = (const __le32 *)in_key;
+	u32 *flags = &tfm->crt_flags;
 	uint32_t i, t, u, v, w;
 	uint32_t P[AES_EXTENDED_KEY_SIZE];
 	uint32_t rounds;
 
-	if (key_len != 16 && key_len != 24 && key_len != 32) {
+	if (key_len % 8) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
@@ -430,50 +452,10 @@
 	padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1);
 }
 
-static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct aes_ctx *ctx = aes_ctx(desc->tfm);
-	padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt,
-			   nbytes / AES_BLOCK_SIZE);
-	return nbytes & ~(AES_BLOCK_SIZE - 1);
-}
-
-static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct aes_ctx *ctx = aes_ctx(desc->tfm);
-	padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt,
-			   nbytes / AES_BLOCK_SIZE);
-	return nbytes & ~(AES_BLOCK_SIZE - 1);
-}
-
-static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct aes_ctx *ctx = aes_ctx(desc->tfm);
-	u8 *iv;
-
-	iv = padlock_xcrypt_cbc(in, out, ctx->E, desc->info,
-				&ctx->cword.encrypt, nbytes / AES_BLOCK_SIZE);
-	memcpy(desc->info, iv, AES_BLOCK_SIZE);
-
-	return nbytes & ~(AES_BLOCK_SIZE - 1);
-}
-
-static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
-				    const u8 *in, unsigned int nbytes)
-{
-	struct aes_ctx *ctx = aes_ctx(desc->tfm);
-	padlock_xcrypt_cbc(in, out, ctx->D, desc->info, &ctx->cword.decrypt,
-			   nbytes / AES_BLOCK_SIZE);
-	return nbytes & ~(AES_BLOCK_SIZE - 1);
-}
-
 static struct crypto_alg aes_alg = {
 	.cra_name		=	"aes",
 	.cra_driver_name	=	"aes-padlock",
-	.cra_priority		=	300,
+	.cra_priority		=	PADLOCK_CRA_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct aes_ctx),
@@ -487,23 +469,195 @@
 			.cia_setkey	   	= 	aes_set_key,
 			.cia_encrypt	 	=	aes_encrypt,
 			.cia_decrypt	  	=	aes_decrypt,
-			.cia_encrypt_ecb 	=	aes_encrypt_ecb,
-			.cia_decrypt_ecb  	=	aes_decrypt_ecb,
-			.cia_encrypt_cbc 	=	aes_encrypt_cbc,
-			.cia_decrypt_cbc  	=	aes_decrypt_cbc,
 		}
 	}
 };
 
-int __init padlock_init_aes(void)
+static int ecb_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
 {
-	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
+	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
+				   ctx->E, &ctx->cword.encrypt,
+				   nbytes / AES_BLOCK_SIZE);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static int ecb_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
+				   ctx->D, &ctx->cword.decrypt,
+				   nbytes / AES_BLOCK_SIZE);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static struct crypto_alg ecb_aes_alg = {
+	.cra_name		=	"ecb(aes)",
+	.cra_driver_name	=	"ecb-aes-padlock",
+	.cra_priority		=	PADLOCK_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct aes_ctx),
+	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ecb_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.setkey	   		= 	aes_set_key,
+			.encrypt		=	ecb_aes_encrypt,
+			.decrypt		=	ecb_aes_decrypt,
+		}
+	}
+};
+
+static int cbc_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
+					    walk.dst.virt.addr, ctx->E,
+					    walk.iv, &ctx->cword.encrypt,
+					    nbytes / AES_BLOCK_SIZE);
+		memcpy(walk.iv, iv, AES_BLOCK_SIZE);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static int cbc_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
+				   ctx->D, walk.iv, &ctx->cword.decrypt,
+				   nbytes / AES_BLOCK_SIZE);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static struct crypto_alg cbc_aes_alg = {
+	.cra_name		=	"cbc(aes)",
+	.cra_driver_name	=	"cbc-aes-padlock",
+	.cra_priority		=	PADLOCK_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct aes_ctx),
+	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(cbc_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey	   		= 	aes_set_key,
+			.encrypt		=	cbc_aes_encrypt,
+			.decrypt		=	cbc_aes_decrypt,
+		}
+	}
+};
+
+static int __init padlock_init(void)
+{
+	int ret;
+
+	if (!cpu_has_xcrypt) {
+		printk(KERN_ERR PFX "VIA PadLock not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!cpu_has_xcrypt_enabled) {
+		printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
+		return -ENODEV;
+	}
 
 	gen_tabs();
-	return crypto_register_alg(&aes_alg);
+	if ((ret = crypto_register_alg(&aes_alg)))
+		goto aes_err;
+
+	if ((ret = crypto_register_alg(&ecb_aes_alg)))
+		goto ecb_aes_err;
+
+	if ((ret = crypto_register_alg(&cbc_aes_alg)))
+		goto cbc_aes_err;
+
+	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
+
+out:
+	return ret;
+
+cbc_aes_err:
+	crypto_unregister_alg(&ecb_aes_alg);
+ecb_aes_err:
+	crypto_unregister_alg(&aes_alg);
+aes_err:
+	printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
+	goto out;
 }
 
-void __exit padlock_fini_aes(void)
+static void __exit padlock_fini(void)
 {
+	crypto_unregister_alg(&cbc_aes_alg);
+	crypto_unregister_alg(&ecb_aes_alg);
 	crypto_unregister_alg(&aes_alg);
 }
+
+module_init(padlock_init);
+module_exit(padlock_fini);
+
+MODULE_DESCRIPTION("VIA PadLock AES algorithm support");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig");
+
+MODULE_ALIAS("aes-padlock");
diff --git a/drivers/crypto/padlock-generic.c b/drivers/crypto/padlock-generic.c
deleted file mode 100644
index 18cf0e8..0000000
--- a/drivers/crypto/padlock-generic.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/* 
- * Cryptographic API.
- *
- * Support for VIA PadLock hardware crypto engine.
- *
- * Copyright (c) 2004  Michal Ludvig <michal@logix.cz>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/crypto.h>
-#include <asm/byteorder.h>
-#include "padlock.h"
-
-static int __init
-padlock_init(void)
-{
-	int ret = -ENOSYS;
-	
-	if (!cpu_has_xcrypt) {
-		printk(KERN_ERR PFX "VIA PadLock not detected.\n");
-		return -ENODEV;
-	}
-
-	if (!cpu_has_xcrypt_enabled) {
-		printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
-		return -ENODEV;
-	}
-
-#ifdef CONFIG_CRYPTO_DEV_PADLOCK_AES
-	if ((ret = padlock_init_aes())) {
-		printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
-		return ret;
-	}
-#endif
-
-	if (ret == -ENOSYS)
-		printk(KERN_ERR PFX "Hmm, VIA PadLock was compiled without any algorithm.\n");
-
-	return ret;
-}
-
-static void __exit
-padlock_fini(void)
-{
-#ifdef CONFIG_CRYPTO_DEV_PADLOCK_AES
-	padlock_fini_aes();
-#endif
-}
-
-module_init(padlock_init);
-module_exit(padlock_fini);
-
-MODULE_DESCRIPTION("VIA PadLock crypto engine support.");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Michal Ludvig");
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
new file mode 100644
index 0000000..a781fd2
--- /dev/null
+++ b/drivers/crypto/padlock-sha.c
@@ -0,0 +1,318 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for VIA PadLock hardware crypto engine.
+ *
+ * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/cryptohash.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include "padlock.h"
+
+#define SHA1_DEFAULT_FALLBACK	"sha1-generic"
+#define SHA1_DIGEST_SIZE        20
+#define SHA1_HMAC_BLOCK_SIZE    64
+
+#define SHA256_DEFAULT_FALLBACK "sha256-generic"
+#define SHA256_DIGEST_SIZE      32
+#define SHA256_HMAC_BLOCK_SIZE  64
+
+struct padlock_sha_ctx {
+	char		*data;		/* one-page staging buffer, allocated in padlock_cra_init() */
+	size_t		used;		/* number of bytes currently staged in data */
+	int		bypass;		/* non-zero while the fallback hash handles this digest */
+	void (*f_sha_padlock)(const char *in, char *out, int count);	/* padlock_do_sha1 or padlock_do_sha256 */
+	struct hash_desc fallback;	/* fallback hash used once input no longer fits in data */
+};
+
+static inline struct padlock_sha_ctx *ctx(struct crypto_tfm *tfm)
+{	/* Shorthand: return crypto_tfm_ctx(tfm) typed as struct padlock_sha_ctx. */
+	return crypto_tfm_ctx(tfm);
+}
+
+/* Align ptr to PADLOCK_ALIGNMENT via ALIGN(); we need an aligned address on the stack. */
+#define NEAREST_ALIGNED(ptr) \
+	((void *)ALIGN((size_t)(ptr), PADLOCK_ALIGNMENT))
+
+static struct crypto_alg sha1_alg, sha256_alg;
+
+static void padlock_sha_bypass(struct crypto_tfm *tfm)
+{	/* Hand the current digest over to the fallback hash, replaying any staged input. */
+	if (ctx(tfm)->bypass)	/* already switched over: nothing to do */
+		return;
+
+	crypto_hash_init(&ctx(tfm)->fallback);
+	if (ctx(tfm)->data && ctx(tfm)->used) {
+		struct scatterlist sg;
+
+		sg_set_buf(&sg, ctx(tfm)->data, ctx(tfm)->used);	/* feed what has been staged so far */
+		crypto_hash_update(&ctx(tfm)->fallback, &sg, sg.length);
+	}
+
+	ctx(tfm)->used = 0;	/* the staging buffer is considered empty from here on */
+	ctx(tfm)->bypass = 1;
+}
+
+static void padlock_sha_init(struct crypto_tfm *tfm)
+{	/* Start a new digest: empty staging buffer, fallback not active. */
+	ctx(tfm)->used = 0;
+	ctx(tfm)->bypass = 0;
+}
+
+static void padlock_sha_update(struct crypto_tfm *tfm,
+			const uint8_t *data, unsigned int length)
+{
+	/* Our buffer is always one page; input that would overflow it goes to the fallback. */
+	if (unlikely(!ctx(tfm)->bypass &&
+		     (ctx(tfm)->used + length > PAGE_SIZE)))
+		padlock_sha_bypass(tfm);
+
+	if (unlikely(ctx(tfm)->bypass)) {	/* fallback active: pass data straight to it */
+		struct scatterlist sg;
+		sg_set_buf(&sg, (uint8_t *)data, length);
+		crypto_hash_update(&ctx(tfm)->fallback, &sg, length);
+		return;
+	}
+
+	memcpy(ctx(tfm)->data + ctx(tfm)->used, data, length);	/* append to the staged input */
+	ctx(tfm)->used += length;
+}
+
+static inline void padlock_output_block(uint32_t *src,
+		 	uint32_t *dst, size_t count)
+{	/* Copy count 32-bit words from src to dst, applying swab32() to each word. */
+	while (count--)
+		*dst++ = swab32(*src++);
+}
+
+static void padlock_do_sha1(const char *in, char *out, int count)
+{
+	/* Hash count bytes at in with "rep xsha1"; five swapped words go to out.
+	 * We can't store directly to *out as it may be unaligned. Don't reduce
+	 * buf below 128 Bytes! PadLock microcode needs it that big. */
+	char buf[128+16];
+	char *result = NEAREST_ALIGNED(buf);
+
+	((uint32_t *)result)[0] = 0x67452301;	/* SHA-1 initial hash value */
+	((uint32_t *)result)[1] = 0xEFCDAB89;
+	((uint32_t *)result)[2] = 0x98BADCFE;
+	((uint32_t *)result)[3] = 0x10325476;
+	((uint32_t *)result)[4] = 0xC3D2E1F0;
+ 
+	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
+		      : "+S"(in), "+D"(result)
+		      : "c"(count), "a"(0) : "memory"); /* asm reads *in, updates *result */
+
+	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
+}
+
+static void padlock_do_sha256(const char *in, char *out, int count)
+{
+	/* Hash count bytes at in with "rep xsha256"; eight swapped words go to out.
+	 * We can't store directly to *out as it may be unaligned. Don't reduce
+	 * buf below 128 Bytes! PadLock microcode needs it that big. */
+	char buf[128+16];
+	char *result = NEAREST_ALIGNED(buf);
+
+	((uint32_t *)result)[0] = 0x6A09E667;	/* SHA-256 initial hash value */
+	((uint32_t *)result)[1] = 0xBB67AE85;
+	((uint32_t *)result)[2] = 0x3C6EF372;
+	((uint32_t *)result)[3] = 0xA54FF53A;
+	((uint32_t *)result)[4] = 0x510E527F;
+	((uint32_t *)result)[5] = 0x9B05688C;
+	((uint32_t *)result)[6] = 0x1F83D9AB;
+	((uint32_t *)result)[7] = 0x5BE0CD19;
+
+	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
+		      : "+S"(in), "+D"(result)
+		      : "c"(count), "a"(0) : "memory"); /* asm reads *in, updates *result */
+
+	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
+}
+
+static void padlock_sha_final(struct crypto_tfm *tfm, uint8_t *out)
+{	/* Write the digest to out: from the fallback if active, else via f_sha_padlock. */
+	if (unlikely(ctx(tfm)->bypass)) {
+		crypto_hash_final(&ctx(tfm)->fallback, out);
+		ctx(tfm)->bypass = 0;	/* fallback is finished; clear the flag */
+		return;
+	}
+
+	/* Pass the whole staged input buffer to PadLock microcode in one call. */
+	ctx(tfm)->f_sha_padlock(ctx(tfm)->data, out, ctx(tfm)->used);
+
+	ctx(tfm)->used = 0;	/* staged input has been consumed */
+}
+
+static int padlock_cra_init(struct crypto_tfm *tfm)
+{	/* Shared tfm setup: allocate the staging page and a fallback hash; 0 or -errno. */
+	const char *fallback_driver_name = tfm->__crt_alg->cra_name;	/* fallback is looked up by cra_name */
+	struct crypto_hash *fallback_tfm;
+
+	/* For now we'll allocate one page. This
+	 * could eventually be configurable one day. */
+	ctx(tfm)->data = (char *)__get_free_page(GFP_KERNEL);
+	if (!ctx(tfm)->data)
+		return -ENOMEM;
+
+	/* Allocate a fallback and abort if it failed. */
+	fallback_tfm = crypto_alloc_hash(fallback_driver_name, 0,
+					 CRYPTO_ALG_ASYNC |
+					 CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback_tfm)) {
+		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
+		       fallback_driver_name);
+		free_page((unsigned long)(ctx(tfm)->data));	/* release the page allocated above */
+		return PTR_ERR(fallback_tfm);
+	}
+
+	ctx(tfm)->fallback.tfm = fallback_tfm;
+	return 0;
+}
+
+static int padlock_sha1_cra_init(struct crypto_tfm *tfm)
+{	/* SHA-1 tfm setup: select padlock_do_sha1, then run the shared init. */
+	ctx(tfm)->f_sha_padlock = padlock_do_sha1;
+
+	return padlock_cra_init(tfm);
+}
+
+static int padlock_sha256_cra_init(struct crypto_tfm *tfm)
+{	/* SHA-256 tfm setup: select padlock_do_sha256, then run the shared init. */
+	ctx(tfm)->f_sha_padlock = padlock_do_sha256;
+
+	return padlock_cra_init(tfm);
+}
+
+static void padlock_cra_exit(struct crypto_tfm *tfm)
+{	/* tfm teardown: free the staging page and the fallback hash. */
+	if (ctx(tfm)->data) {
+		free_page((unsigned long)(ctx(tfm)->data));
+		ctx(tfm)->data = NULL;	/* leave no stale pointer behind */
+	}
+
+	crypto_free_hash(ctx(tfm)->fallback.tfm);
+	ctx(tfm)->fallback.tfm = NULL;
+}
+
+static struct crypto_alg sha1_alg = {	/* PadLock SHA-1 digest; registered in padlock_init() */
+	.cra_name		=	"sha1",	/* also the name padlock_cra_init() uses for the fallback */
+	.cra_driver_name	=	"sha1-padlock",
+	.cra_priority		=	PADLOCK_CRA_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_DIGEST |
+					CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize		=	SHA1_HMAC_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(sha1_alg.cra_list),
+	.cra_init		=	padlock_sha1_cra_init,	/* selects padlock_do_sha1 */
+	.cra_exit		=	padlock_cra_exit,
+	.cra_u			=	{
+		.digest = {
+			.dia_digestsize	=	SHA1_DIGEST_SIZE,
+			.dia_init   	= 	padlock_sha_init,
+			.dia_update 	=	padlock_sha_update,
+			.dia_final  	=	padlock_sha_final,
+		}
+	}
+};
+
+static struct crypto_alg sha256_alg = {	/* PadLock SHA-256 digest; registered in padlock_init() */
+	.cra_name		=	"sha256",	/* also the name padlock_cra_init() uses for the fallback */
+	.cra_driver_name	=	"sha256-padlock",
+	.cra_priority		=	PADLOCK_CRA_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_DIGEST |
+					CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize		=	SHA256_HMAC_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(sha256_alg.cra_list),
+	.cra_init		=	padlock_sha256_cra_init,	/* selects padlock_do_sha256 */
+	.cra_exit		=	padlock_cra_exit,
+	.cra_u			=	{
+		.digest = {
+			.dia_digestsize	=	SHA256_DIGEST_SIZE,
+			.dia_init   	= 	padlock_sha_init,
+			.dia_update 	=	padlock_sha_update,
+			.dia_final  	=	padlock_sha_final,
+		}
+	}
+};
+
+static void __init padlock_sha_check_fallbacks(void)
+{	/* Warn (only) if no suitable "sha1"/"sha256" fallback is reported by crypto_has_hash(). */
+	if (!crypto_has_hash("sha1", 0, CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_NEED_FALLBACK))
+		printk(KERN_WARNING PFX
+		       "Couldn't load fallback module for sha1.\n");
+
+	if (!crypto_has_hash("sha256", 0, CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_NEED_FALLBACK))
+		printk(KERN_WARNING PFX
+		       "Couldn't load fallback module for sha256.\n");
+}
+
+static int __init padlock_init(void)
+{	/* Module init: check cpu_has_phe{,_enabled}, then register sha1_alg and sha256_alg. */
+	int rc = -ENODEV;
+
+	if (!cpu_has_phe) {
+		printk(KERN_ERR PFX "VIA PadLock Hash Engine not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!cpu_has_phe_enabled) {
+		printk(KERN_ERR PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
+		return -ENODEV;
+	}
+
+	padlock_sha_check_fallbacks();	/* only warns; registration proceeds regardless */
+
+	rc = crypto_register_alg(&sha1_alg);
+	if (rc)
+		goto out;
+
+	rc = crypto_register_alg(&sha256_alg);
+	if (rc)
+		goto out_unreg1;
+
+	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
+
+	return 0;
+
+out_unreg1:	/* sha256 failed: undo the sha1 registration, then fall through */
+	crypto_unregister_alg(&sha1_alg);
+out:	/* common failure exit: log and return the registration error */
+	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
+	return rc;
+}
+
+static void __exit padlock_fini(void)
+{	/* Module exit: unregister both algorithms registered by padlock_init(). */
+	crypto_unregister_alg(&sha1_alg);
+	crypto_unregister_alg(&sha256_alg);
+}
+
+module_init(padlock_init);
+module_exit(padlock_fini);
+
+MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig");
+
+MODULE_ALIAS("sha1-padlock");
+MODULE_ALIAS("sha256-padlock");
diff --git a/drivers/crypto/padlock.c b/drivers/crypto/padlock.c
new file mode 100644
index 0000000..d6d7dd5
--- /dev/null
+++ b/drivers/crypto/padlock.c
@@ -0,0 +1,58 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for VIA PadLock hardware crypto engine.
+ *
+ * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include "padlock.h"
+
+static int __init padlock_init(void)
+{	/* Module init: count the PadLock drivers that are available; -ENODEV if none. */
+	int success = 0;	/* number of drivers the crypto_has_*() probes found */
+
+	if (crypto_has_cipher("aes-padlock", 0, 0))
+		success++;
+
+	if (crypto_has_hash("sha1-padlock", 0, 0))
+		success++;
+
+	if (crypto_has_hash("sha256-padlock", 0, 0))
+		success++;
+
+	if (!success) {
+		printk(KERN_WARNING PFX "No VIA PadLock drivers have been loaded.\n");
+		return -ENODEV;
+	}
+
+	printk(KERN_NOTICE PFX "%d drivers are available.\n", success);
+
+	return 0;
+}
+
+static void __exit padlock_fini(void)
+{	/* Intentionally empty: padlock_init() in this file keeps no state to release. */
+}
+
+module_init(padlock_init);
+module_exit(padlock_fini);
+
+MODULE_DESCRIPTION("Load all configured PadLock algorithms.");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig");
+
diff --git a/drivers/crypto/padlock.h b/drivers/crypto/padlock.h
index b78489b..b728e45 100644
--- a/drivers/crypto/padlock.h
+++ b/drivers/crypto/padlock.h
@@ -15,22 +15,9 @@
 
 #define PADLOCK_ALIGNMENT 16
 
-/* Control word. */
-struct cword {
-	unsigned int __attribute__ ((__packed__))
-		rounds:4,
-		algo:3,
-		keygen:1,
-		interm:1,
-		encdec:1,
-		ksize:2;
-} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));
-
 #define PFX	"padlock: "
 
-#ifdef CONFIG_CRYPTO_DEV_PADLOCK_AES
-int padlock_init_aes(void);
-void padlock_fini_aes(void);
-#endif
+#define PADLOCK_CRA_PRIORITY	300
+#define PADLOCK_COMPOSITE_PRIORITY 400
 
 #endif	/* _CRYPTO_PADLOCK_H */
diff --git a/drivers/i2c/busses/i2c-powermac.c b/drivers/i2c/busses/i2c-powermac.c
index 53bb435..d658d91 100644
--- a/drivers/i2c/busses/i2c-powermac.c
+++ b/drivers/i2c/busses/i2c-powermac.c
@@ -207,7 +207,8 @@
 	struct pmac_i2c_bus *bus = dev->platform_data;
 	struct device_node *parent = NULL;
 	struct i2c_adapter *adapter;
-	char name[32], *basename;
+	char name[32];
+	const char *basename;
 	int rc;
 
 	if (bus == NULL)
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index ebf961f..996c694 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1154,7 +1154,7 @@
 pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 {
 	struct device_node *np = pmif->node;
-	int *bidp;
+	const int *bidp;
 
 	pmif->cable_80 = 0;
 	pmif->broken_dma = pmif->broken_dma_warn = 0;
@@ -1176,14 +1176,14 @@
 		pmif->broken_dma = 1;
 	}
 
-	bidp = (int *)get_property(np, "AAPL,bus-id", NULL);
+	bidp = get_property(np, "AAPL,bus-id", NULL);
 	pmif->aapl_bus_id =  bidp ? *bidp : 0;
 
 	/* Get cable type from device-tree */
 	if (pmif->kind == controller_kl_ata4 || pmif->kind == controller_un_ata6
 	    || pmif->kind == controller_k2_ata6
 	    || pmif->kind == controller_sh_ata6) {
-		char* cable = get_property(np, "cable-type", NULL);
+		const char* cable = get_property(np, "cable-type", NULL);
 		if (cable && !strncmp(cable, "80-", 3))
 			pmif->cable_80 = 1;
 	}
@@ -1326,7 +1326,7 @@
 	if (macio_irq_count(mdev) == 0) {
 		printk(KERN_WARNING "ide%d: no intrs for device %s, using 13\n",
 			i, mdev->ofdev.node->full_name);
-		irq = 13;
+		irq = irq_create_mapping(NULL, 13);
 	} else
 		irq = macio_irq(mdev, 0);
 
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 69a53d4..9edface 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -14,7 +14,7 @@
 	---help---
 	  Userspace InfiniBand Management Datagram (MAD) support.  This
 	  is the kernel side of the userspace MAD support, which allows
-	  userspace processes to send and receive MADs. You will also 
+	  userspace processes to send and receive MADs. You will also
 	  need libibumad from <http://www.openib.org>.
 
 config INFINIBAND_USER_ACCESS
@@ -36,6 +36,8 @@
 
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
+source "drivers/infiniband/hw/ehca/Kconfig"
+source "drivers/infiniband/hw/amso1100/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index c7ff58c..2b5d109 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,6 +1,8 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
-obj-$(CONFIG_IPATH_CORE)		+= hw/ipath/
+obj-$(CONFIG_INFINIBAND_IPATH)		+= hw/ipath/
+obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)		+= ulp/iser/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 68e73ec..163d991 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,7 +1,7 @@
 infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o
 
 obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
-					ib_cm.o $(infiniband-y)
+					ib_cm.o iw_cm.o $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o
 
@@ -14,6 +14,8 @@
 
 ib_cm-y :=			cm.o
 
+iw_cm-y :=			iwcm.o
+
 rdma_cm-y :=			cma.o
 
 ib_addr-y :=			addr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1205e80..9cbf09e 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -61,12 +61,15 @@
 static DECLARE_WORK(work, process_req, NULL);
 static struct workqueue_struct *addr_wq;
 
-static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
-		     unsigned char *dst_dev_addr)
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+		     const unsigned char *dst_dev_addr)
 {
 	switch (dev->type) {
 	case ARPHRD_INFINIBAND:
-		dev_addr->dev_type = IB_NODE_CA;
+		dev_addr->dev_type = RDMA_NODE_IB_CA;
+		break;
+	case ARPHRD_ETHER:
+		dev_addr->dev_type = RDMA_NODE_RNIC;
 		break;
 	default:
 		return -EADDRNOTAVAIL;
@@ -78,6 +81,7 @@
 		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 	return 0;
 }
+EXPORT_SYMBOL(rdma_copy_addr);
 
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
@@ -89,7 +93,7 @@
 	if (!dev)
 		return -EADDRNOTAVAIL;
 
-	ret = copy_addr(dev_addr, dev, NULL);
+	ret = rdma_copy_addr(dev_addr, dev, NULL);
 	dev_put(dev);
 	return ret;
 }
@@ -161,7 +165,7 @@
 
 	/* If the device does ARP internally, return 'done' */
 	if (rt->idev->dev->flags & IFF_NOARP) {
-		copy_addr(addr, rt->idev->dev, NULL);
+		rdma_copy_addr(addr, rt->idev->dev, NULL);
 		goto put;
 	}
 
@@ -181,7 +185,7 @@
 		src_in->sin_addr.s_addr = rt->rt_src;
 	}
 
-	ret = copy_addr(addr, neigh->dev, neigh->ha);
+	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
 release:
 	neigh_release(neigh);
 put:
@@ -245,7 +249,7 @@
 	if (ZERONET(src_ip)) {
 		src_in->sin_family = dst_in->sin_family;
 		src_in->sin_addr.s_addr = dst_ip;
-		ret = copy_addr(addr, dev, dev->dev_addr);
+		ret = rdma_copy_addr(addr, dev, dev->dev_addr);
 	} else if (LOOPBACK(src_ip)) {
 		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
 		if (!ret)
@@ -327,10 +331,10 @@
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
-static int netevent_callback(struct notifier_block *self, unsigned long event, 
+static int netevent_callback(struct notifier_block *self, unsigned long event,
 	void *ctx)
 {
-	if (event == NETEVENT_NEIGH_UPDATE) {  
+	if (event == NETEVENT_NEIGH_UPDATE) {
 		struct neighbour *neigh = ctx;
 
 		if (neigh->dev->type == ARPHRD_INFINIBAND &&
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 75313ad..20e9f64 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -62,12 +62,13 @@
 
 static inline int start_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : 1;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
 }
 
 static inline int end_port(struct ib_device *device)
 {
-	return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+		0 : device->phys_port_cnt;
 }
 
 int ib_get_cached_gid(struct ib_device *device,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0de335b..f35fcc4 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -41,6 +41,7 @@
 #include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/random.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
@@ -73,6 +74,7 @@
 	struct rb_root remote_id_table;
 	struct rb_root remote_sidr_table;
 	struct idr local_id_table;
+	__be32 random_id_operand;
 	struct workqueue_struct *wq;
 } cm;
 
@@ -177,7 +179,7 @@
 	if (IS_ERR(ah))
 		return PTR_ERR(ah);
 
-	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 
+	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
 			       cm_id_priv->av.pkey_index,
 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC);
@@ -299,15 +301,17 @@
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
-	int ret;
+	int ret, id;
 	static int next_id;
 
 	do {
 		spin_lock_irqsave(&cm.lock, flags);
-		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++,
-					(__force int *) &cm_id_priv->id.local_id);
+		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
+					next_id++, &id);
 		spin_unlock_irqrestore(&cm.lock, flags);
 	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+
+	cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand);
 	return ret;
 }
 
@@ -316,7 +320,8 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&cm.lock, flags);
-	idr_remove(&cm.local_id_table, (__force int) local_id);
+	idr_remove(&cm.local_id_table,
+		   (__force int) (local_id ^ cm.random_id_operand));
 	spin_unlock_irqrestore(&cm.lock, flags);
 }
 
@@ -324,7 +329,8 @@
 {
 	struct cm_id_private *cm_id_priv;
 
-	cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id);
+	cm_id_priv = idr_find(&cm.local_id_table,
+			      (__force int) (local_id ^ cm.random_id_operand));
 	if (cm_id_priv) {
 		if (cm_id_priv->id.remote_id == remote_id)
 			atomic_inc(&cm_id_priv->refcount);
@@ -679,6 +685,8 @@
 {
 	int wait_time;
 
+	cm_cleanup_timewait(cm_id_priv->timewait_info);
+
 	/*
 	 * The cm_id could be destroyed by the user before we exit timewait.
 	 * To protect against this, we search for the cm_id after exiting
@@ -1354,7 +1362,7 @@
 							    id.local_id);
 	if (IS_ERR(cm_id_priv->timewait_info)) {
 		ret = PTR_ERR(cm_id_priv->timewait_info);
-		goto error1;
+		goto destroy;
 	}
 	cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
 	cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
@@ -1363,7 +1371,8 @@
 	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
 	if (!listen_cm_id_priv) {
 		ret = -EINVAL;
-		goto error2;
+		kfree(cm_id_priv->timewait_info);
+		goto destroy;
 	}
 
 	cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1373,12 +1382,22 @@
 
 	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
-	if (ret)
-		goto error3;
+	if (ret) {
+		ib_get_cached_gid(work->port->cm_dev->device,
+				  work->port->port_num, 0, &work->path[0].sgid);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+			       &work->path[0].sgid, sizeof work->path[0].sgid,
+			       NULL, 0);
+		goto rejected;
+	}
 	if (req_msg->alt_local_lid) {
 		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
-		if (ret)
-			goto error3;
+		if (ret) {
+			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+				       &work->path[0].sgid,
+				       sizeof work->path[0].sgid, NULL, 0);
+			goto rejected;
+		}
 	}
 	cm_id_priv->tid = req_msg->hdr.tid;
 	cm_id_priv->timeout_ms = cm_convert_to_ms(
@@ -1400,12 +1419,11 @@
 	cm_deref_id(listen_cm_id_priv);
 	return 0;
 
-error3:	atomic_dec(&cm_id_priv->refcount);
+rejected:
+	atomic_dec(&cm_id_priv->refcount);
 	cm_deref_id(listen_cm_id_priv);
-	cm_cleanup_timewait(cm_id_priv->timewait_info);
-error2:	kfree(cm_id_priv->timewait_info);
-	cm_id_priv->timewait_info = NULL;
-error1:	ib_destroy_cm_id(&cm_id_priv->id);
+destroy:
+	ib_destroy_cm_id(cm_id);
 	return ret;
 }
 
@@ -2072,8 +2090,9 @@
 			spin_unlock_irqrestore(&cm.lock, flags);
 			return NULL;
 		}
-		cm_id_priv = idr_find(&cm.local_id_table,
-				      (__force int) timewait_info->work.local_id);
+		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
+				      (timewait_info->work.local_id ^
+				       cm.random_id_operand));
 		if (cm_id_priv) {
 			if (cm_id_priv->id.remote_id == remote_id)
 				atomic_inc(&cm_id_priv->refcount);
@@ -3125,7 +3144,8 @@
 		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
 					   IB_ACCESS_REMOTE_WRITE;
 		if (cm_id_priv->responder_resources)
-			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
+			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
+						    IB_ACCESS_REMOTE_ATOMIC;
 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
 		qp_attr->port_num = cm_id_priv->av.port->port_num;
 		ret = 0;
@@ -3262,6 +3282,9 @@
 	int ret;
 	u8 i;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
 			 device->phys_port_cnt, GFP_KERNEL);
 	if (!cm_dev)
@@ -3349,6 +3372,7 @@
 	cm.remote_qp_table = RB_ROOT;
 	cm.remote_sidr_table = RB_ROOT;
 	idr_init(&cm.local_id_table);
+	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
 	idr_pre_get(&cm.local_id_table, GFP_KERNEL);
 
 	cm.wq = create_workqueue("ib_cm");
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5d625a8..1178bd4 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/idr.h>
+#include <linux/inetdevice.h>
 
 #include <net/tcp.h>
 
@@ -43,6 +44,7 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
+#include <rdma/iw_cm.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -60,6 +62,7 @@
 	.remove = cma_remove_one
 };
 
+static struct ib_sa_client sa_client;
 static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
@@ -124,6 +127,7 @@
 	int			query_id;
 	union {
 		struct ib_cm_id	*ib;
+		struct iw_cm_id	*iw;
 	} cm_id;
 
 	u32			seq_num;
@@ -259,15 +263,24 @@
 	id_priv->cma_dev = NULL;
 }
 
-static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
+	enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
 	struct cma_device *cma_dev;
 	union ib_gid gid;
 	int ret = -ENODEV;
 
-	ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid),
+	switch (rdma_node_get_transport(dev_type)) {
+	case RDMA_TRANSPORT_IB:
+		ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		break;
+	case RDMA_TRANSPORT_IWARP:
+		iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		break;
+	default:
+		return -ENODEV;
+	}
 
-	mutex_lock(&lock);
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		ret = ib_find_cached_gid(cma_dev->device, &gid,
 					 &id_priv->id.port_num, NULL);
@@ -276,20 +289,9 @@
 			break;
 		}
 	}
-	mutex_unlock(&lock);
 	return ret;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
-{
-	switch (id_priv->id.route.addr.dev_addr.dev_type) {
-	case IB_NODE_CA:
-		return cma_acquire_ib_dev(id_priv);
-	default:
-		return -ENODEV;
-	}
-}
-
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
@@ -347,6 +349,16 @@
 					  IB_QP_PKEY_INDEX | IB_QP_PORT);
 }
 
+static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{	/* Move qp to IB_QPS_INIT with local-write access; returns ib_modify_qp()'s result. */
+	struct ib_qp_attr qp_attr;	/* only the two fields named in the mask below are set */
+
+	qp_attr.qp_state = IB_QPS_INIT;
+	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
+}
+
 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 		   struct ib_qp_init_attr *qp_init_attr)
 {
@@ -362,10 +374,13 @@
 	if (IS_ERR(qp))
 		return PTR_ERR(qp);
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_init_ib_qp(id_priv, qp);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_init_iw_qp(id_priv, qp);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -451,13 +466,17 @@
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
 					 qp_attr_mask);
 		if (qp_attr->qp_state == IB_QPS_RTR)
 			qp_attr->rq_psn = id_priv->seq_num;
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+					qp_attr_mask);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -590,8 +609,8 @@
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
 {
-	switch (id_priv->id.device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		if (id_priv->query)
 			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
 		break;
@@ -611,11 +630,15 @@
 	cma_exch(id_priv, CMA_DESTROYING);
 
 	if (id_priv->cma_dev) {
-		switch (id_priv->id.device->node_type) {
-		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+		case RDMA_TRANSPORT_IB:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
@@ -689,19 +712,25 @@
 	state = cma_exch(id_priv, CMA_DESTROYING);
 	cma_cancel_operation(id_priv, state);
 
+	mutex_lock(&lock);
 	if (id_priv->cma_dev) {
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
-	 		if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+		mutex_unlock(&lock);
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
+			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+				iw_destroy_cm_id(id_priv->cm_id.iw);
+			break;
 		default:
 			break;
 		}
-	  	mutex_lock(&lock);
+		mutex_lock(&lock);
 		cma_detach_from_dev(id_priv);
-		mutex_unlock(&lock);
 	}
+	mutex_unlock(&lock);
 
 	cma_release_port(id_priv);
 	cma_deref_id(id_priv);
@@ -869,7 +898,7 @@
 	ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
 	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 	ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
-	rt->addr.dev_addr.dev_type = IB_NODE_CA;
+	rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = CMA_CONNECT;
@@ -898,7 +927,9 @@
 	}
 
 	atomic_inc(&conn_id->dev_remove);
-	ret = cma_acquire_ib_dev(conn_id);
+	mutex_lock(&lock);
+	ret = cma_acquire_dev(conn_id);
+	mutex_unlock(&lock);
 	if (ret) {
 		ret = -ENODEV;
 		cma_release_remove(conn_id);
@@ -982,6 +1013,130 @@
 	}
 }
 
+static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
+{
+	struct rdma_id_private *id_priv = iw_id->context;
+	enum rdma_cm_event_type event = 0;
+	struct sockaddr_in *sin;
+	int ret = 0;
+
+	atomic_inc(&id_priv->dev_remove);
+
+	switch (iw_event->event) {
+	case IW_CM_EVENT_CLOSE:
+		event = RDMA_CM_EVENT_DISCONNECTED;
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+		*sin = iw_event->local_addr;
+		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+		*sin = iw_event->remote_addr;
+		if (iw_event->status)
+			event = RDMA_CM_EVENT_REJECTED;
+		else
+			event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+	default:
+		BUG_ON(1);
+	}
+
+	ret = cma_notify_user(id_priv, event, iw_event->status,
+			      iw_event->private_data,
+			      iw_event->private_data_len);
+	if (ret) {
+		/* Destroy the CM ID by returning a non-zero value. */
+		id_priv->cm_id.iw = NULL;
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return ret;
+	}
+
+	cma_release_remove(id_priv);
+	return ret;
+}
+
+static int iw_conn_req_handler(struct iw_cm_id *cm_id,
+			       struct iw_cm_event *iw_event)
+{
+	struct rdma_cm_id *new_cm_id;
+	struct rdma_id_private *listen_id, *conn_id;
+	struct sockaddr_in *sin;
+	struct net_device *dev = NULL;
+	int ret;
+
+	listen_id = cm_id->context;
+	atomic_inc(&listen_id->dev_remove);
+	if (!cma_comp(listen_id, CMA_LISTEN)) {
+		ret = -ECONNABORTED;
+		goto out;
+	}
+
+	/* Create a new RDMA id for the new IW CM ID */
+	new_cm_id = rdma_create_id(listen_id->id.event_handler,
+				   listen_id->id.context,
+				   RDMA_PS_TCP);
+	if (!new_cm_id) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	conn_id = container_of(new_cm_id, struct rdma_id_private, id);
+	atomic_inc(&conn_id->dev_remove);
+	conn_id->state = CMA_CONNECT;
+
+	dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
+	if (!dev) {
+		ret = -EADDRNOTAVAIL;
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+
+	mutex_lock(&lock);
+	ret = cma_acquire_dev(conn_id);
+	mutex_unlock(&lock);
+	if (ret) {
+		cma_release_remove(conn_id);
+		rdma_destroy_id(new_cm_id);
+		goto out;
+	}
+
+	conn_id->cm_id.iw = cm_id;
+	cm_id->context = conn_id;
+	cm_id->cm_handler = cma_iw_handler;
+
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+	*sin = iw_event->local_addr;
+	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+	*sin = iw_event->remote_addr;
+
+	ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
+			      iw_event->private_data,
+			      iw_event->private_data_len);
+	if (ret) {
+		/* User wants to destroy the CM ID */
+		conn_id->cm_id.iw = NULL;
+		cma_exch(conn_id, CMA_DESTROYING);
+		cma_release_remove(conn_id);
+		rdma_destroy_id(&conn_id->id);
+	}
+
+out:
+	if (dev)
+		dev_put(dev);
+	cma_release_remove(listen_id);
+	return ret;
+}
+
 static int cma_ib_listen(struct rdma_id_private *id_priv)
 {
 	struct ib_cm_compare_data compare_data;
@@ -1011,6 +1166,30 @@
 	return ret;
 }
 
+static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
+{
+	int ret;
+	struct sockaddr_in *sin;
+
+	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
+					    iw_conn_req_handler,
+					    id_priv);
+	if (IS_ERR(id_priv->cm_id.iw))
+		return PTR_ERR(id_priv->cm_id.iw);
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	id_priv->cm_id.iw->local_addr = *sin;
+
+	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
+
+	if (ret) {
+		iw_destroy_cm_id(id_priv->cm_id.iw);
+		id_priv->cm_id.iw = NULL;
+	}
+
+	return ret;
+}
+
 static int cma_listen_handler(struct rdma_cm_id *id,
 			      struct rdma_cm_event *event)
 {
@@ -1087,12 +1266,17 @@
 
 	id_priv->backlog = backlog;
 	if (id->device) {
-		switch (id->device->node_type) {
-		case IB_NODE_CA:
+		switch (rdma_node_get_transport(id->device->node_type)) {
+		case RDMA_TRANSPORT_IB:
 			ret = cma_ib_listen(id_priv);
 			if (ret)
 				goto err;
 			break;
+		case RDMA_TRANSPORT_IWARP:
+			ret = cma_iw_listen(id_priv, backlog);
+			if (ret)
+				goto err;
+			break;
 		default:
 			ret = -ENOSYS;
 			goto err;
@@ -1140,7 +1324,7 @@
 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
 	path_rec.numb_path = 1;
 
-	id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
+	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
 				id_priv->id.port_num, &path_rec,
 				IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
 				IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
@@ -1231,6 +1415,23 @@
 }
 EXPORT_SYMBOL(rdma_set_ib_paths);
 
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+	struct cma_work *work;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler, work);
+	work->old_state = CMA_ROUTE_QUERY;
+	work->new_state = CMA_ROUTE_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+	queue_work(cma_wq, &work->work);
+	return 0;
+}
+
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 {
 	struct rdma_id_private *id_priv;
@@ -1241,10 +1442,13 @@
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_resolve_ib_route(id_priv, timeout_ms);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_resolve_iw_route(id_priv, timeout_ms);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1309,16 +1513,26 @@
 	enum rdma_cm_event_type event;
 
 	atomic_inc(&id_priv->dev_remove);
-	if (!id_priv->cma_dev && !status)
+
+	/*
+	 * Grab mutex to block rdma_destroy_id() from removing the device while
+	 * we're trying to acquire it.
+	 */
+	mutex_lock(&lock);
+	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
+		mutex_unlock(&lock);
+		goto out;
+	}
+
+	if (!status && !id_priv->cma_dev)
 		status = cma_acquire_dev(id_priv);
+	mutex_unlock(&lock);
 
 	if (status) {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
+		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
 			goto out;
 		event = RDMA_CM_EVENT_ADDR_ERROR;
 	} else {
-		if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
-			goto out;
 		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
 		       ip_addr_size(src_addr));
 		event = RDMA_CM_EVENT_ADDR_RESOLVED;
@@ -1492,7 +1706,7 @@
 	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
 		if (cma_any_addr(&cur_id->id.route.addr.src_addr))
 			return -EADDRNOTAVAIL;
-		
+
 		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
 		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
 			return -EADDRINUSE;
@@ -1542,8 +1756,11 @@
 
 	if (!cma_any_addr(addr)) {
 		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
-		if (!ret)
+		if (!ret) {
+			mutex_lock(&lock);
 			ret = cma_acquire_dev(id_priv);
+			mutex_unlock(&lock);
+		}
 		if (ret)
 			goto err;
 	}
@@ -1649,6 +1866,47 @@
 	return ret;
 }
 
+/* Create an iw_cm_id for id_priv and start the iWARP connect. */
+static int cma_connect_iw(struct rdma_id_private *id_priv,
+			  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_id *cm_id;
+	struct sockaddr_in *sin;
+	int ret;
+	struct iw_cm_conn_param iw_param;
+
+	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
+	if (IS_ERR(cm_id))
+		return PTR_ERR(cm_id);
+
+	id_priv->cm_id.iw = cm_id;
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	cm_id->local_addr = *sin;
+
+	sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+	cm_id->remote_addr = *sin;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret) {
+		iw_destroy_cm_id(cm_id);
+		/* Clear it so rdma_destroy_id() won't destroy it again. */
+		id_priv->cm_id.iw = NULL;
+		return ret;
+	}
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp)
+		iw_param.qpn = id_priv->qp_num;
+	else
+		iw_param.qpn = conn_param->qp_num;
+	ret = iw_cm_connect(cm_id, &iw_param);
+	return ret;
+}
+
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@@ -1664,10 +1922,13 @@
 		id_priv->srq = conn_param->srq;
 	}
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = cma_connect_ib(id_priv, conn_param);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_connect_iw(id_priv, conn_param);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1708,6 +1969,28 @@
 	return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
 }
 
+static int cma_accept_iw(struct rdma_id_private *id_priv,
+		  struct rdma_conn_param *conn_param)
+{
+	struct iw_cm_conn_param iw_param;
+	int ret;
+
+	ret = cma_modify_qp_rtr(&id_priv->id);
+	if (ret)
+		return ret;
+
+	iw_param.ord = conn_param->initiator_depth;
+	iw_param.ird = conn_param->responder_resources;
+	iw_param.private_data = conn_param->private_data;
+	iw_param.private_data_len = conn_param->private_data_len;
+	if (id_priv->id.qp) {
+		iw_param.qpn = id_priv->qp_num;
+	} else
+		iw_param.qpn = conn_param->qp_num;
+
+	return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
+}
+
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
 	struct rdma_id_private *id_priv;
@@ -1723,13 +2006,16 @@
 		id_priv->srq = conn_param->srq;
 	}
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		if (conn_param)
 			ret = cma_accept_ib(id_priv, conn_param);
 		else
 			ret = cma_rep_recv(id_priv);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = cma_accept_iw(id_priv, conn_param);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1756,12 +2042,16 @@
 	if (!cma_comp(id_priv, CMA_CONNECT))
 		return -EINVAL;
 
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
 		ret = ib_send_cm_rej(id_priv->cm_id.ib,
 				     IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
 				     private_data, private_data_len);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_reject(id_priv->cm_id.iw,
+				   private_data, private_data_len);
+		break;
 	default:
 		ret = -ENOSYS;
 		break;
@@ -1780,17 +2070,20 @@
 	    !cma_comp(id_priv, CMA_DISCONNECT))
 		return -EINVAL;
 
-	ret = cma_modify_qp_err(id);
-	if (ret)
-		goto out;
-
-	switch (id->device->node_type) {
-	case IB_NODE_CA:
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		ret = cma_modify_qp_err(id);
+		if (ret)
+			goto out;
 		/* Initiate or respond to a disconnect. */
 		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
 			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
 		break;
+	case RDMA_TRANSPORT_IWARP:
+		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
+		break;
 	default:
+		ret = -EINVAL;
 		break;
 	}
 out:
@@ -1907,12 +2200,15 @@
 	if (!cma_wq)
 		return -ENOMEM;
 
+	ib_sa_register_client(&sa_client);
+
 	ret = ib_register_client(&cma_client);
 	if (ret)
 		goto err;
 	return 0;
 
 err:
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	return ret;
 }
@@ -1920,6 +2216,7 @@
 static void cma_cleanup(void)
 {
 	ib_unregister_client(&cma_client);
+	ib_sa_unregister_client(&sa_client);
 	destroy_workqueue(cma_wq);
 	idr_destroy(&sdp_ps);
 	idr_destroy(&tcp_ps);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b2f3cb9..63d2a39 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -385,7 +385,7 @@
 EXPORT_SYMBOL(ib_get_client_data);
 
 /**
- * ib_set_client_data - Get IB client context
+ * ib_set_client_data - Set IB client context
  * @device:Device to set context for
  * @client:Client to set context for
  * @data:Context to set
@@ -505,7 +505,7 @@
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
@@ -580,7 +580,7 @@
 		   u8 port_num, int port_modify_mask,
 		   struct ib_port_modify *port_modify)
 {
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		if (port_num)
 			return -EINVAL;
 	} else if (port_num < 1 || port_num > device->phys_port_cnt)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
new file mode 100644
index 0000000..c3fb304
--- /dev/null
+++ b/drivers/infiniband/core/iwcm.c
@@ -0,0 +1,1019 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_addr.h>
+
+#include "iwcm.h"
+
+MODULE_AUTHOR("Tom Tucker");
+MODULE_DESCRIPTION("iWARP CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct workqueue_struct *iwcm_wq;
+struct iwcm_work {
+	struct work_struct work;
+	struct iwcm_id_private *cm_id;
+	struct list_head list;
+	struct iw_cm_event event;
+	struct list_head free_list;
+};
+
+/*
+ * The following services provide a mechanism for pre-allocating iwcm_work
+ * elements.  The design pre-allocates them  based on the cm_id type:
+ *	LISTENING IDS: 	Get enough elements preallocated to handle the
+ *			listen backlog.
+ *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
+ *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
+ *
+ * Allocating them in connect and listen avoids having to deal
+ * with allocation failures on the event upcall from the provider (which
+ * is called in the interrupt context).
+ *
+ * One exception is when creating the cm_id for incoming connection requests.
+ * There are two cases:
+ * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
+ *    the backlog is exceeded, then no more connection request events will
+ *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
+ *    to the provider to reject the connection request.
+ * 2) in the connection request workqueue handler, cm_conn_req_handler().
+ *    If work elements cannot be allocated for the new connect request cm_id,
+ *    then IWCM will call the provider reject method.  This is ok since
+ *    cm_conn_req_handler() runs in the workqueue thread context.
+ */
+
+static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
+{
+	struct iwcm_work *work;
+
+	if (list_empty(&cm_id_priv->work_free_list))
+		return NULL;
+	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
+			  free_list);
+	list_del_init(&work->free_list);
+	return work;
+}
+
+static void put_work(struct iwcm_work *work)
+{
+	list_add(&work->free_list, &work->cm_id->work_free_list);
+}
+
+static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
+{
+	struct list_head *e, *tmp;
+
+	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+		kfree(list_entry(e, struct iwcm_work, free_list));
+}
+
+static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
+{
+	struct iwcm_work *work;
+
+	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
+	while (count--) {
+		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
+		if (!work) {
+			dealloc_work_entries(cm_id_priv);
+			return -ENOMEM;
+		}
+		work->cm_id = cm_id_priv;
+		INIT_LIST_HEAD(&work->list);
+		put_work(work);
+	}
+	return 0;
+}
+
+/*
+ * Save private data from incoming connection requests in the
+ * cm_id_priv so the low level driver doesn't have to.  Adjust
+ * the event ptr to point to the local copy.
+ */
+static int copy_private_data(struct iwcm_id_private *cm_id_priv,
+		       struct iw_cm_event *event)
+{
+	void *p;
+
+	p = kmalloc(event->private_data_len, GFP_ATOMIC);
+	if (!p)
+		return -ENOMEM;
+	memcpy(p, event->private_data, event->private_data_len);
+	event->private_data = p;
+	return 0;
+}
+
+/*
+ * Release a reference on cm_id. If the last reference is being removed
+ * and iw_destroy_cm_id is waiting, wake up the waiting thread.
+ */
+static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+{
+	int ret = 0;
+
+	BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+		BUG_ON(!list_empty(&cm_id_priv->work_list));
+		if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
+			BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
+			BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
+					&cm_id_priv->flags));
+			ret = 1;
+		}
+		complete(&cm_id_priv->destroy_comp);
+	}
+
+	return ret;
+}
+
+static void add_ref(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	atomic_inc(&cm_id_priv->refcount);
+}
+
+static void rem_ref(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	iwcm_deref_id(cm_id_priv);
+}
+
+static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
+
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 iw_cm_handler cm_handler,
+				 void *context)
+{
+	struct iwcm_id_private *cm_id_priv;
+
+	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
+	if (!cm_id_priv)
+		return ERR_PTR(-ENOMEM);
+
+	cm_id_priv->state = IW_CM_STATE_IDLE;
+	cm_id_priv->id.device = device;
+	cm_id_priv->id.cm_handler = cm_handler;
+	cm_id_priv->id.context = context;
+	cm_id_priv->id.event_handler = cm_event_handler;
+	cm_id_priv->id.add_ref = add_ref;
+	cm_id_priv->id.rem_ref = rem_ref;
+	spin_lock_init(&cm_id_priv->lock);
+	atomic_set(&cm_id_priv->refcount, 1);
+	init_waitqueue_head(&cm_id_priv->connect_wait);
+	init_completion(&cm_id_priv->destroy_comp);
+	INIT_LIST_HEAD(&cm_id_priv->work_list);
+	INIT_LIST_HEAD(&cm_id_priv->work_free_list);
+
+	return &cm_id_priv->id;
+}
+EXPORT_SYMBOL(iw_create_cm_id);
+
+
+static int iwcm_modify_qp_err(struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	if (!qp)
+		return -EINVAL;
+
+	qp_attr.qp_state = IB_QPS_ERR;
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * This is really the RDMAC CLOSING state. It is most similar to the
+ * IB SQD QP state.
+ */
+static int iwcm_modify_qp_sqd(struct ib_qp *qp)
+{
+	struct ib_qp_attr qp_attr;
+
+	BUG_ON(qp == NULL);
+	qp_attr.qp_state = IB_QPS_SQD;
+	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * Block if a passive or active connection is currently being processed. Then
+ * process the event as follows:
+ * - If we are ESTABLISHED, move to CLOSING and modify the QP state
+ *   based on the abrupt flag
+ * - If the connection is already in the CLOSING or IDLE state, the peer is
+ *   disconnecting concurrently with us and we've already seen the
+ *   DISCONNECT event -- ignore the request and return 0
+ * - Disconnect on a listening endpoint returns -EINVAL
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+	struct ib_qp *qp = NULL;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	/* Wait if we're currently in a connect or accept downcall */
+	wait_event(cm_id_priv->connect_wait,
+		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id_priv->state = IW_CM_STATE_CLOSING;
+
+		/* QP could be <nul> for user-mode client */
+		if (cm_id_priv->qp)
+			qp = cm_id_priv->qp;
+		else
+			ret = -EINVAL;
+		break;
+	case IW_CM_STATE_LISTEN:
+		ret = -EINVAL;
+		break;
+	case IW_CM_STATE_CLOSING:
+		/* remote peer closed first */
+	case IW_CM_STATE_IDLE:
+		/* accept or connect returned !0 */
+		break;
+	case IW_CM_STATE_CONN_RECV:
+		/*
+		 * App called disconnect before/without calling accept after
+		 * connect_request event delivered.
+		 */
+		break;
+	case IW_CM_STATE_CONN_SENT:
+		/* Can only get here if wait above fails */
+	default:
+		BUG();
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	if (qp) {
+		if (abrupt)
+			ret = iwcm_modify_qp_err(qp);
+		else
+			ret = iwcm_modify_qp_sqd(qp);
+
+		/*
+		 * If both sides are disconnecting the QP could
+		 * already be in ERR or SQD states
+		 */
+		ret = 0;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_disconnect);
+
+/*
+ * CM_ID <-- DESTROYING
+ *
+ * Clean up all resources associated with the connection and release
+ * the initial reference taken by iw_create_cm_id.
+ */
+static void destroy_cm_id(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	/*
+	 * Wait if we're currently in a connect or accept downcall. A
+	 * listening endpoint should never block here.
+	 */
+	wait_event(cm_id_priv->connect_wait,
+		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_LISTEN:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		/* destroy the listening endpoint */
+		ret = cm_id->device->iwcm->destroy_listen(cm_id);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		/* Abrupt close of the connection */
+		(void)iwcm_modify_qp_err(cm_id_priv->qp);
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CLOSING:
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		break;
+	case IW_CM_STATE_CONN_RECV:
+		/*
+		 * App called destroy before/without calling accept after
+		 * receiving connection request event notification.
+		 */
+		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		break;
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_DESTROYING:
+	default:
+		BUG();
+		break;
+	}
+	if (cm_id_priv->qp) {
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	(void)iwcm_deref_id(cm_id_priv);
+}
+
+/*
+ * This function is only called by the application thread and cannot
+ * be called by the event thread. The function will wait for all
+ * references to be released on the cm_id and then kfree the cm_id
+ * object.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *cm_id_priv;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
+
+	destroy_cm_id(cm_id);
+
+	wait_for_completion(&cm_id_priv->destroy_comp);
+
+	dealloc_work_entries(cm_id_priv);
+
+	kfree(cm_id_priv);
+}
+EXPORT_SYMBOL(iw_destroy_cm_id);
+
+/*
+ * CM_ID <-- LISTEN
+ *
+ * Start listening for connect requests. Generates one CONNECT_REQUEST
+ * event for each inbound connect request.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	ret = alloc_work_entries(cm_id_priv, backlog);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+		cm_id_priv->state = IW_CM_STATE_LISTEN;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+		if (ret)
+			cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_listen);
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * Rejects an inbound connection request. No events are generated.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id,
+		 const void *private_data,
+		 u8 private_data_len)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+		return -EINVAL;
+	}
+	cm_id_priv->state = IW_CM_STATE_IDLE;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->reject(cm_id, private_data,
+					  private_data_len);
+
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_reject);
+
+/*
+ * CM_ID <-- ESTABLISHED
+ *
+ * Accepts an inbound connection request and generates an ESTABLISHED
+ * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
+ * until the ESTABLISHED event is received from the provider.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id,
+		 struct iw_cm_conn_param *iw_param)
+{
+	struct iwcm_id_private *cm_id_priv;
+	struct ib_qp *qp;
+	unsigned long flags;
+	int ret;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV)
+		goto err_unlock;
+	/* Get the ib_qp given the QPN */
+	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+	if (!qp)
+		goto err_unlock;
+	cm_id->device->iwcm->add_ref(qp);
+	cm_id_priv->qp = qp;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
+	if (ret) {
+		/* An error on accept precludes provider events */
+		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		if (cm_id_priv->qp) {
+			cm_id->device->iwcm->rem_ref(qp);
+			cm_id_priv->qp = NULL;
+		}
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+	}
+
+	return ret;
+err_unlock:
+	/* Provider was never called: wake CONNECT_WAIT waiters ourselves */
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	wake_up_all(&cm_id_priv->connect_wait);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(iw_cm_accept);
+
+/*
+ * Active Side: CM_ID <-- CONN_SENT
+ *
+ * If successful, results in the generation of a CONNECT_REPLY
+ * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
+ * CONNECT_REPLY event is received from the provider.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct iwcm_id_private *cm_id_priv;
+	int ret = 0;
+	unsigned long flags;
+	struct ib_qp *qp;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	ret = alloc_work_entries(cm_id_priv, 4);
+	if (ret)
+		return ret;
+
+	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	if (cm_id_priv->state != IW_CM_STATE_IDLE)
+		goto err_unlock;
+
+	/* Get the ib_qp given the QPN */
+	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+	if (!qp)
+		goto err_unlock;
+	cm_id->device->iwcm->add_ref(qp);
+	cm_id_priv->qp = qp;
+	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+	if (ret) {
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		if (cm_id_priv->qp) {
+			cm_id->device->iwcm->rem_ref(qp);
+			cm_id_priv->qp = NULL;
+		}
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+		wake_up_all(&cm_id_priv->connect_wait);
+	}
+
+	return ret;
+err_unlock:
+	/* Provider was never called: wake CONNECT_WAIT waiters ourselves */
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	wake_up_all(&cm_id_priv->connect_wait);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(iw_cm_connect);
+
+/*
+ * Passive Side: new CM_ID <-- CONN_RECV
+ *
+ * Handles an inbound connect request. The function creates a new
+ * iw_cm_id to represent the new connection; it inherits the device,
+ * client callback function and context from the listening parent.
+ *
+ * The provider data and the local/remote addresses are taken from the
+ * event. The private data buffer attached to the event (set up through
+ * copy_private_data() in cm_event_handler()) is released here on every
+ * exit path, including the ones that ignore the request.
+ *
+ * An error on the child should not affect the parent, so this
+ * function does not return a value.
+ */
+static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
+				struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	struct iw_cm_id *cm_id;
+	struct iwcm_id_private *cm_id_priv;
+	int ret;
+
+	/*
+	 * The provider should never generate a connection request
+	 * event with a bad status.
+	 */
+	BUG_ON(iw_event->status);
+
+	/*
+	 * We could be destroying the listening id. If so, ignore this
+	 * upcall, but still release the event's private data.
+	 */
+	spin_lock_irqsave(&listen_id_priv->lock, flags);
+	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+		goto out;
+	}
+	spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
+	cm_id = iw_create_cm_id(listen_id_priv->id.device,
+				listen_id_priv->id.cm_handler,
+				listen_id_priv->id.context);
+	/* If the cm_id could not be created, ignore the request */
+	if (IS_ERR(cm_id))
+		goto out;
+
+	cm_id->provider_data = iw_event->provider_data;
+	cm_id->local_addr = iw_event->local_addr;
+	cm_id->remote_addr = iw_event->remote_addr;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+
+	ret = alloc_work_entries(cm_id_priv, 3);
+	if (ret) {
+		iw_cm_reject(cm_id, NULL, 0);
+		iw_destroy_cm_id(cm_id);
+		goto out;
+	}
+
+	/* Call the client CM handler; non-zero asks us to destroy the id */
+	ret = cm_id->cm_handler(cm_id, iw_event);
+	if (ret) {
+		set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+		destroy_cm_id(cm_id);
+		if (atomic_read(&cm_id_priv->refcount)==0)
+			kfree(cm_id_priv);	/* as in cm_work_handler() */
+	}
+out:
+	if (iw_event->private_data_len)
+		kfree(iw_event->private_data);
+}
+
+/*
+ * Passive Side: CM_ID <-- ESTABLISHED
+ *
+ * The provider generated an ESTABLISHED event which means that the MPA
+ * negotiation has completed successfully and we are now in MPA FPDU mode.
+ *
+ * This event can only be received in the CONN_RECV state. If the
+ * remote peer closed, the ESTABLISHED event would be received followed
+ * by the CLOSE event. If the app closes, it will block until we wake
+ * it up after processing this event. Returns the value returned by
+ * the client's cm_handler.
+ */
+static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
+			       struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+	/*
+	 * We clear the CONNECT_WAIT bit here to allow the callback
+	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
+	 * from a callback handler is not allowed.
+	 */
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); /* unlocked */
+	wake_up_all(&cm_id_priv->connect_wait);	/* after the client callback */
+
+	return ret;
+}
+
+/*
+ * Active Side: CM_ID <-- ESTABLISHED
+ *
+ * The app has called connect and is waiting for the established event to
+ * post its requests to the server. This event will wake up anyone
+ * blocked in iw_cm_disconnect or iw_destroy_cm_id.
+ */
+static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
+			       struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	/*
+	 * Clear the connect wait bit so a callback function calling
+	 * iw_cm_disconnect will not wait and deadlock this thread
+	 */
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+	if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+		cm_id_priv->id.local_addr = iw_event->local_addr;
+		cm_id_priv->id.remote_addr = iw_event->remote_addr;
+		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+	} else {
+		/* REJECTED or RESET: drop the QP reference, back to IDLE */
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+	/* The client has seen the event; release its private data buffer */
+	if (iw_event->private_data_len)
+		kfree(iw_event->private_data);
+
+	/* Wake up waiters on connect complete */
+	wake_up_all(&cm_id_priv->connect_wait);
+
+	return ret;	/* the client handler's return value */
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * If in the ESTABLISHED state, move to CLOSING; otherwise do nothing.
+ */
+static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
+				  struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	/* iw_event is not examined; only the cm_id state is updated. */
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
+		cm_id_priv->state = IW_CM_STATE_CLOSING;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * If in the ESTABLISHED or CLOSING states, the QP will have been
+ * moved by the provider to the ERR state. Disassociate the CM_ID from
+ * the QP, move to IDLE, and remove the 'connected' reference.
+ *
+ * If in some other state, the cm_id was destroyed asynchronously.
+ * This is the last reference that will result in waking up
+ * the app thread blocked in iw_destroy_cm_id.
+ */
+static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
+				  struct iw_cm_event *iw_event)
+{
+	unsigned long flags;
+	int ret = 0;	/* stays 0 when no client callback is made */
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	/* Drop the QP reference, if one is still held, whatever the state */
+	if (cm_id_priv->qp) {
+		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+		cm_id_priv->qp = NULL;
+	}
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_ESTABLISHED:
+	case IW_CM_STATE_CLOSING:
+		cm_id_priv->state = IW_CM_STATE_IDLE;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+		spin_lock_irqsave(&cm_id_priv->lock, flags); /* for unlock below */
+		break;
+	case IW_CM_STATE_DESTROYING:	/* no client callback in this state */
+		break;
+	default:
+		BUG();
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	return ret;
+}
+
+static int process_event(struct iwcm_id_private *cm_id_priv,
+			 struct iw_cm_event *iw_event)
+{
+	/*
+	 * Route the event to its state-machine handler. Handlers that
+	 * return void are reported as success (0).
+	 */
+	switch (iw_event->event) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		cm_conn_req_handler(cm_id_priv, iw_event);
+		return 0;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		return cm_conn_rep_handler(cm_id_priv, iw_event);
+	case IW_CM_EVENT_ESTABLISHED:
+		return cm_conn_est_handler(cm_id_priv, iw_event);
+	case IW_CM_EVENT_DISCONNECT:
+		cm_disconnect_handler(cm_id_priv, iw_event);
+		return 0;
+	case IW_CM_EVENT_CLOSE:
+		return cm_close_handler(cm_id_priv, iw_event);
+	default:
+		BUG();
+	}
+
+	/* Reached only if BUG() does not halt execution */
+	return 0;
+}
+
+/*
+ * Process events on the work_list for the cm_id. Each entry is copied
+ * and given back via put_work() under the lock, so the event must be
+ * processed from the local copy. If the callback function requests
+ * that the cm_id be deleted, IWCM_F_CALLBACK_DESTROY is set so that
+ * the cm_id is freed here when the last reference is removed, rather
+ * than by an app thread asleep on the destroy_comp list.
+ * Runs as the iwcm_wq work function set up by cm_event_handler().
+ */
+static void cm_work_handler(void *arg)
+{
+	struct iwcm_work *work = arg, lwork;
+	struct iwcm_id_private *cm_id_priv = work->cm_id;
+	unsigned long flags;
+	int empty;
+	int ret = 0;
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	empty = list_empty(&cm_id_priv->work_list);
+	while (!empty) {
+		work = list_entry(cm_id_priv->work_list.next,
+				  struct iwcm_work, list);
+		list_del_init(&work->list);
+		empty = list_empty(&cm_id_priv->work_list);
+		lwork = *work;	/* private copy; the entry is recycled below */
+		put_work(work);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		/* 'work' may already be reused: only touch the local copy */
+		ret = process_event(cm_id_priv, &lwork.event);
+		if (ret) {
+			set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+			destroy_cm_id(&cm_id_priv->id);
+		}
+		BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+		if (iwcm_deref_id(cm_id_priv))	/* drops this event's reference */
+			return;
+		/* Destroy requested from a callback and no references left */
+		if (atomic_read(&cm_id_priv->refcount)==0 &&
+		    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+			dealloc_work_entries(cm_id_priv);
+			kfree(cm_id_priv);
+			return;
+		}
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * This function is called in interrupt context. Schedule events on
+ * the iwcm_wq thread to allow callback functions to downcall into
+ * the CM and/or block.  Events are queued to a per-CM_ID
+ * work_list. If this is the first event on the work_list, the work
+ * element is also queued on the iwcm_wq thread.
+ *
+ * Each event holds a reference on the cm_id. Until the last posted
+ * event has been delivered and processed, the cm_id cannot be deleted.
+ *
+ * Returns:
+ * 	      0	- the event was handled.
+ *	-ENOMEM	- the event was not handled due to lack of resources.
+ *	other	- error returned by copy_private_data(); event not queued.
+ */
+static int cm_event_handler(struct iw_cm_id *cm_id,
+			     struct iw_cm_event *iw_event)
+{
+	struct iwcm_work *work;
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret = 0;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	work = get_work(cm_id_priv);
+	if (!work) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	INIT_WORK(&work->work, cm_work_handler, work);
+	work->cm_id = cm_id_priv;
+	work->event = *iw_event;	/* by-value copy of the provider's event */
+	/* Only connect request/reply events get their private data copied */
+	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
+	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
+	    work->event.private_data_len) {
+		ret = copy_private_data(cm_id_priv, &work->event);
+		if (ret) {
+			put_work(work);
+			goto out;
+		}
+	}
+	/* The queued event holds a reference until it has been processed */
+	atomic_inc(&cm_id_priv->refcount);
+	if (list_empty(&cm_id_priv->work_list)) {
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+		queue_work(iwcm_wq, &work->work);	/* first pending event */
+	} else
+		list_add_tail(&work->list, &cm_id_priv->work_list);
+out:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	return ret;
+}
+
+static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
+				  struct ib_qp_attr *qp_attr,
+				  int *qp_attr_mask)
+{
+	unsigned long irq_flags;
+	int status = -EINVAL;	/* any state not listed below is rejected */
+
+	spin_lock_irqsave(&cm_id_priv->lock, irq_flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_ESTABLISHED:
+		/* Report the QP state and access-flag attributes */
+		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+					   IB_ACCESS_REMOTE_WRITE |
+					   IB_ACCESS_REMOTE_READ;
+		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+		status = 0;
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, irq_flags);
+	return status;
+}
+
+static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
+				  struct ib_qp_attr *qp_attr,
+				  int *qp_attr_mask)
+{
+	unsigned long irq_flags;
+	int status = -EINVAL;	/* any state not listed below is rejected */
+
+	spin_lock_irqsave(&cm_id_priv->lock, irq_flags);
+	switch (cm_id_priv->state) {
+	case IW_CM_STATE_IDLE:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_ESTABLISHED:
+		/* No attributes are reported here: the mask is cleared */
+		*qp_attr_mask = 0;
+		status = 0;
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&cm_id_priv->lock, irq_flags);
+	return status;
+}
+
+/*
+ * Fill in *qp_attr_mask (and, for INIT/RTR, qp_attr) per qp_attr->qp_state.
+ */
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
+		       struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask)
+{
+	struct iwcm_id_private *priv =
+		container_of(cm_id, struct iwcm_id_private, id);
+
+	/*
+	 * Pick the attribute helper according to qp_attr->qp_state;
+	 * any other QP state is rejected with -EINVAL.
+	 */
+	switch (qp_attr->qp_state) {
+	case IB_QPS_INIT:
+	case IB_QPS_RTR:
+		return iwcm_init_qp_init_attr(priv, qp_attr, qp_attr_mask);
+	case IB_QPS_RTS:
+		return iwcm_init_qp_rts_attr(priv, qp_attr, qp_attr_mask);
+	default:
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(iw_cm_init_qp_attr);
+
+static int __init iw_cm_init(void)
+{
+	/* Single-threaded workqueue on which cm_work_handler() is run */
+	iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
+
+	/* Module init fails if the workqueue could not be created */
+	return iwcm_wq ? 0 : -ENOMEM;
+}
+
+static void __exit iw_cm_cleanup(void)
+{
+	destroy_workqueue(iwcm_wq);	/* the queue created in iw_cm_init() */
+}
+
+module_init(iw_cm_init);
+module_exit(iw_cm_cleanup);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
new file mode 100644
index 0000000..3f6cc82
--- /dev/null
+++ b/drivers/infiniband/core/iwcm.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IWCM_H
+#define IWCM_H
+
+enum iw_cm_state {	/* value held in iwcm_id_private.state */
+	IW_CM_STATE_IDLE,             /* unbound, inactive */
+	IW_CM_STATE_LISTEN,           /* listen waiting for connect */
+	IW_CM_STATE_CONN_RECV,        /* inbound waiting for user accept */
+	IW_CM_STATE_CONN_SENT,        /* outbound waiting for peer accept */
+	IW_CM_STATE_ESTABLISHED,      /* established */
+	IW_CM_STATE_CLOSING,	      /* disconnect seen, close pending */
+	IW_CM_STATE_DESTROYING        /* object being deleted */
+};
+
+struct iwcm_id_private {	/* CM-private state wrapped around an iw_cm_id */
+	struct iw_cm_id	id;	/* public id; container_of() maps it back here */
+	enum iw_cm_state state;	/* current connection state */
+	unsigned long flags;	/* IWCM_F_* bits, used with set_bit()/test_bit() */
+	struct ib_qp *qp;	/* associated QP, or NULL */
+	struct completion destroy_comp;	/* NOTE(review): presumably waited on by the destroying thread - confirm */
+	wait_queue_head_t connect_wait;	/* woken after connect events are processed */
+	struct list_head work_list;	/* iwcm_work entries pending processing */
+	spinlock_t lock;	/* taken irqsave around state and work_list updates */
+	atomic_t refcount;	/* each queued event holds one reference */
+	struct list_head work_free_list; /* NOTE(review): presumably the get_work()/put_work() pool - confirm */
+};
+
+#define IWCM_F_CALLBACK_DESTROY   1	/* bit number: destroy requested by a callback */
+#define IWCM_F_CONNECT_WAIT       2	/* bit number: cleared when a connect event is handled */
+
+#endif /* IWCM_H */
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 1c3cfbb..082f03c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1246,8 +1246,8 @@
 	int i;
 
 	for (i = 0; i < MAX_MGMT_OUI; i++)
-                /* Is there matching OUI for this vendor class ? */
-                if (!memcmp(vendor_class->oui[i], oui, 3))
+		/* Is there matching OUI for this vendor class ? */
+		if (!memcmp(vendor_class->oui[i], oui, 3))
 			return i;
 
 	return -1;
@@ -2237,7 +2237,7 @@
 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
 				 &mad_agent_priv->send_list, agent_list) {
 		if (mad_send_wr->status == IB_WC_SUCCESS) {
- 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
 		}
 	}
@@ -2528,10 +2528,10 @@
 			}
 		}
 		sg_list.addr = dma_map_single(qp_info->port_priv->
-					      	device->dma_device,
+					        device->dma_device,
 					      &mad_priv->grh,
 					      sizeof *mad_priv -
-					      	sizeof mad_priv->header,
+					        sizeof mad_priv->header,
 					      DMA_FROM_DEVICE);
 		pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
@@ -2606,7 +2606,7 @@
 	struct ib_qp *qp;
 
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
- 	if (!attr) {
+	if (!attr) {
 		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
 		return -ENOMEM;
 	}
@@ -2876,7 +2876,10 @@
 {
 	int start, end, i;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		start = 0;
 		end   = 0;
 	} else {
@@ -2923,7 +2926,7 @@
 {
 	int i, num_ports, cur_port;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		num_ports = 1;
 		cur_port = 0;
 	} else {
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index d147f3b..d06b590 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -38,8 +38,8 @@
 #define __IB_MAD_PRIV_H__
 
 #include <linux/completion.h>
+#include <linux/err.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index ebcd5b1..1ef79d0 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -33,8 +33,6 @@
  * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
  */
 
-#include <linux/dma-mapping.h>
-
 #include "mad_priv.h"
 #include "mad_rmpp.h"
 
@@ -60,6 +58,7 @@
 	int last_ack;
 	int seg_num;
 	int newwin;
+	int repwin;
 
 	__be64 tid;
 	u32 src_qp;
@@ -170,6 +169,32 @@
 	return msg;
 }
 
+static void ack_ds_ack(struct ib_mad_agent_private *agent,
+		       struct ib_mad_recv_wc *recv_wc)
+{
+	struct ib_mad_send_buf *msg;
+	struct ib_rmpp_mad *rmpp_mad;
+	int ret;
+	/* Build and post a reply derived from the received MAD's header. */
+	msg = alloc_response_msg(&agent->agent, recv_wc);
+	if (IS_ERR(msg))
+		return;	/* nothing is sent if the allocation fails */
+
+	rmpp_mad = msg->mad;
+	memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
+	/* Flip the RESP bit relative to the received method, mark active */
+	rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+	rmpp_mad->rmpp_hdr.seg_num = 0;
+	rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
+
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {	/* post failed: release the AH and the send buffer */
+		ib_destroy_ah(msg->ah);
+		ib_free_send_mad(msg);
+	}
+}
+
 void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
 {
 	struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
@@ -271,6 +296,7 @@
 	rmpp_recv->newwin = 1;
 	rmpp_recv->seg_num = 1;
 	rmpp_recv->last_ack = 0;
+	rmpp_recv->repwin = 1;
 
 	mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
 	rmpp_recv->tid = mad_hdr->tid;
@@ -365,7 +391,7 @@
 static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
 						  int seg_num)
 {
-        struct ib_mad_recv_buf *seg_buf;
+	struct ib_mad_recv_buf *seg_buf;
 	int cur_seg_num;
 
 	list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
@@ -591,6 +617,16 @@
 			break;
 }
 
+static void process_ds_ack(struct ib_mad_agent_private *agent,
+			   struct ib_mad_recv_wc *mad_recv_wc, int newwin)
+{
+	struct mad_rmpp_recv *rmpp_recv;
+	/* Store newwin as repwin on the matching receive, if it is complete. */
+	rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
+	if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
+		rmpp_recv->repwin = newwin;
+}
+
 static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 			     struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -616,8 +652,18 @@
 
 	spin_lock_irqsave(&agent->lock, flags);
 	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
-	if (!mad_send_wr)
-		goto out;	/* Unmatched ACK */
+	if (!mad_send_wr) {
+		if (!seg_num)
+			process_ds_ack(agent, mad_recv_wc, newwin);
+		goto out;	/* Unmatched or DS RMPP ACK */
+	}
+
+	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
+	    (mad_send_wr->timeout)) {
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;		/* Repeated ACK for DS RMPP transaction */
+	}
 
 	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
 	    (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
@@ -656,6 +702,9 @@
 		if (mad_send_wr->refcount == 1)
 			ib_reset_mad_timeout(mad_send_wr,
 					     mad_send_wr->send_buf.timeout_ms);
+		spin_unlock_irqrestore(&agent->lock, flags);
+		ack_ds_ack(agent, mad_recv_wc);
+		return;
 	} else if (mad_send_wr->refcount == 1 &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
 		   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
@@ -772,6 +821,39 @@
 	return NULL;
 }
 
+static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
+	struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
+	struct mad_rmpp_recv *rmpp_recv;
+	struct ib_ah_attr ah_attr;
+	unsigned long flags;
+	int newwin = 1;	/* default when no matching receive is found */
+	/* Returns the initial send window to use for mad_send_wr. */
+	if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
+		goto out;	/* not a response: keep the default of 1 */
+	/* Search rmpp_list for the receive this response corresponds to */
+	spin_lock_irqsave(&agent->lock, flags);
+	list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+		if (rmpp_recv->tid != mad_hdr->tid ||
+		    rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
+		    rmpp_recv->class_version != mad_hdr->class_version ||
+		    (rmpp_recv->method & IB_MGMT_METHOD_RESP))
+			continue;
+		/* Skip the entry if the send AH cannot be queried */
+		if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+			continue;
+		/* Receive's source LID must equal our destination LID */
+		if (rmpp_recv->slid == ah_attr.dlid) {
+			newwin = rmpp_recv->repwin;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&agent->lock, flags);
+out:
+	return newwin;
+}
+
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
@@ -787,7 +869,7 @@
 		return IB_RMPP_RESULT_INTERNAL;
 	}
 
-	mad_send_wr->newwin = 1;
+	mad_send_wr->newwin = init_newwin(mad_send_wr);
 
 	/* We need to wait for the final ACK even if there isn't a response */
 	mad_send_wr->refcount += (mad_send_wr->timeout == 0);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index d6b8422..1706d3c 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -75,6 +76,7 @@
 struct ib_sa_query {
 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
 	void (*release)(struct ib_sa_query *);
+	struct ib_sa_client    *client;
 	struct ib_sa_port      *port;
 	struct ib_mad_send_buf *mad_buf;
 	struct ib_sa_sm_ah     *sm_ah;
@@ -415,6 +417,31 @@
 	}
 }
 
+void ib_sa_register_client(struct ib_sa_client *client)
+{
+	atomic_set(&client->users, 1);	/* initial reference; ib_sa_unregister_client() drops it */
+	init_completion(&client->comp);	/* completed by ib_sa_client_put() at zero users */
+}
+EXPORT_SYMBOL(ib_sa_register_client);
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+	atomic_inc(&client->users);	/* one more user of the client */
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+	if (atomic_dec_and_test(&client->users))	/* last user gone */
+		complete(&client->comp);	/* wakes ib_sa_unregister_client() */
+}
+
+void ib_sa_unregister_client(struct ib_sa_client *client)
+{
+	ib_sa_client_put(client);	/* drop the reference set at registration */
+	wait_for_completion(&client->comp);	/* block until users reaches zero */
+}
+EXPORT_SYMBOL(ib_sa_unregister_client);
+
 /**
  * ib_sa_cancel_query - try to cancel an SA query
  * @id:ID of query to cancel
@@ -557,6 +584,7 @@
 
 /**
  * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:Path Record to send in query
@@ -579,7 +607,8 @@
  * error code.  Otherwise it is a query ID that can be used to cancel
  * the query.
  */
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_path_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@@ -614,8 +643,10 @@
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -639,6 +670,7 @@
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -671,6 +703,7 @@
 
 /**
  * ib_sa_service_rec_query - Start Service Record operation
+ * @client:SA client
  * @device:device to send request on
  * @port_num: port number to send request on
  * @method:SA method - should be get, set, or delete
@@ -695,7 +728,8 @@
  * error code.  Otherwise it is a request ID that can be used to cancel
  * the query.
  */
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+			    struct ib_device *device, u8 port_num, u8 method,
 			    struct ib_sa_service_rec *rec,
 			    ib_sa_comp_mask comp_mask,
 			    int timeout_ms, gfp_t gfp_mask,
@@ -735,8 +769,10 @@
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -761,6 +797,7 @@
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -791,7 +828,8 @@
 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+			     struct ib_device *device, u8 port_num,
 			     u8 method,
 			     struct ib_sa_mcmember_rec *rec,
 			     ib_sa_comp_mask comp_mask,
@@ -827,8 +865,10 @@
 		goto err1;
 	}
 
-	query->callback = callback;
-	query->context  = context;
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback        = callback;
+	query->context         = context;
 
 	mad = query->sa_query.mad_buf->mad;
 	init_mad(mad, agent);
@@ -853,6 +893,7 @@
 
 err2:
 	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
 	ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -887,8 +928,9 @@
 	idr_remove(&query_idr, query->id);
 	spin_unlock_irqrestore(&idr_lock, flags);
 
-        ib_free_send_mad(mad_send_wc->send_buf);
+	ib_free_send_mad(mad_send_wc->send_buf);
 	kref_put(&query->sm_ah->ref, free_sm_ah);
+	ib_sa_client_put(query->client);
 	query->release(query);
 }
 
@@ -919,7 +961,10 @@
 	struct ib_sa_device *sa_dev;
 	int s, e, i;
 
-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 35852e7..54b81e1 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -64,7 +64,7 @@
 
 		/* C14-9:2 */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			/* smp->return_path set when received */
@@ -77,7 +77,7 @@
 		if (hop_ptr == hop_cnt) {
 			/* smp->return_path set when received */
 			smp->hop_ptr++;
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}
 
@@ -95,7 +95,7 @@
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			smp->hop_ptr--;
@@ -107,7 +107,7 @@
 		if (hop_ptr == 1) {
 			smp->hop_ptr--;
 			/* C14-13:3 -- SMPs destined for SM shouldn't be here */
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_slid == IB_LID_PERMISSIVE);
 		}
 
@@ -142,7 +142,7 @@
 
 		/* C14-9:2 -- intermediate hop */
 		if (hop_ptr && hop_ptr < hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			smp->return_path[hop_ptr] = port_num;
@@ -156,7 +156,7 @@
 				smp->return_path[hop_ptr] = port_num;
 			/* smp->hop_ptr updated when sending */
 
-			return (node_type == IB_NODE_SWITCH ||
+			return (node_type == RDMA_NODE_IB_SWITCH ||
 				smp->dr_dlid == IB_LID_PERMISSIVE);
 		}
 
@@ -175,7 +175,7 @@
 
 		/* C14-13:2 */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-			if (node_type != IB_NODE_SWITCH)
+			if (node_type != RDMA_NODE_IB_SWITCH)
 				return 0;
 
 			/* smp->hop_ptr updated when sending */
@@ -190,7 +190,7 @@
 				return 1;
 			}
 			/* smp->hop_ptr updated when sending */
-			return (node_type == IB_NODE_SWITCH);
+			return (node_type == RDMA_NODE_IB_SWITCH);
 		}
 
 		/* C14-13:4 -- hop_ptr = 0 -> give to SM */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 21f9282..709323c 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -68,7 +68,7 @@
 	int			index;
 };
 
-static inline int ibdev_is_alive(const struct ib_device *dev) 
+static inline int ibdev_is_alive(const struct ib_device *dev)
 {
 	return dev->reg_state == IB_DEV_REGISTERED;
 }
@@ -589,10 +589,11 @@
 		return -ENODEV;
 
 	switch (dev->node_type) {
-	case IB_NODE_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
-	case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
-	case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
-	default:             return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);
+	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type);
+	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
+	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
 	}
 }
 
@@ -708,7 +709,7 @@
 	if (ret)
 		goto err_put;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		ret = add_port(device, 0);
 		if (ret)
 			goto err_put;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index c1c6fda..ad4f4d5 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -309,9 +309,9 @@
 		info	      = evt->param.apr_rcvd.apr_info;
 		break;
 	case IB_CM_SIDR_REQ_RECEIVED:
-		uvt->resp.u.sidr_req_resp.pkey = 
+		uvt->resp.u.sidr_req_resp.pkey =
 					evt->param.sidr_req_rcvd.pkey;
-		uvt->resp.u.sidr_req_resp.port = 
+		uvt->resp.u.sidr_req_resp.port =
 					evt->param.sidr_req_rcvd.port;
 		uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
 		break;
@@ -1237,7 +1237,7 @@
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
 	struct ib_ucm_device *dev;
-	
+
 	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
 	return sprintf(buf, "%s\n", dev->ib_dev->name);
 }
@@ -1247,7 +1247,8 @@
 {
 	struct ib_ucm_device *ucm_dev;
 
-	if (!device->alloc_ucontext)
+	if (!device->alloc_ucontext ||
+	    rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 		return;
 
 	ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1273f88..807fbd6 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -1032,7 +1032,10 @@
 	struct ib_umad_device *umad_dev;
 	int s, e, i;
 
-	if (device->node_type == IB_NODE_SWITCH)
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
 		s = e = 0;
 	else {
 		s = 1;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 30923eb..b72c7f6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -155,7 +155,7 @@
 }
 
 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
-					struct ib_ucontext *context)
+					struct ib_ucontext *context, int nested)
 {
 	struct ib_uobject *uobj;
 
@@ -163,7 +163,10 @@
 	if (!uobj)
 		return NULL;
 
-	down_read(&uobj->mutex);
+	if (nested)
+		down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
+	else
+		down_read(&uobj->mutex);
 	if (!uobj->live) {
 		put_uobj_read(uobj);
 		return NULL;
@@ -190,17 +193,18 @@
 	return uobj;
 }
 
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context)
+static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
+			  int nested)
 {
 	struct ib_uobject *uobj;
 
-	uobj = idr_read_uobj(idr, id, context);
+	uobj = idr_read_uobj(idr, id, context, nested);
 	return uobj ? uobj->object : NULL;
 }
 
 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context);
+	return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
 }
 
 static void put_pd_read(struct ib_pd *pd)
@@ -208,9 +212,9 @@
 	put_uobj_read(pd->uobject);
 }
 
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context)
+static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
 {
-	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context);
+	return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
 }
 
 static void put_cq_read(struct ib_cq *cq)
@@ -220,7 +224,7 @@
 
 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context);
+	return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
 }
 
 static void put_ah_read(struct ib_ah *ah)
@@ -230,7 +234,7 @@
 
 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context);
+	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
 }
 
 static void put_qp_read(struct ib_qp *qp)
@@ -240,7 +244,7 @@
 
 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
 {
-	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context);
+	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
 }
 
 static void put_srq_read(struct ib_srq *srq)
@@ -837,7 +841,6 @@
 err_copy:
 	idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
 
-
 err_free:
 	ib_destroy_cq(cq);
 
@@ -867,7 +870,7 @@
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
 	if (!cq)
 		return -EINVAL;
 
@@ -875,11 +878,10 @@
 	if (ret)
 		goto out;
 
-	memset(&resp, 0, sizeof resp);
 	resp.cqe = cq->cqe;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp))
+			 &resp, sizeof resp.cqe))
 		ret = -EFAULT;
 
 out:
@@ -894,7 +896,6 @@
 {
 	struct ib_uverbs_poll_cq       cmd;
 	struct ib_uverbs_poll_cq_resp *resp;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 	struct ib_wc                  *wc;
 	int                            ret = 0;
@@ -915,16 +916,15 @@
 		goto out_wc;
 	}
 
-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj) {
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+	if (!cq) {
 		ret = -EINVAL;
 		goto out;
 	}
-	cq = uobj->object;
 
 	resp->count = ib_poll_cq(cq, cmd.ne, wc);
 
-	put_uobj_read(uobj);
+	put_cq_read(cq);
 
 	for (i = 0; i < resp->count; i++) {
 		resp->wc[i].wr_id 	   = wc[i].wr_id;
@@ -959,21 +959,19 @@
 				int out_len)
 {
 	struct ib_uverbs_req_notify_cq cmd;
-	struct ib_uobject	      *uobj;
 	struct ib_cq                  *cq;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-	if (!uobj)
+	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+	if (!cq)
 		return -EINVAL;
-	cq = uobj->object;
 
 	ib_req_notify_cq(cq, cmd.solicited_only ?
 			 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
 
-	put_uobj_read(uobj);
+	put_cq_read(cq);
 
 	return in_len;
 }
@@ -1064,9 +1062,9 @@
 
 	srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext);
+	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
 	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
-		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext);
+		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
 
 	if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
 		ret = -EINVAL;
@@ -1274,6 +1272,7 @@
 			    int out_len)
 {
 	struct ib_uverbs_modify_qp cmd;
+	struct ib_udata            udata;
 	struct ib_qp              *qp;
 	struct ib_qp_attr         *attr;
 	int                        ret;
@@ -1281,6 +1280,9 @@
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr)
 		return -ENOMEM;
@@ -1337,7 +1339,7 @@
 	attr->alt_ah_attr.ah_flags 	    = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
 	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;
 
-	ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+	ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
 
 	put_qp_read(qp);
 
@@ -1674,7 +1676,6 @@
 				break;
 		}
 
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
@@ -1724,7 +1725,6 @@
 				break;
 		}
 
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
 		ret = -EFAULT;
@@ -2055,6 +2055,7 @@
 			     int out_len)
 {
 	struct ib_uverbs_modify_srq cmd;
+	struct ib_udata             udata;
 	struct ib_srq              *srq;
 	struct ib_srq_attr          attr;
 	int                         ret;
@@ -2062,6 +2063,9 @@
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+		   out_len);
+
 	srq = idr_read_srq(cmd.srq_handle, file->ucontext);
 	if (!srq)
 		return -EINVAL;
@@ -2069,7 +2073,7 @@
 	attr.max_wr    = cmd.max_wr;
 	attr.srq_limit = cmd.srq_limit;
 
-	ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
+	ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
 
 	put_srq_read(srq);
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 468999c..8b5dd36 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -79,6 +79,23 @@
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+	switch (node_type) {	/* map an RDMA node type to its transport */
+	case RDMA_NODE_IB_CA:
+	case RDMA_NODE_IB_SWITCH:
+	case RDMA_NODE_IB_ROUTER:
+		return RDMA_TRANSPORT_IB;	/* every IB node kind listed above */
+	case RDMA_NODE_RNIC:
+		return RDMA_TRANSPORT_IWARP;
+	default:
+		BUG();		/* node type not handled above */
+		return 0;	/* gives the function a return value after BUG() */
+	}
+}
+EXPORT_SYMBOL(rdma_node_get_transport);
+
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
@@ -231,7 +248,7 @@
 		  struct ib_srq_attr *srq_attr,
 		  enum ib_srq_attr_mask srq_attr_mask)
 {
-	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask);
+	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_srq);
 
@@ -547,7 +564,7 @@
 		 struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask)
 {
-	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask);
+	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
 
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild
new file mode 100644
index 0000000..06964c4
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kbuild
@@ -0,0 +1,8 @@
+ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
+
+iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
+	c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig
new file mode 100644
index 0000000..809cb14
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kconfig
@@ -0,0 +1,15 @@
+config INFINIBAND_AMSO1100
+	tristate "Ammasso 1100 HCA support"
+	depends on PCI && INET && INFINIBAND
+	---help---
+	  This is a low-level driver for the Ammasso 1100 host
+	  channel adapter (HCA).
+
+config INFINIBAND_AMSO1100_DEBUG
+	bool "Verbose debugging output"
+	depends on INFINIBAND_AMSO1100
+	default n
+	---help---
+	  This option causes the amso1100 driver to produce a bunch of
+	  debug messages.  Select this if you are developing the driver
+	  or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
new file mode 100644
index 0000000..9e9120f
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -0,0 +1,1255 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_provider.h"
+
+MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
+MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
+    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
+
+static int debug = -1;		/* -1: use default_msg (via netif_msg_init) */
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static int c2_up(struct net_device *netdev);
+static int c2_down(struct net_device *netdev);
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static void c2_tx_interrupt(struct net_device *netdev);
+static void c2_rx_interrupt(struct net_device *netdev);
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void c2_tx_timeout(struct net_device *netdev);
+static int c2_change_mtu(struct net_device *netdev, int new_mtu);
+static void c2_reset(struct c2_port *c2_port);
+static struct net_device_stats *c2_get_stats(struct net_device *netdev);
+
+static struct pci_device_id c2_pci_table[] = {
+	{ PCI_DEVICE(0x18b8, 0xb001) },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, c2_pci_table);
+
+static void c2_print_macaddr(struct net_device *netdev)
+{
+	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
+		"IRQ %u\n", netdev->name,	/* name, six address bytes, IRQ */
+		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
+		netdev->irq);
+}
+
+static void c2_set_rxbufsize(struct c2_port *c2_port)
+{
+	unsigned int mtu = c2_port->netdev->mtu;
+	size_t payload = RX_BUF_SIZE;	/* default room behind the RXP header */
+
+	/* A larger MTU needs the whole Ethernet frame plus IP alignment */
+	if (mtu > RX_BUF_SIZE)
+		payload = (size_t) mtu + ETH_HLEN + NET_IP_ALIGN;
+
+	c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + payload;
+}
+
+/*
+ * Allocate zeroed TX ring elements (so idle slots hold no stale mapping
+ * state), pair each with its host/adapter descriptor and chain them up.
+ */
+static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
+			    dma_addr_t base, void __iomem * mmio_txp_ring)
+{
+	struct c2_tx_desc *tx_desc;
+	struct c2_txp_desc __iomem *txp_desc;
+	struct c2_element *elem;
+	int i;
+
+	tx_ring->start = kcalloc(tx_ring->count, sizeof(*elem), GFP_KERNEL);
+	if (!tx_ring->start)
+		return -ENOMEM;
+
+	elem = tx_ring->start;
+	tx_desc = vaddr;	/* host descriptors, CPU view */
+	txp_desc = mmio_txp_ring;	/* adapter descriptors, MMIO view */
+	for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
+		tx_desc->len = 0;
+		tx_desc->status = 0;
+
+		/* Set TXP_HTXD_UNINIT with a dummy address and zero length */
+		__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+			     (void __iomem *) txp_desc + C2_TXP_ADDR);
+		__raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
+		__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+			     (void __iomem *) txp_desc + C2_TXP_FLAGS);
+
+		elem->skb = NULL;
+		elem->ht_desc = tx_desc;
+		elem->hw_desc = txp_desc;
+
+		if (i == tx_ring->count - 1) {	/* last element wraps around */
+			elem->next = tx_ring->start;
+			tx_desc->next_offset = base;
+		} else {
+			elem->next = elem + 1;
+			tx_desc->next_offset =
+			    base + (i + 1) * sizeof(*tx_desc);
+		}
+	}
+
+	tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
+
+	return 0;	/* -ENOMEM above is the only failure */
+}
+
+/*
+ * Allocate zeroed RX ring elements, pair each with its host and adapter
+ * descriptor, and chain them into a circular list.  Returns 0 or -ENOMEM.
+ */
+static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
+			    dma_addr_t base, void __iomem * mmio_rxp_ring)
+{
+	struct c2_rx_desc *rx_desc;
+	struct c2_rxp_desc __iomem *rxp_desc;
+	struct c2_element *elem;
+	int i;
+
+	rx_ring->start = kcalloc(rx_ring->count, sizeof(*elem), GFP_KERNEL);
+	if (!rx_ring->start)
+		return -ENOMEM;
+
+	elem = rx_ring->start;
+	rx_desc = vaddr;	/* host descriptors, CPU view */
+	rxp_desc = mmio_rxp_ring;	/* adapter descriptors, MMIO view */
+	for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
+		rx_desc->len = 0;
+		rx_desc->status = 0;
+
+		/* Set RXP_HRXD_UNINIT with a dummy address and zero length */
+		__raw_writew(cpu_to_be16(RXP_HRXD_OK),
+		       (void __iomem *) rxp_desc + C2_RXP_STATUS);
+		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
+		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
+		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+			     (void __iomem *) rxp_desc + C2_RXP_ADDR);
+		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+			     (void __iomem *) rxp_desc + C2_RXP_FLAGS);
+
+		elem->skb = NULL;
+		elem->ht_desc = rx_desc;
+		elem->hw_desc = rxp_desc;
+
+		if (i == rx_ring->count - 1) {	/* last element wraps around */
+			elem->next = rx_ring->start;
+			rx_desc->next_offset = base;
+		} else {
+			elem->next = elem + 1;
+			rx_desc->next_offset =
+			    base + (i + 1) * sizeof(*rx_desc);
+		}
+	}
+
+	rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
+
+	return 0;
+}
+
+/* Allocate, DMA-map and post a fresh receive skb for @elem; 0 or -ENOMEM */
+static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_rx_desc *rx_desc = elem->ht_desc;
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen;
+	struct c2_rxp_hdr *rxp_hdr;
+
+	skb = dev_alloc_skb(c2_port->rx_buf_size);
+	if (unlikely(!skb)) {
+		pr_debug("%s: out of memory for receive\n",
+			c2_port->netdev->name);
+		return -ENOMEM;	/* @elem is left untouched on failure */
+	}
+
+	/* Zero out the rxp hdr in the sk_buff */
+	memset(skb->data, 0, sizeof(*rxp_hdr));
+
+	skb->dev = c2_port->netdev;
+
+	maplen = c2_port->rx_buf_size;	/* map the whole buffer, header included */
+	mapaddr =
+	    pci_map_single(c2dev->pcidev, skb->data, maplen,
+			   PCI_DMA_FROMDEVICE);
+
+	/* Set the sk_buff RXP_header to RXP_HRXD_READY */
+	rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+	rxp_hdr->flags = RXP_HRXD_READY;
+	/* Describe the buffer to the adapter; the READY flag is written last */
+	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+	__raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
+		     elem->hw_desc + C2_RXP_LEN);
+	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+	/* Record the skb and its mapping in the ring element */
+	elem->skb = skb;
+	elem->mapaddr = mapaddr;
+	elem->maplen = maplen;
+	rx_desc->len = maplen;
+
+	return 0;
+}
+
+/*
+ * Post a receive buffer on every element of the Rx ring.
+ * For receive:  rx_ring.to_clean is next received frame
+ */
+static int c2_rx_fill(struct c2_port *c2_port)
+{
+	struct c2_ring *ring = &c2_port->rx_ring;
+	struct c2_element *cur = ring->start;
+	int failed;
+
+	/* Visit each element of the circular list once; stop on first failure */
+	do {
+		failed = c2_rx_alloc(c2_port, cur) ? 1 : 0;
+		cur = cur->next;
+	} while (!failed && cur != ring->start);
+
+	/* Reception starts at the head of the ring */
+	ring->to_clean = ring->start;
+
+	return failed;	/* 0 when every element got a buffer, 1 otherwise */
+}
+
+/* Free all buffers in RX ring, assumes receiver stopped */
+static void c2_rx_clean(struct c2_port *c2_port)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	struct c2_rx_desc *rx_desc;
+
+	elem = rx_ring->start;
+	do {
+		rx_desc = elem->ht_desc;
+		rx_desc->len = 0;
+		/* Mark the adapter descriptor RXP_HRXD_UNINIT, dummy address */
+		__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+		__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+		__raw_writew(0, elem->hw_desc + C2_RXP_LEN);
+		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+			     elem->hw_desc + C2_RXP_ADDR);
+		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+			     elem->hw_desc + C2_RXP_FLAGS);
+		/* Unmap and free the buffer if one is posted on this element */
+		if (elem->skb) {
+			pci_unmap_single(c2dev->pcidev, elem->mapaddr,
+					 elem->maplen, PCI_DMA_FROMDEVICE);
+			dev_kfree_skb(elem->skb);
+			elem->skb = NULL;
+		}
+	} while ((elem = elem->next) != rx_ring->start);	/* once round */
+}
+
+static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
+{
+	struct c2_tx_desc *tx_desc = elem->ht_desc;
+
+	tx_desc->len = 0;
+	/* NOTE(review): also run on pci_map_page() mappings and on idle slots */
+	pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
+			 PCI_DMA_TODEVICE);
+	/* Fragment elements carry no skb (c2_xmit_frame sets it to NULL) */
+	if (elem->skb) {
+		dev_kfree_skb_any(elem->skb);
+		elem->skb = NULL;
+	}
+
+	return 0;	/* always 0 */
+}
+
+/* Drop and free everything in the TX ring; assumes transmitter stopped */
+static void c2_tx_clean(struct c2_port *c2_port)
+{
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	struct c2_txp_desc txp_htxd;
+	int retry;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+	elem = tx_ring->start;
+
+	do {
+		retry = 0;
+		do {
+			/* Flags are stored big-endian, as in c2_tx_interrupt() */
+			txp_htxd.flags =
+			    be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS));
+			if (txp_htxd.flags == TXP_HTXD_READY) {
+				retry = 1;	/* revisit this slot next pass */
+				__raw_writew(0,
+					     elem->hw_desc + C2_TXP_LEN);
+				__raw_writeq(0,
+					     elem->hw_desc + C2_TXP_ADDR);
+				__raw_writew(cpu_to_be16(TXP_HTXD_DONE),
+					     elem->hw_desc + C2_TXP_FLAGS);
+				c2_port->netstats.tx_dropped++;
+				break;
+			} else {
+				__raw_writew(0,
+					     elem->hw_desc + C2_TXP_LEN);
+				__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+					     elem->hw_desc + C2_TXP_ADDR);
+				__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+					     elem->hw_desc + C2_TXP_FLAGS);
+			}
+
+			c2_tx_free(c2_port->c2dev, elem);
+
+		} while ((elem = elem->next) != tx_ring->start);
+	} while (retry);
+
+	c2_port->tx_avail = c2_port->tx_ring.count - 1;	/* ring is empty again */
+	c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
+
+	if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+		netif_wake_queue(c2_port->netdev);
+
+	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+}
+
+/*
+ * Reclaim transmit descriptors marked 'DONE' by the firmware, walking from
+ * to_clean towards to_use, unmapping them and freeing their sk_buffs.
+ */
+static void c2_tx_interrupt(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	struct c2_txp_desc txp_htxd;
+
+	spin_lock(&c2_port->tx_lock);	/* callers: c2_interrupt(), c2_down() */
+
+	for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
+	     elem = elem->next) {
+		txp_htxd.flags =
+		    be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS));
+
+		if (txp_htxd.flags != TXP_HTXD_DONE)
+			break;	/* stop at the first descriptor not completed */
+
+		if (netif_msg_tx_done(c2_port)) {
+			/* PCI reads are expensive in fast path */
+			txp_htxd.len =
+			    be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN));
+			pr_debug("%s: tx done slot %3Zu status 0x%x len "
+				"%5u bytes\n",
+				netdev->name, elem - tx_ring->start,
+				txp_htxd.flags, txp_htxd.len);
+		}
+
+		c2_tx_free(c2dev, elem);
+		++(c2_port->tx_avail);	/* one more descriptor free for xmit */
+	}
+
+	tx_ring->to_clean = elem;	/* resume here on the next call */
+
+	if (netif_queue_stopped(netdev)
+	    && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+		netif_wake_queue(netdev);
+
+	spin_unlock(&c2_port->tx_lock);
+}
+
+static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
+{
+	struct c2_rx_desc *rx_desc = elem->ht_desc;
+	struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+
+	if (rxp_hdr->status != RXP_HRXD_OK ||	/* dump only a bad header */
+	    rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
+		pr_debug("BAD RXP_HRXD\n");
+		pr_debug("  rx_desc : %p\n", rx_desc);
+		pr_debug("    index : %Zu\n",
+			elem - c2_port->rx_ring.start);
+		pr_debug("    len   : %u\n", rx_desc->len);
+		pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
+			(void *) __pa((unsigned long) rxp_hdr));
+		pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
+		pr_debug("    status: 0x%x\n", rxp_hdr->status);
+		pr_debug("    len   : %u\n", rxp_hdr->len);
+		pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
+	}
+
+	/* Setup the skb for reuse since we're dropping this pkt */
+	elem->skb->tail = elem->skb->data = elem->skb->head;
+
+	/* Zero out the rxp hdr in the sk_buff */
+	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
+
+	/* Re-post the same buffer (existing mapping) on the adapter's rx ring */
+	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+	__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+	__raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
+		     elem->hw_desc + C2_RXP_LEN);
+	__raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+	pr_debug("packet dropped\n");
+	c2_port->netstats.rx_dropped++;	/* also counts allocation failures */
+}
+
+static void c2_rx_interrupt(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *rx_ring = &c2_port->rx_ring;
+	struct c2_element *elem;
+	struct c2_rx_desc *rx_desc;
+	struct c2_rxp_hdr *rxp_hdr;
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen, buflen;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c2dev->lock, flags);
+
+	/* Begin where we left off */
+	rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
+
+	for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
+	     elem = elem->next) {
+		rx_desc = elem->ht_desc;
+		mapaddr = elem->mapaddr;	/* saved: c2_rx_alloc() overwrites */
+		maplen = elem->maplen;
+		skb = elem->skb;
+		rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+
+		if (rxp_hdr->flags != RXP_HRXD_DONE)
+			break;	/* nothing further has been received yet */
+		buflen = rxp_hdr->len;
+
+		/* Sanity check the RXP header */
+		if (rxp_hdr->status != RXP_HRXD_OK ||
+		    buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
+			c2_rx_error(c2_port, elem);
+			continue;
+		}
+
+		/*
+		 * Allocate and map a new skb for replenishing the host
+		 * RX desc; on failure drop the frame and reuse the old skb
+		 */
+		if (c2_rx_alloc(c2_port, elem)) {
+			c2_rx_error(c2_port, elem);
+			continue;
+		}
+
+		/* Unmap the old skb */
+		pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
+				 PCI_DMA_FROMDEVICE);
+
+		prefetch(skb->data);
+
+		/*
+		 * Skip past the leading 8 bytes, which comprise the
+		 * "struct c2_rxp_hdr" prepended by the adapter
+		 * to the usual Ethernet header ("struct ethhdr"),
+		 * to reach the start of the raw Ethernet packet.
+		 *
+		 * Fix up the various fields in the sk_buff before
+		 * passing it up to netif_rx(). The transfer size
+		 * (in bytes) specified by the adapter len field of
+		 * the "struct c2_rxp_hdr" does NOT include the
+		 * "sizeof(struct c2_rxp_hdr)".
+		 */
+		skb->data += sizeof(*rxp_hdr);
+		skb->tail = skb->data + buflen;
+		skb->len = buflen;
+		skb->dev = netdev;
+		skb->protocol = eth_type_trans(skb, netdev);
+
+		netif_rx(skb);
+
+		netdev->last_rx = jiffies;
+		c2_port->netstats.rx_packets++;
+		c2_port->netstats.rx_bytes += buflen;
+	}
+
+	/* Save where we left off */
+	rx_ring->to_clean = elem;
+	c2dev->cur_rx = elem - rx_ring->start;
+	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+	spin_unlock_irqrestore(&c2dev->lock, flags);
+}
+
+/*
+ * Interrupt handler for the adapter.
+ *
+ * Services the netisr0 TX & RX source and the RNIC source, and
+ * returns IRQ_HANDLED if either was pending, IRQ_NONE otherwise.
+ */
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct c2_dev *c2dev = dev_id;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned int net_pending, dma_pending;
+
+	/* Process CCILNET interrupts */
+	net_pending = readl(c2dev->regs + C2_NISR0);
+	if (net_pending) {
+
+		/*
+		 * There is an issue with the firmware that always
+		 * provides the status of RX for both TX & RX
+		 * interrupts.  So process both queues here.
+		 */
+		c2_rx_interrupt(c2dev->netdev);
+		c2_tx_interrupt(c2dev->netdev);
+
+		/* Clear the interrupt by writing the status back */
+		writel(net_pending, c2dev->regs + C2_NISR0);
+		ret = IRQ_HANDLED;
+	}
+
+	/* Process RNIC interrupts: acknowledge first, then dispatch */
+	dma_pending = readl(c2dev->regs + C2_DISR);
+	if (dma_pending) {
+		writel(dma_pending, c2dev->regs + C2_DISR);
+		c2_rnic_interrupt(c2dev);
+		ret = IRQ_HANDLED;
+	}
+
+	/* IRQ_NONE unless at least one of the two sources was pending */
+	return ret;
+}
+
+static int c2_up(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_element *elem;
+	struct c2_rxp_hdr *rxp_hdr;
+	struct in_device *in_dev;
+	size_t rx_size, tx_size;
+	int ret, i;
+	unsigned int netimr0;
+
+	/* Open hook: build both rings, resync with the adapter, enable IRQs */
+	if (netif_msg_ifup(c2_port))
+		pr_debug("%s: enabling interface\n", netdev->name);
+
+	/* Set the Rx buffer size based on MTU */
+	c2_set_rxbufsize(c2_port);
+
+	/* Allocate DMA'able memory for Tx/Rx host descriptor rings */
+	rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
+	tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
+
+	c2_port->mem_size = tx_size + rx_size;
+	c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
+					    &c2_port->dma);
+	if (c2_port->mem == NULL) {
+		pr_debug("Unable to allocate memory for host descriptor rings\n");
+		return -ENOMEM;
+	}
+
+	memset(c2_port->mem, 0, c2_port->mem_size);
+
+	/* Create the Rx host descriptor ring */
+	if ((ret =
+	     c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
+			      c2dev->mmio_rxp_ring))) {
+		pr_debug("Unable to create RX ring\n");
+		goto bail0;
+	}
+
+	/* Allocate Rx buffers for the host descriptor ring */
+	if (c2_rx_fill(c2_port)) {
+		pr_debug("Unable to fill RX ring\n");
+		ret = -ENOMEM;	/* ret is still 0 here; report the failure */
+		goto bail1;
+	}
+
+	/* Create the Tx host descriptor ring */
+	if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
+				    c2_port->dma + rx_size,
+				    c2dev->mmio_txp_ring))) {
+		pr_debug("Unable to create TX ring\n");
+		goto bail1;
+	}
+
+	/* Set the TX pointer to where we left off */
+	c2_port->tx_avail = c2_port->tx_ring.count - 1;
+	c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
+	    c2_port->tx_ring.start + c2dev->cur_tx;
+
+	/* missing: Initialize MAC */
+
+	BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
+
+	/* Reset the adapter, ensures the driver is in sync with the RXP */
+	c2_reset(c2_port);
+
+	/* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
+	for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
+	     i++, elem++) {
+		rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+		rxp_hdr->flags = 0;
+		__raw_writew(cpu_to_be16(RXP_HRXD_READY),
+			     elem->hw_desc + C2_RXP_FLAGS);
+	}
+
+	netif_start_queue(netdev);	/* Enable network packets */
+
+	/* Enable IRQ */
+	writel(0, c2dev->regs + C2_IDIS);
+	netimr0 = readl(c2dev->regs + C2_NIMR0);
+	netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
+	writel(netimr0, c2dev->regs + C2_NIMR0);
+
+	/* Tell the stack to ignore arp requests for ipaddrs bound to other
+	 * interfaces, so the host stack does not respond to arp requests for
+	 * the ipaddr bound on the rdma interface.  No in_device: nothing to set.
+	 */
+	in_dev = in_dev_get(netdev);
+	if (in_dev) {
+		in_dev->cnf.arp_ignore = 1;
+		in_dev_put(in_dev);
+	}
+
+	return 0;
+
+      bail1:
+	c2_rx_clean(c2_port);
+	kfree(c2_port->rx_ring.start);
+
+      bail0:
+	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+			    c2_port->dma);
+	return ret;
+}
+
+static int c2_down(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+
+	if (netif_msg_ifdown(c2_port))
+		pr_debug("%s: disabling interface\n",
+			netdev->name);
+
+	/* Reap TX descriptors already completed (this does not wait for more) */
+	c2_tx_interrupt(netdev);
+
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+	/* Disable IRQs.  NOTE(review): c2_up() enables HTX/HRX by clearing
+	 * their NIMR0 bits, so presumably C2_IDIS does the disabling - confirm */
+	writel(1, c2dev->regs + C2_IDIS);
+	writel(0, c2dev->regs + C2_NIMR0);
+
+	/* missing: Stop transmitter */
+
+	/* missing: Stop receiver */
+
+	/* Reset the adapter, ensures the driver is in sync with the RXP */
+	c2_reset(c2_port);
+
+	/* missing: Turn off LEDs here */
+
+	/* Free all buffers in the host descriptor rings */
+	c2_tx_clean(c2_port);
+	c2_rx_clean(c2_port);
+
+	/* Free the host descriptor rings */
+	kfree(c2_port->rx_ring.start);
+	kfree(c2_port->tx_ring.start);
+	pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+			    c2_port->dma);
+
+	return 0;
+}
+
+static void c2_reset(struct c2_port *c2_port)
+{
+	struct c2_dev *c2dev = c2_port->c2dev;
+	unsigned int cur_rx = c2dev->cur_rx;
+
+	/* Tell the hardware to quiesce */
+	C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
+
+	/*
+	 * The hardware will reset the C2_PCI_HRX_QUI bit once
+	 * the RXP is quiesced.  Sleep a fixed 2 seconds, then check.
+	 */
+	ssleep(2);
+
+	cur_rx = C2_GET_CUR_RX(c2dev);	/* value as now reported by the adapter */
+
+	if (cur_rx & C2_PCI_HRX_QUI)	/* bit still set: only logged, not fatal */
+		pr_debug("c2_reset: failed to quiesce the hardware!\n");
+
+	cur_rx &= ~C2_PCI_HRX_QUI;
+
+	c2dev->cur_rx = cur_rx;	/* driver continues from the value read back */
+
+	pr_debug("Current RX: %u\n", c2dev->cur_rx);
+}
+
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+	struct c2_dev *c2dev = c2_port->c2dev;
+	struct c2_ring *tx_ring = &c2_port->tx_ring;
+	struct c2_element *elem;
+	dma_addr_t mapaddr;
+	u32 maplen;
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+	/* One descriptor for the linear part plus one per page fragment */
+	if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
+		netif_stop_queue(netdev);
+		spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+		pr_debug("%s: Tx ring full when queue awake!\n",
+			netdev->name);
+		return NETDEV_TX_BUSY;
+	}
+
+	maplen = skb_headlen(skb);	/* linear (non-paged) part only */
+	mapaddr =
+	    pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
+
+	elem = tx_ring->to_use;
+	elem->skb = skb;	/* the first element owns the skb */
+	elem->mapaddr = mapaddr;
+	elem->maplen = maplen;
+
+	/* Tell HW to xmit */
+	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR);
+	__raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN);
+	__raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS);
+
+	c2_port->netstats.tx_packets++;
+	c2_port->netstats.tx_bytes += maplen;
+
+	/* Loop thru additional data fragments and queue them */
+	if (skb_shinfo(skb)->nr_frags) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			maplen = frag->size;
+			mapaddr =
+			    pci_map_page(c2dev->pcidev, frag->page,
+					 frag->page_offset, maplen,
+					 PCI_DMA_TODEVICE);
+
+			elem = elem->next;
+			elem->skb = NULL;	/* fragments carry no skb */
+			elem->mapaddr = mapaddr;
+			elem->maplen = maplen;
+
+			/* Tell HW to xmit */
+			__raw_writeq(cpu_to_be64(mapaddr),
+				     elem->hw_desc + C2_TXP_ADDR);
+			__raw_writew(cpu_to_be16(maplen),
+				     elem->hw_desc + C2_TXP_LEN);
+			__raw_writew(cpu_to_be16(TXP_HTXD_READY),
+				     elem->hw_desc + C2_TXP_FLAGS);
+			/* NOTE(review): counts each fragment as a packet */
+			c2_port->netstats.tx_packets++;
+			c2_port->netstats.tx_bytes += maplen;
+		}
+	}
+
+	tx_ring->to_use = elem->next;
+	c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
+
+	/* Stop early so a maximally fragmented skb always fits next time */
+	if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
+		netif_stop_queue(netdev);
+		if (netif_msg_tx_queued(c2_port))
+			pr_debug("%s: transmit queue full\n",
+				netdev->name);
+	}
+
+	spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+	netdev->trans_start = jiffies;
+
+	return NETDEV_TX_OK;
+}
+
+static struct net_device_stats *c2_get_stats(struct net_device *netdev)
+{
+	struct c2_port *c2_port = netdev_priv(netdev);
+
+	return &c2_port->netstats;	/* counters updated by the TX/RX paths */
+}
+
+/*
+ * tx_timeout callback: optionally log the event (when timer messages are
+ * enabled for the port) and then run c2_tx_clean() on the port.
+ */
+static void c2_tx_timeout(struct net_device *netdev)
+{
+	struct c2_port *port = netdev_priv(netdev);
+
+	if (netif_msg_timer(port)) {
+		pr_debug("%s: tx timeout\n", netdev->name);
+	}
+
+	c2_tx_clean(port);
+}
+
+/*
+ * change_mtu callback.
+ *
+ * Rejects values outside [ETH_ZLEN, ETH_JUMBO_MTU] with -EINVAL, otherwise
+ * stores the new MTU.  If the interface is running it is cycled with
+ * c2_down() followed by c2_up(); a failure to bring the interface back up
+ * is returned to the caller rather than being reported as success.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int c2_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int ret = 0;
+
+	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev)) {
+		c2_down(netdev);
+
+		/* propagate a re-open failure instead of dropping it */
+		ret = c2_up(netdev);
+	}
+
+	return ret;
+}
+
+/*
+ * Allocate and initialise the Ethernet net_device for an adapter.
+ *
+ * @c2dev:     adapter the new device belongs to
+ * @mmio_addr: mapping of the adapter PCI register block; the MAC address
+ *             is read from it at offset C2_REGS_ENADDR
+ *
+ * Installs the net_device callbacks, fills in the c2_port private data and
+ * copies in the MAC address.  On success c2dev->netdev is set and the
+ * (not yet registered) device is returned.  Returns NULL when the
+ * allocation fails or the adapter reports an invalid MAC address; in the
+ * latter case the net_device is freed again before returning.
+ */
+static struct net_device *c2_devinit(struct c2_dev *c2dev,
+				     void __iomem * mmio_addr)
+{
+	struct c2_port *c2_port = NULL;
+	struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
+
+	if (!netdev) {
+		pr_debug("c2_port etherdev alloc failed\n");
+		return NULL;
+	}
+
+	SET_MODULE_OWNER(netdev);
+	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+	netdev->open = c2_up;
+	netdev->stop = c2_down;
+	netdev->hard_start_xmit = c2_xmit_frame;
+	netdev->get_stats = c2_get_stats;
+	netdev->tx_timeout = c2_tx_timeout;
+	netdev->change_mtu = c2_change_mtu;
+	netdev->watchdog_timeo = C2_TX_TIMEOUT;
+	netdev->irq = c2dev->pcidev->irq;
+
+	c2_port = netdev_priv(netdev);
+	c2_port->netdev = netdev;
+	c2_port->c2dev = c2dev;
+	c2_port->msg_enable = netif_msg_init(debug, default_msg);
+	c2_port->tx_ring.count = C2_NUM_TX_DESC;
+	c2_port->rx_ring.count = C2_NUM_RX_DESC;
+
+	spin_lock_init(&c2_port->tx_lock);
+
+	/* Copy our 48-bit ethernet hardware address */
+	memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, ETH_ALEN);
+
+	/* Validate the MAC address */
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		pr_debug("Invalid MAC Address\n");
+		c2_print_macaddr(netdev);
+		free_netdev(netdev);
+		return NULL;
+	}
+
+	c2dev->netdev = netdev;
+
+	return netdev;
+}
+
+/*
+ * PCI probe callback: bring up one AMSO1100 adapter.
+ *
+ * Enables the device, validates and claims BAR0/2/4, sets the DMA mask,
+ * checks the adapter's magic/version/IVN registers, allocates the c2_dev,
+ * hooks the interrupt, creates and registers the net device, maps the
+ * MMIO regions and finally initialises the RNIC and registers the device.
+ *
+ * Returns 0 on success or a negative errno.  On failure the bailN labels
+ * release what has been acquired so far, in reverse order of acquisition.
+ * The temporary mmio_regs mapping is not covered by the labels, so every
+ * error path taken while it is live unmaps it explicitly.
+ */
+static int __devinit c2_probe(struct pci_dev *pcidev,
+			      const struct pci_device_id *ent)
+{
+	int ret = 0, i;
+	unsigned long reg0_start, reg0_flags, reg0_len;
+	unsigned long reg2_start, reg2_flags, reg2_len;
+	unsigned long reg4_start, reg4_flags, reg4_len;
+	unsigned kva_map_size;
+	struct net_device *netdev = NULL;
+	struct c2_dev *c2dev = NULL;
+	void __iomem *mmio_regs = NULL;
+
+	printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
+		DRV_VERSION);
+
+	/* Enable PCI device */
+	ret = pci_enable_device(pcidev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
+			pci_name(pcidev));
+		goto bail0;
+	}
+
+	reg0_start = pci_resource_start(pcidev, BAR_0);
+	reg0_len = pci_resource_len(pcidev, BAR_0);
+	reg0_flags = pci_resource_flags(pcidev, BAR_0);
+
+	reg2_start = pci_resource_start(pcidev, BAR_2);
+	reg2_len = pci_resource_len(pcidev, BAR_2);
+	reg2_flags = pci_resource_flags(pcidev, BAR_2);
+
+	reg4_start = pci_resource_start(pcidev, BAR_4);
+	reg4_len = pci_resource_len(pcidev, BAR_4);
+	reg4_flags = pci_resource_flags(pcidev, BAR_4);
+
+	pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
+	pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
+	pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
+
+	/* Make sure PCI base addr are MMIO */
+	if (!(reg0_flags & IORESOURCE_MEM) ||
+	    !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
+		printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Check for weird/broken PCI region reporting */
+	if ((reg0_len < C2_REG0_SIZE) ||
+	    (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
+		printk(KERN_ERR PFX "Invalid PCI region sizes\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Reserve PCI I/O and memory resources */
+	ret = pci_request_regions(pcidev, DRV_NAME);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: Unable to request regions\n",
+			pci_name(pcidev));
+		goto bail1;
+	}
+
+	if ((sizeof(dma_addr_t) > 4)) {
+		ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "64b DMA configuration failed\n");
+			goto bail2;
+		}
+	} else {
+		ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "32b DMA configuration failed\n");
+			goto bail2;
+		}
+	}
+
+	/* Enables bus-mastering on the device */
+	pci_set_master(pcidev);
+
+	/* Remap the adapter PCI registers in BAR4 */
+	mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+				    sizeof(struct c2_adapter_pci_regs));
+	if (mmio_regs == 0UL) {
+		printk(KERN_ERR PFX
+			"Unable to remap adapter PCI registers in BAR4\n");
+		ret = -EIO;
+		goto bail2;
+	}
+
+	/* Validate PCI regs magic */
+	for (i = 0; i < sizeof(c2_magic); i++) {
+		if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
+			printk(KERN_ERR PFX "Downlevel Firmware boot loader "
+				"[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
+			       "utility to update your boot loader\n",
+				i + 1, sizeof(c2_magic),
+				readb(mmio_regs + C2_REGS_MAGIC + i),
+				c2_magic[i]);
+			printk(KERN_ERR PFX "Adapter not claimed\n");
+			iounmap(mmio_regs);
+			ret = -EIO;
+			goto bail2;
+		}
+	}
+
+	/* Validate the adapter version */
+	if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
+		printk(KERN_ERR PFX "Version mismatch "
+			"[fw=%u, c2=%u], Adapter not claimed\n",
+			be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)),
+			C2_VERSION);
+		ret = -EINVAL;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	/* Validate the adapter IVN */
+	if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
+		printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
+		       "the OpenIB device support kit. "
+		       "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
+			be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)),
+			C2_IVN);
+		ret = -EINVAL;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	/* Allocate hardware structure */
+	c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
+	if (!c2dev) {
+		printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
+			pci_name(pcidev));
+		ret = -ENOMEM;
+		iounmap(mmio_regs);
+		goto bail2;
+	}
+
+	memset(c2dev, 0, sizeof(*c2dev));
+	spin_lock_init(&c2dev->lock);
+	c2dev->pcidev = pcidev;
+	c2dev->cur_tx = 0;
+
+	/* Get the last RX index */
+	c2dev->cur_rx =
+	    (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) -
+	     0xffffc000) / sizeof(struct c2_rxp_desc);
+
+	/* Request an interrupt line for the driver */
+	ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
+			pci_name(pcidev), pcidev->irq);
+		iounmap(mmio_regs);
+		goto bail3;
+	}
+
+	/* Set driver specific data */
+	pci_set_drvdata(pcidev, c2dev);
+
+	/* Initialize network device */
+	if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+		/*
+		 * ret is still 0 from the successful request_irq(); set an
+		 * error so the probe is not reported as successful after
+		 * everything has been torn down.
+		 */
+		ret = -ENOMEM;
+		iounmap(mmio_regs);
+		goto bail4;
+	}
+
+	/* Save off the actual size prior to unmapping mmio_regs */
+	kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE));
+
+	/* Unmap the adapter PCI registers in BAR4 */
+	iounmap(mmio_regs);
+
+	/* Register network device */
+	ret = register_netdev(netdev);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
+			ret);
+		goto bail5;
+	}
+
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+
+	/* Remap the adapter HRXDQ PA space to kernel VA space */
+	c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
+					       C2_RXP_HRXDQ_SIZE);
+	if (c2dev->mmio_rxp_ring == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
+		ret = -EIO;
+		goto bail6;
+	}
+
+	/* Remap the adapter HTXDQ PA space to kernel VA space */
+	c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
+					       C2_TXP_HTXDQ_SIZE);
+	if (c2dev->mmio_txp_ring == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
+		ret = -EIO;
+		goto bail7;
+	}
+
+	/* Save off the current RX index in the last 4 bytes of the TXP Ring */
+	C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+	/* Remap the PCI registers in adapter BAR0 to kernel VA space */
+	c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
+	if (c2dev->regs == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap BAR0\n");
+		ret = -EIO;
+		goto bail8;
+	}
+
+	/* Remap the PCI registers in adapter BAR4 to kernel VA space */
+	c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
+	c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+				     kva_map_size);
+	if (c2dev->kva == 0UL) {
+		printk(KERN_ERR PFX "Unable to remap BAR4\n");
+		ret = -EIO;
+		goto bail9;
+	}
+
+	/* Print out the MAC address */
+	c2_print_macaddr(netdev);
+
+	ret = c2_rnic_init(c2dev);
+	if (ret) {
+		printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
+		goto bail10;
+	}
+
+	/* Registration can fail; do not report success in that case */
+	ret = c2_register_device(c2dev);
+	if (ret) {
+		printk(KERN_ERR PFX "c2_register_device failed: %d\n", ret);
+		/* undo c2_rnic_init() before unwinding the mappings */
+		c2_rnic_term(c2dev);
+		goto bail10;
+	}
+
+	return 0;
+
+ bail10:
+	iounmap(c2dev->kva);
+
+ bail9:
+	iounmap(c2dev->regs);
+
+ bail8:
+	iounmap(c2dev->mmio_txp_ring);
+
+ bail7:
+	iounmap(c2dev->mmio_rxp_ring);
+
+ bail6:
+	unregister_netdev(netdev);
+
+ bail5:
+	free_netdev(netdev);
+
+ bail4:
+	free_irq(pcidev->irq, c2dev);
+
+ bail3:
+	ib_dealloc_device(&c2dev->ibdev);
+
+ bail2:
+	pci_release_regions(pcidev);
+
+ bail1:
+	pci_disable_device(pcidev);
+
+ bail0:
+	return ret;
+}
+
+/*
+ * PCI remove callback: release what c2_probe() set up for this device.
+ *
+ * Order of teardown: c2_unregister_device(), c2_rnic_term(), unregister
+ * and free the net device, free the IRQ, unmap the four MMIO regions,
+ * free the c2_dev, release the PCI regions, disable the device and clear
+ * the driver data pointer.
+ *
+ * NOTE(review): the net device is freed before free_irq(); confirm the
+ * interrupt handler cannot still touch it in that window.
+ */
+static void __devexit c2_remove(struct pci_dev *pcidev)
+{
+	struct c2_dev *c2dev = pci_get_drvdata(pcidev);
+	struct net_device *netdev = c2dev->netdev;
+
+	/* Unregister with OpenIB */
+	c2_unregister_device(c2dev);
+
+	/* Clean up the RNIC resources */
+	c2_rnic_term(c2dev);
+
+	/* Remove network device from the kernel */
+	unregister_netdev(netdev);
+
+	/* Free network device */
+	free_netdev(netdev);
+
+	/* Free the interrupt line */
+	free_irq(pcidev->irq, c2dev);
+
+	/* missing: Turn LEDs off here */
+
+	/* Unmap adapter PA space */
+	iounmap(c2dev->kva);
+	iounmap(c2dev->regs);
+	iounmap(c2dev->mmio_txp_ring);
+	iounmap(c2dev->mmio_rxp_ring);
+
+	/* Free the hardware structure */
+	ib_dealloc_device(&c2dev->ibdev);
+
+	/* Release reserved PCI I/O and memory resources */
+	pci_release_regions(pcidev);
+
+	/* Disable PCI device */
+	pci_disable_device(pcidev);
+
+	/* Clear driver specific data */
+	pci_set_drvdata(pcidev, NULL);
+}
+
+/*
+ * PCI driver descriptor: ties the device id table to the probe and remove
+ * callbacks above.  Registered in c2_init_module() and unregistered in
+ * c2_exit_module().
+ */
+static struct pci_driver c2_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = c2_pci_table,
+	.probe = c2_probe,
+	.remove = __devexit_p(c2_remove),
+};
+
+/* Module entry point: register the PCI driver and return the result. */
+static int __init c2_init_module(void)
+{
+	int rc = pci_module_init(&c2_pci_driver);
+
+	return rc;
+}
+
+/* Module exit point: unregister the PCI driver. */
+static void __exit c2_exit_module(void)
+{
+	pci_unregister_driver(&c2_pci_driver);
+}
+
+module_init(c2_init_module);
+module_exit(c2_exit_module);
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
new file mode 100644
index 0000000..1b17dcd
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __C2_H
+#define __C2_H
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <asm/semaphore.h>
+
+#include "c2_provider.h"
+#include "c2_mq.h"
+#include "c2_status.h"
+
+/* Driver identification; PFX is the prefix put in front of log messages. */
+#define DRV_NAME     "c2"
+#define DRV_VERSION  "1.1"
+#define PFX          DRV_NAME ": "
+
+/* PCI base address register indices used by the driver. */
+#define BAR_0                0
+#define BAR_2                2
+#define BAR_4                4
+
+#define RX_BUF_SIZE         (1536 + 8)
+/* Largest MTU accepted by c2_change_mtu(). */
+#define ETH_JUMBO_MTU        9000
+/* Adapter signature as a string; c2_magic[] below holds the same bytes. */
+#define C2_MAGIC            "CEPHEUS"
+/* Version and IVN that c2_probe() requires the adapter to report. */
+#define C2_VERSION           4
+#define C2_IVN              (18 & 0x7fffffff)
+
+/* Minimum BAR0/BAR2/BAR4 sizes accepted by c2_probe(). */
+#define C2_REG0_SIZE        (16 * 1024)
+#define C2_REG2_SIZE        (2 * 1024 * 1024)
+#define C2_REG4_SIZE        (256 * 1024 * 1024)
+/* Element counts stored in the port's tx_ring / rx_ring by c2_devinit(). */
+#define C2_NUM_TX_DESC       341
+#define C2_NUM_RX_DESC       256
+/* Offsets (from the start of BAR4) and sizes of regions c2_probe() maps. */
+#define C2_PCI_REGS_OFFSET  (0x10000)
+#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
+#define C2_RXP_HRXDQ_SIZE   (4096)
+#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
+#define C2_TXP_HTXDQ_SIZE   (4096)
+/* Value stored in netdev->watchdog_timeo by c2_devinit(). */
+#define C2_TX_TIMEOUT	    (6*HZ)
+
+/*
+ * ASCII "CEPHEUS": the signature c2_probe() compares byte-by-byte against
+ * the adapter registers starting at C2_REGS_MAGIC.
+ */
+static const u8 c2_magic[] = {
+	0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
+};
+
+/*
+ * Byte offsets into the adapter PCI register block.  Each value equals the
+ * offset of the corresponding field in struct c2_adapter_pci_regs (for
+ * example C2_REGS_ENADDR is host_enaddr and C2_REGS_HRX_CUR is
+ * fw_hrxd_cur); not every field has an offset constant.
+ */
+enum adapter_pci_regs {
+	C2_REGS_MAGIC = 0x0000,
+	C2_REGS_VERS = 0x0008,
+	C2_REGS_IVN = 0x000C,
+	C2_REGS_PCI_WINSIZE = 0x0010,
+	C2_REGS_Q0_QSIZE = 0x0014,
+	C2_REGS_Q0_MSGSIZE = 0x0018,
+	C2_REGS_Q0_POOLSTART = 0x001C,
+	C2_REGS_Q0_SHARED = 0x0020,
+	C2_REGS_Q1_QSIZE = 0x0024,
+	C2_REGS_Q1_MSGSIZE = 0x0028,
+	C2_REGS_Q1_SHARED = 0x0030,
+	C2_REGS_Q2_QSIZE = 0x0034,
+	C2_REGS_Q2_MSGSIZE = 0x0038,
+	C2_REGS_Q2_SHARED = 0x0040,
+	C2_REGS_ENADDR = 0x004C,
+	C2_REGS_RDMA_ENADDR = 0x0054,
+	C2_REGS_HRX_CUR = 0x006C,
+};
+
+/*
+ * Layout of the adapter PCI register block.  c2_probe() maps
+ * sizeof(struct c2_adapter_pci_regs) bytes at BAR4 + C2_PCI_REGS_OFFSET
+ * and reads individual fields through the C2_REGS_* offsets.
+ */
+struct c2_adapter_pci_regs {
+	char reg_magic[8];
+	u32 version;
+	u32 ivn;
+	u32 pci_window_size;
+	u32 q0_q_size;
+	u32 q0_msg_size;
+	u32 q0_pool_start;
+	u32 q0_shared;
+	u32 q1_q_size;
+	u32 q1_msg_size;
+	u32 q1_pool_start;
+	u32 q1_shared;
+	u32 q2_q_size;
+	u32 q2_msg_size;
+	u32 q2_pool_start;
+	u32 q2_shared;
+	u32 log_start;
+	u32 log_size;
+	u8 host_enaddr[8];
+	u8 rdma_enaddr[8];
+	u32 crash_entry;
+	u32 crash_ready[2];
+	u32 fw_txd_cur;
+	u32 fw_hrxd_cur;
+	u32 fw_rxd_cur;
+};
+
+/*
+ * Byte offsets of the BAR0 registers; each value equals the offset of the
+ * matching field in struct c2_pci_regs below (C2_HISR is hostisr, ...,
+ * C2_IDIS is int_disable).
+ */
+enum pci_regs {
+	C2_HISR = 0x0000,
+	C2_DISR = 0x0004,
+	C2_HIMR = 0x0008,
+	C2_DIMR = 0x000C,
+	C2_NISR0 = 0x0010,
+	C2_NISR1 = 0x0014,
+	C2_NIMR0 = 0x0018,
+	C2_NIMR1 = 0x001C,
+	C2_IDIS = 0x0020,
+};
+
+/*
+ * Single-bit masks (bits 8, 17 and 31).
+ * NOTE(review): presumably interrupt status/mask bits for the host RX/TX
+ * paths — the users are not visible here, confirm against the handler.
+ */
+enum {
+	C2_PCI_HRX_INT = 1 << 8,
+	C2_PCI_HTX_INT = 1 << 17,
+	C2_PCI_HRX_QUI = 1 << 31,
+};
+
+/*
+ * Cepheus registers in BAR0.
+ *
+ * Nine consecutive 32-bit registers; their byte offsets are the values of
+ * enum pci_regs above.
+ */
+struct c2_pci_regs {
+	u32 hostisr;
+	u32 dmaisr;
+	u32 hostimr;
+	u32 dmaimr;
+	u32 netisr0;
+	u32 netisr1;
+	u32 netimr0;
+	u32 netimr1;
+	u32 int_disable;
+};
+
+/*
+ * TXP flags: values for the flags field of a TX descriptor.  The transmit
+ * path writes TXP_HTXD_READY to C2_TXP_FLAGS to hand a descriptor to the
+ * hardware.
+ */
+enum c2_txp_flags {
+	TXP_HTXD_DONE = 0,
+	TXP_HTXD_READY = 1 << 0,
+	TXP_HTXD_UNINIT = 1 << 1,
+};
+
+/*
+ * RXP flags.  Note the encoding differs from the TXP flags: here 0 is
+ * UNINIT and bit 1 is DONE.
+ */
+enum c2_rxp_flags {
+	RXP_HRXD_UNINIT = 0,
+	RXP_HRXD_READY = 1 << 0,
+	RXP_HRXD_DONE = 1 << 1,
+};
+
+/* RXP status values (zero, OK in bit 0, BUF_OV in bit 1). */
+enum c2_rxp_status {
+	RXP_HRXD_ZERO = 0,
+	RXP_HRXD_OK = 1 << 0,
+	RXP_HRXD_BUF_OV = 1 << 1,
+};
+
+/*
+ * TXP descriptor fields: byte offsets within a hardware TX descriptor.
+ * They match the packed layout of struct c2_txp_desc (flags, len, addr).
+ */
+enum txp_desc {
+	C2_TXP_FLAGS = 0x0000,
+	C2_TXP_LEN = 0x0002,
+	C2_TXP_ADDR = 0x0004,
+};
+
+/*
+ * RXP descriptor fields: byte offsets within a hardware RX descriptor.
+ * They match the packed layout of struct c2_rxp_desc (flags, status,
+ * count, len, addr).
+ */
+enum rxp_desc {
+	C2_RXP_FLAGS = 0x0000,
+	C2_RXP_STATUS = 0x0002,
+	C2_RXP_COUNT = 0x0004,
+	C2_RXP_LEN = 0x0006,
+	C2_RXP_ADDR = 0x0008,
+};
+
+/*
+ * TX descriptor layout.  Packed, so flags/len/addr sit at byte offsets
+ * 0/2/4, i.e. at C2_TXP_FLAGS/C2_TXP_LEN/C2_TXP_ADDR.
+ */
+struct c2_txp_desc {
+	u16 flags;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+/*
+ * RX descriptor layout.  Packed, so the fields sit at the C2_RXP_* byte
+ * offsets.  c2_probe() divides by sizeof(struct c2_rxp_desc) to turn the
+ * adapter's current-RX value into a ring index.
+ */
+struct c2_rxp_desc {
+	u16 flags;
+	u16 status;
+	u16 count;
+	u16 len;
+	u64 addr;
+} __attribute__ ((packed));
+
+/*
+ * Packed 8-byte RX header.
+ * NOTE(review): presumably the "+ 8" in RX_BUF_SIZE accounts for this
+ * header — confirm against the receive path.
+ */
+struct c2_rxp_hdr {
+	u16 flags;
+	u16 status;
+	u16 len;
+	u16 rsvd;
+} __attribute__ ((packed));
+
+/*
+ * TX descriptor record: length, status and the DMA offset of the next
+ * descriptor.  Same layout as struct c2_rx_desc.
+ * NOTE(review): presumably the host-side ("ht_desc") ring entry — confirm.
+ */
+struct c2_tx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+/*
+ * RX descriptor record: length, status and the DMA offset of the next
+ * descriptor.  Same layout as struct c2_tx_desc.
+ * NOTE(review): presumably the host-side ("ht_desc") ring entry — confirm.
+ */
+struct c2_rx_desc {
+	u32 len;
+	u32 status;
+	dma_addr_t next_offset;
+};
+
+/*
+ * Allocator state: a table of unsigned longs with last/max counters and
+ * its own spinlock.  Field-for-field identical to struct c2_pd_table.
+ * NOTE(review): looks like a bitmap id allocator — confirm in the users.
+ */
+struct c2_alloc {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+/*
+ * Array of pages of pointers: each page_list entry holds a page of void *
+ * slots together with a "used" counter for that page.
+ */
+struct c2_array {
+	struct {
+		void **page;
+		int used;
+	} *page_list;
+};
+
+/*
+ * The MQ shared pointer pool is organized as a linked list of
+ * chunks. Each chunk contains a linked list of free shared pointers
+ * that can be allocated to a given user mode client.
+ *
+ * shared_ptr[] is a trailing variable-length array of 16-bit entries, so
+ * a chunk is allocated larger than sizeof(struct sp_chunk).
+ */
+struct sp_chunk {
+	struct sp_chunk *next;
+	dma_addr_t dma_addr;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 head;
+	u16 shared_ptr[0];
+};
+
+/*
+ * Protection domain table, embedded in struct c2_dev as pd_table and
+ * managed through c2_init_pd_table() / c2_cleanup_pd_table().
+ * Field-for-field identical to struct c2_alloc.
+ */
+struct c2_pd_table {
+	u32 last;
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
+/*
+ * Queue pair table, embedded in struct c2_dev as qp_table: an idr plus a
+ * spinlock and a "last" value.
+ */
+struct c2_qp_table {
+	struct idr idr;
+	spinlock_t lock;
+	int last;
+};
+
+/*
+ * One ring element.  Elements are chained through next; the transmit path
+ * records the skb and its DMA mapping (mapaddr/maplen) here and writes the
+ * hardware descriptor through hw_desc.
+ */
+struct c2_element {
+	struct c2_element *next;
+	void *ht_desc;		/* host     descriptor */
+	void __iomem *hw_desc;	/* hardware descriptor */
+	struct sk_buff *skb;
+	dma_addr_t mapaddr;
+	u32 maplen;
+};
+
+/*
+ * Descriptor ring bookkeeping.  to_use is the next element the transmit
+ * path fills in; count is set from C2_NUM_TX_DESC / C2_NUM_RX_DESC in
+ * c2_devinit().
+ */
+struct c2_ring {
+	struct c2_element *to_clean;
+	struct c2_element *to_use;
+	struct c2_element *start;
+	unsigned long count;
+};
+
+/*
+ * Per-adapter state.
+ *
+ * ibdev is the first member: c2_probe() obtains the whole structure from
+ * ib_alloc_device() and to_c2dev() recovers it from an ib_device pointer
+ * with container_of().  regs, mmio_txp_ring, mmio_rxp_ring and kva are the
+ * MMIO mappings created in c2_probe() and released in c2_remove().
+ */
+struct c2_dev {
+	struct ib_device ibdev;
+	void __iomem *regs;
+	void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
+	void __iomem *mmio_rxp_ring;
+	spinlock_t lock;
+	struct pci_dev *pcidev;
+	struct net_device *netdev;
+	struct net_device *pseudo_netdev;
+	unsigned int cur_tx;
+	unsigned int cur_rx;
+	u32 adapter_handle;
+	int device_cap_flags;
+	void __iomem *kva;	/* KVA device memory */
+	unsigned long pa;	/* PA device memory */
+	void **qptr_array;
+
+	kmem_cache_t *host_msg_cache;
+
+	struct list_head cca_link;		/* adapter list */
+	struct list_head eh_wakeup_list;	/* event wakeup list */
+	wait_queue_head_t req_vq_wo;
+
+	/* Cached RNIC properties */
+	struct ib_device_attr props;
+
+	struct c2_pd_table pd_table;
+	struct c2_qp_table qp_table;
+	int ports;		/* num of GigE ports */
+	int devnum;
+	spinlock_t vqlock;	/* sync vbs req MQ */
+
+	/* Verbs Queues */
+	struct c2_mq req_vq;	/* Verbs Request MQ */
+	struct c2_mq rep_vq;	/* Verbs Reply MQ */
+	struct c2_mq aeq;	/* Async Events MQ */
+
+	/* Kernel client MQs */
+	struct sp_chunk *kern_mqsp_pool;
+
+	/* Device updates these values when posting messages to a host
+	 * target queue */
+	u16 req_vq_shared;
+	u16 rep_vq_shared;
+	u16 aeq_shared;
+	u16 irq_claimed;
+
+	/*
+	 * Shared host target pages for user-accessible MQs.
+	 */
+	int hthead;		/* index of first free entry */
+	void *htpages;		/* kernel vaddr */
+	int htlen;		/* length of htpages memory */
+	void *htuva;		/* user mapped vaddr */
+	spinlock_t htlock;	/* serialize allocation */
+
+	u64 adapter_hint_uva;	/* access to the activity FIFO */
+
+	//	spinlock_t aeq_lock;
+	//	spinlock_t rnic_lock;
+
+	u16 *hint_count;
+	dma_addr_t hint_count_dma;
+	u16 hints_read;
+
+	int init;		/* TRUE if it's ready */
+	char ae_cache_name[16];
+	char vq_cache_name[16];
+};
+
+/*
+ * Private data of the Ethernet net_device (allocated by alloc_etherdev()
+ * in c2_devinit() and reached through netdev_priv()).
+ *
+ * The transmit path updates tx_avail and tx_ring.to_use while holding
+ * tx_lock; netstats is what c2_get_stats() returns.
+ */
+struct c2_port {
+	u32 msg_enable;
+	struct c2_dev *c2dev;
+	struct net_device *netdev;
+
+	spinlock_t tx_lock;
+	u32 tx_avail;
+	struct c2_ring tx_ring;
+	struct c2_ring rx_ring;
+
+	void *mem;		/* PCI memory for host rings */
+	dma_addr_t dma;
+	unsigned long mem_size;
+
+	u32 rx_buf_size;
+
+	struct net_device_stats netstats;
+};
+
+/*
+ * Activity FIFO registers in BAR0.
+ */
+#define PCI_BAR0_HOST_HINT	0x100
+#define PCI_BAR0_ADAPTER_HINT	0x2000
+
+/*
+ * CQ flag bits.
+ * NOTE(review): this comment used to read "Ammasso PCI vendor id and
+ * Cepheus PCI device id", which does not describe these two defines.
+ */
+#define CQ_ARMED 	0x01
+#define CQ_WAIT_FOR_DMA	0x80
+
+/*
+ * The format of a hint is as follows:
+ * Lower 16 bits are the count of hints for the queue.
+ * Next 15 bits are the qp_index
+ * Upper most bit depends on who reads it:
+ *    If read by producer, then it means Full (1) or Not-Full (0)
+ *    If read by consumer, then it means Empty (1) or Not-Empty (0)
+ *
+ * Every macro argument is parenthesized so that an expression argument
+ * (for example a conditional expression) is combined as a single operand.
+ */
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | (hint_count))
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+
+/*
+ * The following defines the offset in SDRAM for struct
+ * c2_adapter_pci_regs.  Same value as C2_PCI_REGS_OFFSET above.
+ */
+#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
+
+#ifndef readq
+/*
+ * Fallback 64-bit MMIO read built from two 32-bit reads: the upper word
+ * at addr + 4 is read first, then the lower word at addr.
+ */
+static inline u64 readq(const void __iomem * addr)
+{
+	u64 hi = readl(addr + 4);
+	u64 lo = readl(addr);
+
+	return (hi << 32) | lo;
+}
+#endif
+
+#ifndef writeq
+/*
+ * Fallback 64-bit raw MMIO write built from two 32-bit raw writes: the
+ * low word goes to addr first, then the high word to addr + 4.
+ * NOTE(review): the guard tests writeq while the function defined is
+ * __raw_writeq — confirm this cannot collide with an arch definition.
+ */
+static inline void __raw_writeq(u64 val, void __iomem * addr)
+{
+	const u32 lo = (u32) (val);
+	const u32 hi = (u32) (val >> 32);
+
+	__raw_writel(lo, addr);
+	__raw_writel(hi, (addr + 4));
+}
+#endif
+
+/*
+ * The current RX index is kept, big-endian, in the last 4 bytes (offset
+ * 4092) of the 4096-byte TXP ring mapping.  The c2dev argument is
+ * parenthesized so any pointer expression can be passed.
+ */
+#define C2_SET_CUR_RX(c2dev, cur_rx) \
+	__raw_writel(cpu_to_be32(cur_rx), (c2dev)->mmio_txp_ring + 4092)
+
+#define C2_GET_CUR_RX(c2dev) \
+	be32_to_cpu(readl((c2dev)->mmio_txp_ring + 4092))
+
+/* Recover the enclosing c2_dev from a pointer to its embedded ib_device. */
+static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct c2_dev, ibdev);
+}
+
+/*
+ * Translate the result code carried in an adapter reply into 0 or a
+ * negative errno.  Result codes without an explicit mapping yield -EAGAIN.
+ */
+static inline int c2_errno(void *reply)
+{
+	int err;
+
+	switch (c2_wr_get_result(reply)) {
+	case C2_OK:
+		err = 0;
+		break;
+
+	/* resource exhaustion */
+	case CCERR_NO_BUFS:
+	case CCERR_INSUFFICIENT_RESOURCES:
+	case CCERR_ZERO_RDMA_READ_RESOURCES:
+		err = -ENOMEM;
+		break;
+
+	/* object still in use */
+	case CCERR_MR_IN_USE:
+	case CCERR_QP_IN_USE:
+		err = -EBUSY;
+		break;
+
+	/* addressing and connection errors */
+	case CCERR_ADDR_IN_USE:
+		err = -EADDRINUSE;
+		break;
+	case CCERR_ADDR_NOT_AVAIL:
+		err = -EADDRNOTAVAIL;
+		break;
+	case CCERR_CONN_RESET:
+		err = -ECONNRESET;
+		break;
+
+	case CCERR_NOT_IMPLEMENTED:
+	case CCERR_INVALID_WQE:
+		err = -ENOSYS;
+		break;
+	case CCERR_QP_NOT_PRIVILEGED:
+		err = -EPERM;
+		break;
+	case CCERR_STACK_ERROR:
+		err = -EPROTO;
+		break;
+
+	/* memory access checks */
+	case CCERR_ACCESS_VIOLATION:
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		err = -EFAULT;
+		break;
+
+	/* invalid arguments or handles */
+	case CCERR_STAG_STATE_NOT_INVALID:
+	case CCERR_INVALID_ADDRESS:
+	case CCERR_INVALID_CQ:
+	case CCERR_INVALID_EP:
+	case CCERR_INVALID_MODIFIER:
+	case CCERR_INVALID_MTU:
+	case CCERR_INVALID_PD_ID:
+	case CCERR_INVALID_QP:
+	case CCERR_INVALID_RNIC:
+	case CCERR_INVALID_STAG:
+		err = -EINVAL;
+		break;
+
+	default:
+		err = -EAGAIN;
+		break;
+	}
+
+	return err;
+}
+
+/* Device: registration, RNIC bring-up/teardown, interrupt and addresses */
+extern int c2_register_device(struct c2_dev *c2dev);
+extern void c2_unregister_device(struct c2_dev *c2dev);
+extern int c2_rnic_init(struct c2_dev *c2dev);
+extern void c2_rnic_term(struct c2_dev *c2dev);
+extern void c2_rnic_interrupt(struct c2_dev *c2dev);
+extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+
+/* QPs (queue pairs) */
+extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
+		       struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
+extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
+extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
+extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+			struct ib_qp_attr *attr, int attr_mask);
+extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+				 int ord, int ird);
+extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+			struct ib_send_wr **bad_wr);
+extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+			   struct ib_recv_wr **bad_wr);
+extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
+extern void c2_set_qp_state(struct c2_qp *, int);
+extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
+
+/* PDs (protection domains) */
+extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
+extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
+extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
+
+/* CQs (completion queues) */
+extern int c2_init_cq(struct c2_dev *c2dev, int entries,
+		      struct c2_ucontext *ctx, struct c2_cq *cq);
+extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
+extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
+extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
+extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
+
+/* CM (connection management, iw_cm callbacks) */
+extern int c2_llp_connect(struct iw_cm_id *cm_id,
+			  struct iw_cm_conn_param *iw_param);
+extern int c2_llp_accept(struct iw_cm_id *cm_id,
+			 struct iw_cm_conn_param *iw_param);
+extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
+			 u8 pdata_len);
+extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
+extern int c2_llp_service_destroy(struct iw_cm_id *cm_id);
+
+/* MM (memory registration) */
+extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ 				      int page_size, int pbl_depth, u32 length,
+ 				      u32 off, u64 *va, enum c2_acf acf,
+				      struct c2_mr *mr);
+extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
+
+/* AE (asynchronous events) */
+extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
+
+/* MQSP Allocator (MQ shared pointer pool, see struct sp_chunk) */
+extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+			     struct sp_chunk **root);
+extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
+extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+			  dma_addr_t *dma_addr, gfp_t gfp_mask);
+extern void c2_free_mqsp(u16 * mqsp);
+#endif
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
new file mode 100644
index 0000000..08f46c8
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_status.h"
+#include "c2_ae.h"
+
+/*
+ * Map an adapter connection status code onto 0 (success) or a negative
+ * errno.  Unrecognised codes are logged and reported as -EIO.
+ */
+static int c2_convert_cm_status(u32 c2_status)
+{
+	int err;
+
+	switch (c2_status) {
+	case C2_CONN_STATUS_SUCCESS:
+		err = 0;
+		break;
+	case C2_CONN_STATUS_REJECTED:
+		err = -ENETRESET;
+		break;
+	case C2_CONN_STATUS_REFUSED:
+		err = -ECONNREFUSED;
+		break;
+	case C2_CONN_STATUS_TIMEDOUT:
+		err = -ETIMEDOUT;
+		break;
+	case C2_CONN_STATUS_NETUNREACH:
+		err = -ENETUNREACH;
+		break;
+	case C2_CONN_STATUS_HOSTUNREACH:
+		err = -EHOSTUNREACH;
+		break;
+	/* the three "invalid object" statuses share one errno */
+	case C2_CONN_STATUS_INVALID_RNIC:
+	case C2_CONN_STATUS_INVALID_QP:
+	case C2_CONN_STATUS_INVALID_QP_STATE:
+		err = -EINVAL;
+		break;
+	case C2_CONN_STATUS_ADDR_NOT_AVAIL:
+		err = -EADDRNOTAVAIL;
+		break;
+	default:
+		printk(KERN_ERR PFX
+		       "%s - Unable to convert CM status: %d\n",
+		       __FUNCTION__, c2_status);
+		err = -EIO;
+		break;
+	}
+
+	return err;
+}
+
+#ifdef DEBUG
+/*
+ * Debug helper: name of an asynchronous event id.  Ids outside
+ * [CCAE_REMOTE_SHUTDOWN, CCAE_RNIC_CATASTROPHIC_ERROR] yield
+ * "<invalid event>".  The table is indexed by the id's distance from
+ * CCAE_REMOTE_SHUTDOWN, so it must stay in enum order.
+ */
+static const char* to_event_str(int event)
+{
+	static const char* names[] = {
+		"CCAE_REMOTE_SHUTDOWN",
+		"CCAE_ACTIVE_CONNECT_RESULTS",
+		"CCAE_CONNECTION_REQUEST",
+		"CCAE_LLP_CLOSE_COMPLETE",
+		"CCAE_TERMINATE_MESSAGE_RECEIVED",
+		"CCAE_LLP_CONNECTION_RESET",
+		"CCAE_LLP_CONNECTION_LOST",
+		"CCAE_LLP_SEGMENT_SIZE_INVALID",
+		"CCAE_LLP_INVALID_CRC",
+		"CCAE_LLP_BAD_FPDU",
+		"CCAE_INVALID_DDP_VERSION",
+		"CCAE_INVALID_RDMA_VERSION",
+		"CCAE_UNEXPECTED_OPCODE",
+		"CCAE_INVALID_DDP_QUEUE_NUMBER",
+		"CCAE_RDMA_READ_NOT_ENABLED",
+		"CCAE_RDMA_WRITE_NOT_ENABLED",
+		"CCAE_RDMA_READ_TOO_SMALL",
+		"CCAE_NO_L_BIT",
+		"CCAE_TAGGED_INVALID_STAG",
+		"CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
+		"CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_TAGGED_INVALID_PD",
+		"CCAE_WRAP_ERROR",
+		"CCAE_BAD_CLOSE",
+		"CCAE_BAD_LLP_CLOSE",
+		"CCAE_INVALID_MSN_RANGE",
+		"CCAE_INVALID_MSN_GAP",
+		"CCAE_IRRQ_OVERFLOW",
+		"CCAE_IRRQ_MSN_GAP",
+		"CCAE_IRRQ_MSN_RANGE",
+		"CCAE_IRRQ_INVALID_STAG",
+		"CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
+		"CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
+		"CCAE_IRRQ_INVALID_PD",
+		"CCAE_IRRQ_WRAP_ERROR",
+		"CCAE_CQ_SQ_COMPLETION_OVERFLOW",
+		"CCAE_CQ_RQ_COMPLETION_ERROR",
+		"CCAE_QP_SRQ_WQE_ERROR",
+		"CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
+		"CCAE_CQ_OVERFLOW",
+		"CCAE_CQ_OPERATION_ERROR",
+		"CCAE_SRQ_LIMIT_REACHED",
+		"CCAE_QP_RQ_LIMIT_REACHED",
+		"CCAE_SRQ_CATASTROPHIC_ERROR",
+		"CCAE_RNIC_CATASTROPHIC_ERROR"
+	};
+
+	if (event >= CCAE_REMOTE_SHUTDOWN &&
+	    event <= CCAE_RNIC_CATASTROPHIC_ERROR)
+		return names[event - CCAE_REMOTE_SHUTDOWN];
+
+	return "<invalid event>";
+}
+
+/*
+ * Debug helper: name of a C2_QP_STATE_* value, or "<invalid QP state>"
+ * for anything else.
+ */
+static const char *to_qp_state_str(int state)
+{
+	const char *name;
+
+	switch (state) {
+	case C2_QP_STATE_IDLE:
+		name = "C2_QP_STATE_IDLE";
+		break;
+	case C2_QP_STATE_CONNECTING:
+		name = "C2_QP_STATE_CONNECTING";
+		break;
+	case C2_QP_STATE_RTS:
+		name = "C2_QP_STATE_RTS";
+		break;
+	case C2_QP_STATE_CLOSING:
+		name = "C2_QP_STATE_CLOSING";
+		break;
+	case C2_QP_STATE_TERMINATE:
+		name = "C2_QP_STATE_TERMINATE";
+		break;
+	case C2_QP_STATE_ERROR:
+		name = "C2_QP_STATE_ERROR";
+		break;
+	default:
+		name = "<invalid QP state>";
+		break;
+	}
+
+	return name;
+}
+#endif
+
+/*
+ * Handle one asynchronous event from the adapter.
+ *
+ * @c2dev:    adapter the event belongs to
+ * @mq_index: index into c2dev->qptr_array of the MQ carrying the event
+ *
+ * Consumes one message from the MQ (returning silently if there is none),
+ * then dispatches on the resource type stored in the message:
+ *   - QP: updates the QP state and forwards connect results, errors and
+ *         close events to the cm_id / QP event handlers;
+ *   - EP: forwards a connection request to the listening cm_id;
+ *   - CQ: raises IB_EVENT_CQ_ERR on the completion queue.
+ * The consumed message is released with c2_mq_free() before returning.
+ */
+void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
+{
+	struct c2_mq *mq = c2dev->qptr_array[mq_index];
+	union c2wr *wr;
+	void *resource_user_context;
+	struct iw_cm_event cm_event;
+	struct ib_event ib_event;
+	enum c2_resource_indicator resource_indicator;
+	enum c2_event_id event_id;
+	unsigned long flags;
+	int status;
+
+	/*
+	 * retrieve the message
+	 */
+	wr = c2_mq_consume(mq);
+	if (!wr)
+		return;
+
+	memset(&ib_event, 0, sizeof(ib_event));
+	memset(&cm_event, 0, sizeof(cm_event));
+
+	event_id = c2_wr_get_id(wr);
+	resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
+	resource_user_context =
+	    (void *) (unsigned long) wr->ae.ae_generic.user_context;
+
+	status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
+
+	pr_debug("event received c2_dev=%p, event_id=%d, "
+		"resource_indicator=%d, user_context=%p, status = %d\n",
+		c2dev, event_id, resource_indicator, resource_user_context,
+		status);
+
+	switch (resource_indicator) {
+	case C2_RES_IND_QP:{
+
+		struct c2_qp *qp = (struct c2_qp *)resource_user_context;
+		struct iw_cm_id *cm_id = qp->cm_id;
+		struct c2wr_ae_active_connect_results *res;
+
+		if (!cm_id) {
+			pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
+				qp);
+			goto ignore_it;
+		}
+		pr_debug("%s: event = %s, user_context=%llx, "
+			"resource_type=%x, "
+			"resource=%x, qp_state=%s\n",
+			__FUNCTION__,
+			to_event_str(event_id),
+			be64_to_cpu(wr->ae.ae_generic.user_context),
+			be32_to_cpu(wr->ae.ae_generic.resource_type),
+			be32_to_cpu(wr->ae.ae_generic.resource),
+			to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
+
+		c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
+
+		switch (event_id) {
+		case CCAE_ACTIVE_CONNECT_RESULTS:
+			res = &wr->ae.ae_active_connect_results;
+			cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+			cm_event.local_addr.sin_addr.s_addr = res->laddr;
+			cm_event.remote_addr.sin_addr.s_addr = res->raddr;
+			cm_event.local_addr.sin_port = res->lport;
+			cm_event.remote_addr.sin_port =	res->rport;
+			if (status == 0) {
+				cm_event.private_data_len =
+					be32_to_cpu(res->private_data_length);
+				cm_event.private_data = res->private_data;
+			} else {
+				/* failed connect: drop the QP's cm_id reference */
+				spin_lock_irqsave(&qp->lock, flags);
+				if (qp->cm_id) {
+					qp->cm_id->rem_ref(qp->cm_id);
+					qp->cm_id = NULL;
+				}
+				spin_unlock_irqrestore(&qp->lock, flags);
+				cm_event.private_data_len = 0;
+				cm_event.private_data = NULL;
+			}
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		case CCAE_TERMINATE_MESSAGE_RECEIVED:
+		case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
+			ib_event.device = &c2dev->ibdev;
+			ib_event.element.qp = &qp->ibqp;
+			ib_event.event = IB_EVENT_QP_REQ_ERR;
+
+			if (qp->ibqp.event_handler)
+				qp->ibqp.event_handler(&ib_event,
+						       qp->ibqp.
+						       qp_context);
+			break;
+		case CCAE_BAD_CLOSE:
+		case CCAE_LLP_CLOSE_COMPLETE:
+		case CCAE_LLP_CONNECTION_RESET:
+		case CCAE_LLP_CONNECTION_LOST:
+			BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
+
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id) {
+				qp->cm_id->rem_ref(qp->cm_id);
+				qp->cm_id = NULL;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+			cm_event.event = IW_CM_EVENT_CLOSE;
+			cm_event.status = 0;
+			if (cm_id->event_handler)
+				cm_id->event_handler(cm_id, &cm_event);
+			break;
+		default:
+			BUG_ON(1);
+			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
+				"CM_ID=%p\n",
+				__FUNCTION__, __LINE__,
+				event_id, qp, cm_id);
+			break;
+		}
+		break;
+	}
+
+	case C2_RES_IND_EP:{
+
+		struct c2wr_ae_connection_request *req =
+			&wr->ae.ae_connection_request;
+		struct iw_cm_id *cm_id =
+			(struct iw_cm_id *)resource_user_context;
+
+		pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
+		if (event_id != CCAE_CONNECTION_REQUEST) {
+			pr_debug("%s: Invalid event_id: %d\n",
+				__FUNCTION__, event_id);
+			break;
+		}
+		cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
+		cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
+		cm_event.local_addr.sin_addr.s_addr = req->laddr;
+		cm_event.remote_addr.sin_addr.s_addr = req->raddr;
+		cm_event.local_addr.sin_port = req->lport;
+		cm_event.remote_addr.sin_port = req->rport;
+		cm_event.private_data_len =
+			be32_to_cpu(req->private_data_length);
+		cm_event.private_data = req->private_data;
+
+		if (cm_id->event_handler)
+			cm_id->event_handler(cm_id, &cm_event);
+		break;
+	}
+
+	case C2_RES_IND_CQ:{
+		struct c2_cq *cq =
+		    (struct c2_cq *) resource_user_context;
+
+		pr_debug("IB_EVENT_CQ_ERR\n");
+		ib_event.device = &c2dev->ibdev;
+		ib_event.element.cq = &cq->ibcq;
+		ib_event.event = IB_EVENT_CQ_ERR;
+
+		if (cq->ibcq.event_handler)
+			cq->ibcq.event_handler(&ib_event,
+					       cq->ibcq.cq_context);
+		/* do not fall into the "bad resource indicator" case */
+		break;
+	}
+
+	default:
+		printk("Bad resource indicator = %d\n",
+		       resource_indicator);
+		break;
+	}
+
+ ignore_it:
+	c2_mq_free(mq);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.h b/drivers/infiniband/hw/amso1100/c2_ae.h
new file mode 100644
index 0000000..3a065c3
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_AE_H_
+#define _C2_AE_H_
+
+/*
+ * WARNING: If you change this file, also bump C2_IVN_BASE
+ * in common/include/clustercore/c2_ivn.h.
+ */
+
+/*
+ * Asynchronous Event Identifiers
+ *
+ * These start at 0x80 only so it's obvious from inspection that
+ * they are not work-request statuses.  This isn't critical.
+ *
+ * NOTE: these event id's must fit in eight bits.
+ */
+enum c2_event_id {
+	CCAE_REMOTE_SHUTDOWN = 0x80,	/* first ID; the rest count up from here */
+	CCAE_ACTIVE_CONNECT_RESULTS,
+	CCAE_CONNECTION_REQUEST,
+	CCAE_LLP_CLOSE_COMPLETE,
+	CCAE_TERMINATE_MESSAGE_RECEIVED,
+	CCAE_LLP_CONNECTION_RESET,
+	CCAE_LLP_CONNECTION_LOST,
+	CCAE_LLP_SEGMENT_SIZE_INVALID,
+	CCAE_LLP_INVALID_CRC,
+	CCAE_LLP_BAD_FPDU,
+	CCAE_INVALID_DDP_VERSION,
+	CCAE_INVALID_RDMA_VERSION,
+	CCAE_UNEXPECTED_OPCODE,
+	CCAE_INVALID_DDP_QUEUE_NUMBER,
+	CCAE_RDMA_READ_NOT_ENABLED,
+	CCAE_RDMA_WRITE_NOT_ENABLED,
+	CCAE_RDMA_READ_TOO_SMALL,
+	CCAE_NO_L_BIT,
+	CCAE_TAGGED_INVALID_STAG,
+	CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
+	CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
+	CCAE_TAGGED_INVALID_PD,
+	CCAE_WRAP_ERROR,
+	CCAE_BAD_CLOSE,
+	CCAE_BAD_LLP_CLOSE,
+	CCAE_INVALID_MSN_RANGE,
+	CCAE_INVALID_MSN_GAP,
+	CCAE_IRRQ_OVERFLOW,
+	CCAE_IRRQ_MSN_GAP,
+	CCAE_IRRQ_MSN_RANGE,
+	CCAE_IRRQ_INVALID_STAG,
+	CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
+	CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
+	CCAE_IRRQ_INVALID_PD,
+	CCAE_IRRQ_WRAP_ERROR,
+	CCAE_CQ_SQ_COMPLETION_OVERFLOW,
+	CCAE_CQ_RQ_COMPLETION_ERROR,
+	CCAE_QP_SRQ_WQE_ERROR,
+	CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
+	CCAE_CQ_OVERFLOW,
+	CCAE_CQ_OPERATION_ERROR,
+	CCAE_SRQ_LIMIT_REACHED,
+	CCAE_QP_RQ_LIMIT_REACHED,
+	CCAE_SRQ_CATASTROPHIC_ERROR,
+	CCAE_RNIC_CATASTROPHIC_ERROR
+/* WARNING: if you add more IDs, make sure their values still fit in eight bits. */
+};
+
+/*
+ * Resource Indicators and Identifiers
+ */
+enum c2_resource_indicator {
+	C2_RES_IND_QP = 1,	/* presumably a queue pair -- TODO confirm */
+	C2_RES_IND_EP,		/* endpoint: connection requests use this */
+	C2_RES_IND_CQ,		/* completion queue (reported as CQ error) */
+	C2_RES_IND_SRQ,		/* presumably shared receive queue -- confirm */
+};
+
+#endif /* _C2_AE_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
new file mode 100644
index 0000000..1d25299
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "c2.h"
+
+static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
+			       struct sp_chunk **head)
+{
+	/* Index of the slot that ends up holding the 0xFFFF terminator. */
+	const int last_slot = (PAGE_SIZE - sizeof(struct sp_chunk) -
+			       sizeof(u16)) / sizeof(u16) - 1;
+	struct sp_chunk *chunk;
+	int slot;
+
+	/* A chunk is one page; return -ENOMEM if none can be had. */
+	chunk = (struct sp_chunk *) __get_free_page(gfp_mask);
+	if (!chunk)
+		return -ENOMEM;
+
+	/* Map the page for device-to-host DMA and remember the mapping. */
+	chunk->dma_addr = dma_map_single(c2dev->ibdev.dma_device, chunk,
+					 PAGE_SIZE, DMA_FROM_DEVICE);
+	pci_unmap_addr_set(chunk, mapping, chunk->dma_addr);
+	chunk->next = NULL;
+	chunk->head = 0;
+
+	/* Free list: every slot stores the index of the next free slot. */
+	for (slot = 0; slot < last_slot; slot++)
+		chunk->shared_ptr[slot] = slot + 1;
+	chunk->shared_ptr[slot] = 0xFFFF;
+
+	/* Hand the initialised chunk back through *head. */
+	*head = chunk;
+	return 0;
+}
+
+int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+		      struct sp_chunk **root)
+{	/* A new pool is just one freshly allocated chunk stored in *root. */
+	return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
+}
+
+void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
+{
+	struct sp_chunk *next;
+
+	while (root) {	/* unmap, then free, each page-sized chunk */
+		next = root->next;
+		dma_unmap_single(c2dev->ibdev.dma_device,
+				 pci_unmap_addr(root, mapping), PAGE_SIZE,
+				 DMA_FROM_DEVICE);
+		free_page((unsigned long) root);	/* not a struct page */
+		root = next;
+	}
+}
+
+u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+		   dma_addr_t *dma_addr, gfp_t gfp_mask)
+{
+	u16 *slot;
+	u16 idx;
+
+	/*
+	 * Find a chunk with a free slot, appending a new one if all are full.
+	 */
+	for (; head; head = head->next) {
+		if (head->head != 0xFFFF)
+			break;
+		if (head->next)
+			continue;
+		if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) != 0)
+			return NULL;
+		head = head->next;
+		break;
+	}
+	if (!head)
+		return NULL;
+
+	/* Pop the first free index off the chosen chunk's free list. */
+	idx = head->head;
+	head->head = head->shared_ptr[idx];
+	slot = &head->shared_ptr[idx];
+	*dma_addr = head->dma_addr +
+		    ((unsigned long) slot - (unsigned long) head);
+	pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
+		 slot, (u64)*dma_addr);
+	return slot;
+}
+
+void c2_free_mqsp(u16 * mqsp)
+{
+	unsigned long addr = (unsigned long) mqsp;
+	struct sp_chunk *chunk;
+	u16 slot;
+
+	/* A chunk begins at a page boundary: mask down to find the owner. */
+	chunk = (struct sp_chunk *) (addr & PAGE_MASK);
+
+	/* Store the current free-list head into the slot being released. */
+	*mqsp = chunk->head;
+
+	/* Byte offset within the page -> u16 index relative to shared_ptr[0]. */
+	slot = (addr & ~PAGE_MASK) >> 1;
+	slot -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
+
+	/* Push: the slot links to the old head and becomes the new head. */
+	chunk->shared_ptr[slot] = chunk->head;
+
+	chunk->head = slot;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
new file mode 100644
index 0000000..485254e
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_vq.h"
+#include <rdma/iw_cm.h>
+
+int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct ib_qp *ibqp;
+	struct c2_qp *qp;
+	struct c2wr_qp_connect_req *wr;	/* variable size needs a malloc. */
+	struct c2_vq_req *vq_req;
+	int err;	/* 0 on success, negative errno on failure */
+
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Active connect: associate QP <--> CM_ID and take a cm_id reference */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	/*
+	 * Private data longer than C2_MAX_PRIVATE_DATA_SIZE is rejected.
+	 */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+		err = -EINVAL;
+		goto bail0;
+	}
+	/*
+	 * Set the RDMA read limits (ord/ird) requested by the caller
+	 */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Build a CCWR_QP_CONNECT work request in a heap buffer...
+	 */
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	c2_wr_set_id(wr, CCWR_QP_CONNECT);
+	wr->hdr.context = 0;	/* NOTE(review): vq_req is not attached here */
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->qp_handle = qp->adapter_handle;
+
+	wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
+	wr->remote_port = cm_id->remote_addr.sin_port;
+
+	/*
+	 * Move any private data from the caller's buffer into the WR;
+	 * a NULL buffer is sent as zero-length private data.
+	 */
+	if (iw_param->private_data) {
+		wr->private_data_length =
+			cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0], iw_param->private_data,
+		       iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/*
+	 * Send WR to adapter.  NOTE: There is no synchronous reply from
+	 * the adapter, so the request struct is released right away.
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	vq_req_free(c2dev, vq_req);
+
+ bail1:
+	kfree(wr);
+ bail0:
+	if (err) {
+		/*
+		 * If we fail, drop the cm_id reference taken above and
+		 * disassociate QP from CM_ID
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	return err;
+}
+
+int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_create_req wr;
+	struct c2wr_ep_listen_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Create an adapter listen endpoint for cm_id: allocate the request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR from the cm_id's local address/port and the backlog
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
+	wr.local_port = cm_id->local_addr.sin_port;
+	wr.backlog = cpu_to_be32(backlog);
+	wr.user_context = (u64) (unsigned long) cm_id;
+
+	/*
+	 * Reference the request struct.  Dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter; on failure drop the reference taken above
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply.  With no reply buffer there is nothing to free.
+	 */
+	reply =
+	    (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;
+
+	/*
+	 * Keep the adapter handle. Used in subsequent destroy
+	 */
+	cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
+
+	/*
+	 * Success: free the reply buffer and the request
+	 */
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+
+int c2_llp_service_destroy(struct iw_cm_id *cm_id)
+{
+	/* Tear down the listen endpoint stored in cm_id->provider_data. */
+	struct c2_dev *c2dev;
+	struct c2wr_ep_listen_destroy_req wr;
+	struct c2wr_ep_listen_destroy_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+	if (c2dev == NULL)
+		return -EINVAL;
+
+	/*
+	 * Allocate verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR; the endpoint handle comes from provider_data
+	 */
+	c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
+
+	/*
+	 * Reference the request struct.  Dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter; on failure drop the reference taken above
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply; a missing reply buffer is reported as -ENOMEM
+	 */
+	reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	if ((err = c2_errno(reply)) != 0)
+		goto bail1;
+	/* success and failure both continue into the cleanup below */
+ bail1:
+	vq_repbuf_free(c2dev, reply);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	struct c2_dev *c2dev = to_c2dev(cm_id->device);
+	struct c2_qp *qp;
+	struct ib_qp *ibqp;
+	struct c2wr_cr_accept_req *wr;	/* variable length WR */
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_accept_rep *reply;	/* VQ Reply msg ptr. */
+	int err;
+
+	/*
+	 * Accept the pending connection request whose handle is held in
+	 * cm_id->provider_data, on the QP named by iw_param->qpn.  Returns 0
+	 * or a negative errno.  Failures that happen before the QP <--> CM_ID
+	 * association is made leave the cm_id reference count and
+	 * provider_data untouched; later failures undo the association.
+	 */
+	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	qp = to_c2qp(ibqp);
+
+	/* Set the RDMA read limits */
+	err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+	if (err)
+		return err;
+
+	/* Validate private_data length before anything is allocated */
+	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE)
+		return -EINVAL;
+
+	/* Allocate verbs request. */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+	vq_req->qp = qp;
+	vq_req->cm_id = cm_id;
+	vq_req->event = IW_CM_EVENT_ESTABLISHED;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	/* Build the WR; provider_data still holds the cr_handle here */
+	c2_wr_set_id(wr, CCWR_CR_ACCEPT);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
+	wr->qp_handle = qp->adapter_handle;
+
+	if (iw_param->private_data) {
+		wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
+		memcpy(&wr->private_data[0],
+		       iw_param->private_data, iw_param->private_data_len);
+	} else
+		wr->private_data_length = 0;
+
+	/* Replace the cr_handle with the QP after accept */
+	cm_id->provider_data = qp;
+	cm_id->add_ref(cm_id);
+	qp->cm_id = cm_id;
+
+	/* Reference the request struct.  Dereferenced in the int handler. */
+	vq_req_get(c2dev, vq_req);
+
+	/* Send WR to adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	/* Wait for reply from adapter */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	/* Check that reply is present */
+	reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+	if (!err)
+		c2_set_qp_state(qp, C2_QP_STATE_RTS);
+ bail1:
+	if (err) {
+		/*
+		 * The association was made above: drop the cm_id reference
+		 * and disassociate the QP from the CM_ID.
+		 */
+		cm_id->provider_data = NULL;
+		qp->cm_id = NULL;
+		cm_id->rem_ref(cm_id);
+	}
+	kfree(wr);
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	struct c2_dev *c2dev;
+	struct c2wr_cr_reject_req wr;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cr_reject_rep *reply;
+	int err;
+
+	c2dev = to_c2dev(cm_id->device);
+
+	/*
+	 * Reject a pending connection request: allocate the verbs request.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/*
+	 * Build the WR (pdata and pdata_len are not placed in it)
+	 */
+	c2_wr_set_id(&wr, CCWR_CR_REJECT);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
+
+	/*
+	 * Reference the request struct.  Dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * Send WR to adapter; on failure drop the reference taken above
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	/*
+	 * Process reply; a missing reply buffer is reported as -ENOMEM
+	 */
+	reply = (struct c2wr_cr_reject_rep *) (unsigned long)
+		vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+	err = c2_errno(reply);
+	/*
+	 * The status has been read out, so the reply buffer can go
+	 */
+	vq_repbuf_free(c2dev, reply);
+
+ bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
new file mode 100644
index 0000000..9d7bcc5
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+/* Size of one CQ message: struct c2wr_ce rounded up to a multiple of 32. */
+#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
+
+static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
+{
+	unsigned long irq_flags;
+	struct c2_cq *found;
+
+	/*
+	 * Look up and pin under c2dev->lock; NULL means the slot is empty.
+	 */
+	spin_lock_irqsave(&c2dev->lock, irq_flags);
+	found = c2dev->qptr_array[cqn];
+	if (found)
+		atomic_inc(&found->refcount);
+	spin_unlock_irqrestore(&c2dev->lock, irq_flags);
+	return found;
+}
+
+static void c2_cq_put(struct c2_cq *cq)
+{
+	if (atomic_dec_and_test(&cq->refcount))	/* last reference dropped? */
+		wake_up(&cq->wait);		/* wake sleepers on cq->wait */
+}
+
+void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
+{
+	struct c2_cq *cq = c2_cq_get(c2dev, mq_index);
+
+	/* Run the consumer's completion handler while holding a reference. */
+	if (cq) {
+		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+		c2_cq_put(cq);
+		return;
+	}
+
+	printk("discarding events on destroyed CQN=%d\n", mq_index);
+}
+
+void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
+{
+	struct c2_cq *cq = c2_cq_get(c2dev, mq_index);
+	struct c2wr_ce *ce;
+	struct c2_mq *mq;
+	u16 slot;
+
+	if (!cq)
+		return;
+
+	/*
+	 * Zero the QP cookie of every unconsumed completion owned by qp.
+	 */
+	spin_lock_irq(&cq->lock);
+	mq = &cq->mq;
+	if (!c2_mq_empty(mq)) {
+		for (slot = mq->priv; slot != be16_to_cpu(*mq->shared);
+		     slot = (slot + 1) % mq->q_size) {
+			ce = (struct c2wr_ce *)
+				(mq->msg_pool.host + slot * mq->msg_size);
+			if (ce->qp_user_context == (u64) (unsigned long) qp)
+				ce->qp_user_context = (u64) 0;
+		}
+	}
+	spin_unlock_irq(&cq->lock);
+	c2_cq_put(cq);
+}
+
+static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
+{
+	switch (status) {	/* adapter CQE status -> verbs completion status */
+	case C2_OK:
+		return IB_WC_SUCCESS;
+	case CCERR_FLUSHED:
+		return IB_WC_WR_FLUSH_ERR;
+	case CCERR_BASE_AND_BOUNDS_VIOLATION:
+		return IB_WC_LOC_PROT_ERR;
+	case CCERR_ACCESS_VIOLATION:
+		return IB_WC_LOC_ACCESS_ERR;
+	case CCERR_TOTAL_LENGTH_TOO_BIG:
+		return IB_WC_LOC_LEN_ERR;
+	case CCERR_INVALID_WINDOW:
+		return IB_WC_MW_BIND_ERR;
+	default:		/* every other adapter status */
+		return IB_WC_GENERAL_ERR;
+	}
+}
+
+
+static inline int c2_poll_one(struct c2_dev *c2dev,
+			      struct c2_cq *cq, struct ib_wc *entry)
+{
+	struct c2wr_ce *ce;
+	struct c2_qp *qp;
+	int is_recv = 0;	/* set when the completion is for a receive */
+
+	ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+	if (!ce) {
+		return -EAGAIN;
+	}
+
+	/*
+	 * Translate one adapter completion into *entry.  A NULL QP
+	 * context marks a completion whose QP is already gone (see
+	 * c2_cq_clean()): free it and try the next message instead.
+	 */
+	while ((qp =
+		(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
+		c2_mq_free(&cq->mq);
+		ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+		if (!ce)
+			return -EAGAIN;
+	}
+
+	entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
+	entry->wr_id = ce->hdr.context;
+	entry->qp_num = ce->handle;
+	entry->wc_flags = 0;
+	entry->slid = 0;
+	entry->sl = 0;
+	entry->src_qp = 0;
+	entry->dlid_path_bits = 0;
+	entry->pkey_index = 0;
+
+	switch (c2_wr_get_id(ce)) {
+	case C2_WR_TYPE_SEND:
+		entry->opcode = IB_WC_SEND;
+		break;
+	case C2_WR_TYPE_RDMA_WRITE:
+		entry->opcode = IB_WC_RDMA_WRITE;
+		break;
+	case C2_WR_TYPE_RDMA_READ:
+		entry->opcode = IB_WC_RDMA_READ;
+		break;
+	case C2_WR_TYPE_BIND_MW:
+		entry->opcode = IB_WC_BIND_MW;
+		break;
+	case C2_WR_TYPE_RECV:
+		entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
+		entry->opcode = IB_WC_RECV;
+		is_recv = 1;
+		break;
+	default:	/* NOTE(review): entry->opcode is not set here */
+		break;
+	}
+
+	/* consume the WQEs: one on the RQ, or wqe_count + 1 on the SQ */
+	if (is_recv)
+		c2_mq_lconsume(&qp->rq_mq, 1);
+	else
+		c2_mq_lconsume(&qp->sq_mq,
+			       be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
+
+	/* free the completion message now that *entry has been filled in */
+	c2_mq_free(&cq->mq);
+
+	return 0;
+}
+
+int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+	struct c2_dev *c2dev = to_c2dev(ibcq->device);
+	struct c2_cq *cq = to_c2cq(ibcq);
+	unsigned long flags;
+	int npolled = 0;
+
+	/*
+	 * Fill entry[] with up to num_entries completions under cq->lock,
+	 * stopping early once c2_poll_one() reports an error.
+	 */
+	spin_lock_irqsave(&cq->lock, flags);
+	while (npolled < num_entries &&
+	       c2_poll_one(c2dev, cq, &entry[npolled]) == 0)
+		npolled++;
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	/* The count of entries written is returned, never an error code. */
+	return npolled;
+}
+
+int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+	struct c2_cq *cq = to_c2cq(ibcq);
+	struct c2_mq_shared __iomem *shared = cq->mq.peer;
+	u8 type;
+
+	/* Translate the verbs notify mode into the adapter's encoding. */
+	switch (notify) {
+	case IB_CQ_NEXT_COMP:
+		type = C2_CQ_NOTIFICATION_TYPE_NEXT;
+		break;
+	case IB_CQ_SOLICITED:
+		type = C2_CQ_NOTIFICATION_TYPE_NEXT_SE;
+		break;
+	default:
+		return -EINVAL;
+	}
+	writeb(type, &shared->notification_type);
+	writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
+	/*
+	 * Read shared->armed back to make the PCI write synchronous;
+	 * correct CQ notification semantics depend on it.
+	 */
+	readb(&shared->armed);
+	return 0;
+}
+
+static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
+{
+	/* Undo c2_alloc_cq_buf(): unmap the pool, then free its pages. */
+	dma_unmap_single(c2dev->ibdev.dma_device, pci_unmap_addr(mq, mapping),
+			 mq->q_size * mq->msg_size, DMA_FROM_DEVICE);
+	free_pages((unsigned long) mq->msg_pool.host,
+		   get_order(mq->q_size * mq->msg_size));
+}
+
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
+			   int msg_size)
+{
+	unsigned long pool_start;
+	/* Allocate and DMA-map a page-backed message pool described by mq. */
+	pool_start = __get_free_pages(GFP_KERNEL,
+				      get_order(q_size * msg_size));
+	if (!pool_start)
+		return -ENOMEM;
+
+	c2_mq_rep_init(mq,
+		       0,		/* index (currently unknown) */
+		       q_size,
+		       msg_size,
+		       (u8 *) pool_start,
+		       NULL,	/* peer (currently unknown) */
+		       C2_MQ_HOST_TARGET);
+	/* Map the pool for device-to-host DMA and record the mapping. */
+	mq->host_dma = dma_map_single(c2dev->ibdev.dma_device,
+				      (void *)pool_start,
+				      q_size * msg_size, DMA_FROM_DEVICE);
+	pci_unmap_addr_set(mq, mapping, mq->host_dma);
+
+	return 0;
+}
+
+int c2_init_cq(struct c2_dev *c2dev, int entries,
+	       struct c2_ucontext *ctx, struct c2_cq *cq)
+{
+	struct c2wr_cq_create_req wr;
+	struct c2wr_cq_create_rep *reply;
+	unsigned long peer_pa;
+	struct c2_vq_req *vq_req;
+	int err;
+
+	might_sleep();
+	/* Create a CQ on the adapter backed by a host message pool. */
+	cq->ibcq.cqe = entries - 1;
+	cq->is_kernel = !ctx;
+
+	/* Allocate a shared pointer from the kernel MQ shared-pointer pool */
+	cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+				      &cq->mq.shared_dma, GFP_KERNEL);
+	if (!cq->mq.shared)
+		return -ENOMEM;
+
+	/* Allocate pages for the message pool: entries + 1 fixed-size slots */
+	err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
+	if (err)
+		goto bail0;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	/* Build the CQ create request; hdr.context carries vq_req. */
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.msg_size = cpu_to_be32(cq->mq.msg_size);
+	wr.depth = cpu_to_be32(cq->mq.q_size);
+	wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
+	wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
+	wr.user_context = (u64) (unsigned long) (cq);
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail2;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail2;
+
+	reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	if ((err = c2_errno(reply)) != 0)
+		goto bail3;
+	/* Record the handle and MQ index the adapter assigned. */
+	cq->adapter_handle = reply->cq_handle;
+	cq->mq.index = be32_to_cpu(reply->mq_index);
+	/* Map one page at the adapter-reported offset as the MQ peer. */
+	peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
+	cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
+	if (!cq->mq.peer) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	spin_lock_init(&cq->lock);
+	atomic_set(&cq->refcount, 1);
+	init_waitqueue_head(&cq->wait);
+
+	/*
+	 * Use the MQ index allocated by the adapter to
+	 * store the CQ in the qptr_array
+	 */
+	cq->cqn = cq->mq.index;
+	c2dev->qptr_array[cq->cqn] = cq;
+
+	return 0;
+
+      bail3:
+	vq_repbuf_free(c2dev, reply);
+      bail2:
+	vq_req_free(c2dev, vq_req);
+      bail1:
+	c2_free_cq_buf(c2dev, &cq->mq);
+      bail0:
+	c2_free_mqsp(cq->mq.shared);
+
+	return err;
+}
+
+void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
+{
+	int err;
+	struct c2_vq_req *vq_req;
+	struct c2wr_cq_destroy_req wr;
+	struct c2wr_cq_destroy_rep *reply;
+
+	might_sleep();
+
+	/* Unpublish the CQ and drop the reference set in c2_init_cq(). */
+	spin_lock_irq(&c2dev->lock);
+	c2dev->qptr_array[cq->mq.index] = NULL;
+	atomic_dec(&cq->refcount);
+	spin_unlock_irq(&c2dev->lock);
+
+	/* Wait for every outstanding c2_cq_get() reference to be put. */
+	wait_event(cq->wait, !atomic_read(&cq->refcount));
+
+	/* Ask the adapter to destroy its side of the CQ. */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		goto bail0;
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.cq_handle = cq->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	/* reply_msg may be empty; only free a reply that actually exists. */
+	reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+	if (reply)
+		vq_repbuf_free(c2dev, reply);
+      bail1:
+	vq_req_free(c2dev, vq_req);
+      bail0:
+	if (cq->is_kernel) {
+		c2_free_cq_buf(c2dev, &cq->mq);
+	}
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
new file mode 100644
index 0000000..0d0bc33
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_vq.h"
+
+static void handle_mq(struct c2_dev *c2dev, u32 index);
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
+
+/*
+ * Service every hint the adapter posted; each one names an MQ with activity.
+ */
+void c2_rnic_interrupt(struct c2_dev *c2dev)
+{
+	unsigned int hint;
+
+	for (;;) {
+		if (c2dev->hints_read == be16_to_cpu(*c2dev->hint_count))
+			return;
+
+		hint = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
+		if (hint & 0x80000000)
+			return;
+		c2dev->hints_read++;
+		handle_mq(c2dev, hint);
+	}
+}
+
+/*
+ * Top level MQ handler: dispatch the activity reported for MQ mq_index
+ * (0 = request VQ, 1 = reply VQ, 2 = async events, anything else = a CQ).
+ */
+static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
+{
+	if (c2dev->qptr_array[mq_index] == NULL) {
+		/* pr_debug() supplies its own log level; do not pass KERN_* */
+		pr_debug("handle_mq: stray activity for mq_index=%u\n",
+			mq_index);
+		return;
+	}
+
+	switch (mq_index) {
+	case (0):
+		/*
+		 * An index of 0 in the activity queue
+		 * indicates the req vq now has messages
+		 * available...
+		 *
+		 * Wake up any waiters waiting on req VQ
+		 * message availability.
+		 */
+		wake_up(&c2dev->req_vq_wo);
+		break;
+	case (1):
+		handle_vq(c2dev, mq_index);
+		break;
+	case (2):
+		/* We have to purge the VQ in case there are pending
+		 * accept reply requests that would result in the
+		 * generation of an ESTABLISHED event. If we don't
+		 * generate these first, a CLOSE event could end up
+		 * being delivered before the ESTABLISHED event.
+		 */
+		handle_vq(c2dev, 1);
+
+		c2_ae_event(c2dev, mq_index);
+		break;
+	default:
+		/* There is no event synchronization between CQ events
+		 * and AE or CM events. In fact, CQE could be
+		 * delivered for all of the I/O up to and including the
+		 * FLUSH for a peer disconnect prior to the ESTABLISHED
+		 * event being delivered to the app. The reason for this
+		 * is that CM events are delivered on a thread, while AE
+		 * and CQ events are delivered in interrupt context.
+		 */
+		c2_cq_event(c2dev, mq_index);
+		break;
+	}
+}
+
+/*
+ * Handles verbs WR replies: take one reply off the VQ and wake its waiter.
+ */
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
+{
+	void *adapter_msg, *reply_msg;
+	struct c2wr_hdr *host_msg;
+	struct c2wr_hdr tmp;
+	struct c2_mq *reply_vq;
+	struct c2_vq_req *req;
+	struct iw_cm_event cm_event;
+	int err;
+
+	reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index];
+
+	/*
+	 * get next msg from mq_index into adapter_msg.
+	 * don't free it yet.
+	 */
+	adapter_msg = c2_mq_consume(reply_vq);
+	if (adapter_msg == NULL)
+		return;
+
+	host_msg = vq_repbuf_alloc(c2dev);
+
+	/*
+	 * If we can't get a host buffer, then we'll still
+	 * wakeup the waiter, we just won't give him the msg.
+	 * It is assumed the waiter will deal with this...
+	 */
+	if (!host_msg) {
+		pr_debug("handle_vq: no repbufs!\n");
+
+		/*
+		 * just copy the WR header into a local variable.
+		 * this allows us to still demux on the context
+		 */
+		host_msg = &tmp;
+		memcpy(host_msg, adapter_msg, sizeof(tmp));
+		reply_msg = NULL;
+	} else {
+		memcpy(host_msg, adapter_msg, reply_vq->msg_size);
+		reply_msg = host_msg;
+	}
+
+	/*
+	 * consume the msg from the MQ
+	 */
+	c2_mq_free(reply_vq);
+
+	/*
+	 * wakeup the waiter.
+	 */
+	req = (struct c2_vq_req *) (unsigned long) host_msg->context;
+	if (req == NULL) {
+		/*
+		 * The adapter should never send a reply we are not
+		 * expecting.  Only free a real repbuf, never &tmp.
+		 */
+		if (reply_msg != NULL)
+			vq_repbuf_free(c2dev, host_msg);
+		pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
+		return;
+	}
+
+	err = reply_msg ? c2_errno(reply_msg) : -ENOMEM;	/* no repbuf: no result */
+	if (!err) switch (req->event) {
+	case IW_CM_EVENT_ESTABLISHED:
+		/* Move the QP to RTS before reporting the event */
+		c2_set_qp_state(req->qp, C2_QP_STATE_RTS);
+		/* fall through */
+	case IW_CM_EVENT_CLOSE:
+		/*
+		 * Report the connection event through the
+		 * event handler registered on the cm_id
+		 */
+		cm_event.event = req->event;
+		cm_event.status = 0;
+		cm_event.local_addr = req->cm_id->local_addr;
+		cm_event.remote_addr = req->cm_id->remote_addr;
+		cm_event.private_data = NULL;
+		cm_event.private_data_len = 0;
+		req->cm_id->event_handler(req->cm_id, &cm_event);
+		break;
+	default:
+		break;
+	}
+
+	req->reply_msg = (u64) (unsigned long) (reply_msg);
+	atomic_set(&req->reply_ready, 1);
+	wake_up(&req->wait_object);
+
+	/*
+	 * If the request was cancelled, then this put will
+	 * free the vq_req memory...and reply_msg!!!
+	 */
+	vq_req_put(c2dev, req);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c
new file mode 100644
index 0000000..1e4f464
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mm.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_vq.h"
+
+#define PBL_VIRT 1
+#define PBL_PHYS 2
+
+/*
+ * Send all the PBL messages to convey the remainder of the PBL
+ * Wait for the adapter's reply on the last one.
+ * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
+ * Returns 0 on success or a negative errno.
+ *
+ * NOTE:  vq_req is _not_ freed by this function.  The VQ Host
+ *	  Reply buffer _is_ freed by this function.
+ */
+static int
+send_pbl_messages(struct c2_dev *c2dev, u32 stag_index,
+		  unsigned long va, u32 pbl_depth,
+		  struct c2_vq_req *vq_req, int pbl_type)
+{
+	u32 pbe_count;		/* amt that fits in a PBL msg */
+	u32 count;		/* amt in this PBL MSG. */
+	struct c2wr_nsmr_pbl_req *wr;	/* PBL WR ptr */
+	struct c2wr_nsmr_pbl_rep *reply;	/* reply ptr */
+	int err, pbl_virt, pbl_index, i;
+
+	switch (pbl_type) {
+	case PBL_VIRT:
+		pbl_virt = 1;
+		break;
+	case PBL_PHYS:
+		pbl_virt = 0;
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		return -ENOMEM;
+	}
+	c2_wr_set_id(wr, CCWR_NSMR_PBL);
+
+	/*
+	 * Only the last PBL message will generate a reply from the verbs,
+	 * so we set the context to 0 indicating there is no kernel verbs
+	 * handler blocked awaiting this reply.
+	 */
+	wr->hdr.context = 0;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->stag_index = stag_index;	/* already swapped */
+	wr->flags = 0;
+	pbl_index = 0;
+	while (pbl_depth) {
+		count = min(pbe_count, pbl_depth);
+		wr->addrs_length = cpu_to_be32(count);
+
+		/*
+		 *  If this is the last message, then reference the
+		 *  vq request struct cuz we're gonna wait for a reply.
+		 *  also make this PBL msg as the last one.
+		 */
+		if (count == pbl_depth) {
+			/*
+			 * reference the request struct.  dereferenced in the
+			 * int handler.
+			 */
+			vq_req_get(c2dev, vq_req);
+			wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
+
+			/*
+			 * This is the last PBL message.
+			 * Set the context to our VQ Request Object so we can
+			 * wait for the reply.
+			 */
+			wr->hdr.context = (unsigned long) vq_req;
+		}
+
+		/*
+		 * If pbl_virt is set then va is a virtual address that
+		 * describes a virtually contiguous allocation and each page
+		 * start should be translated to its physical address.
+		 * NOTE(review): that branch only advances va and never
+		 * stores into wr->paddrs[] -- translation looks unfinished.
+		 * If pbl_virt is not set then va is an array of physical
+		 * addresses and they are copied into the wr as they are.
+		 */
+		for (i = 0; i < count; i++) {
+			if (pbl_virt) {
+				va += PAGE_SIZE;
+			} else {
+				wr->paddrs[i] =
+				    cpu_to_be64(((u64 *)va)[pbl_index + i]);
+			}
+		}
+
+		/* Send WR to adapter */
+		err = vq_send_wr(c2dev, (union c2wr *) wr);
+		if (err) {
+			/* only the final message took a vq_req reference */
+			if (count == pbl_depth) {
+				vq_req_put(c2dev, vq_req);
+			}
+			goto bail0;
+		}
+		pbl_depth -= count;
+		pbl_index += count;
+	}
+
+	/*
+	 *  Now wait for the reply...
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	kfree(wr);
+	return err;
+}
+
+#define C2_PBL_MAX_DEPTH 131072
+/*
+ * Register a memory region described by a list of physical page addresses
+ * and store the stag the adapter returns in mr->ibmr.lkey and .rkey.
+ * PBL entries that do not fit in the register WR are sent afterwards with
+ * send_pbl_messages().  Returns 0 on success or a negative errno.
+ */
+int
+c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+			   int page_size, int pbl_depth, u32 length,
+			   u32 offset, u64 *va, enum c2_acf acf,
+			   struct c2_mr *mr)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_nsmr_register_req *wr;
+	struct c2wr_nsmr_register_rep *reply;
+	u16 flags;
+	int i, pbe_count, count;
+	int err;
+
+	if (!va || !length || !addr_list || pbl_depth <= 0)
+		return -EINVAL;
+
+	/*
+	 * Verify PBL depth is within rnic max
+	 */
+	if (pbl_depth > C2_PBL_MAX_DEPTH) {
+		return -EINVAL;
+	}
+
+	/* allocate verbs request object */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	/* build the WR */
+	c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+
+	flags = (acf | MEM_VA_BASED | MEM_REMOTE);
+
+	/* compute how many pbes can fit in the message */
+	pbe_count = (c2dev->req_vq.msg_size -
+		     sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
+
+	if (pbl_depth <= pbe_count) {
+		flags |= MEM_PBL_COMPLETE;
+	}
+	wr->flags = cpu_to_be16(flags);
+	wr->stag_key = 0;
+	wr->va = cpu_to_be64(*va);
+	wr->pd_id = mr->pd->pd_id;
+	wr->pbe_size = cpu_to_be32(page_size);
+	wr->length = cpu_to_be32(length);
+	wr->pbl_depth = cpu_to_be32(pbl_depth);
+	wr->fbo = cpu_to_be32(offset);
+	count = min(pbl_depth, pbe_count);
+	wr->addrs_length = cpu_to_be32(count);
+
+	/*
+	 * fill out the PBL for this message
+	 */
+	for (i = 0; i < count; i++) {
+		wr->paddrs[i] = cpu_to_be64(addr_list[i]);
+	}
+
+	/*
+	 * reference the request struct
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/*
+	 * send the WR to the adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	/*
+	 * wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail1;
+	}
+
+	/*
+	 * process reply
+	 */
+	reply =
+	    (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	if ((err = c2_errno(reply))) {
+		goto bail2;
+	}
+	/* the returned stag index serves as both lkey and rkey */
+	mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
+	vq_repbuf_free(c2dev, reply);
+
+	/*
+	 * if there are still more PBEs we need to send them to
+	 * the adapter and wait for a reply on the final one.
+	 * reuse vq_req for this purpose.
+	 */
+	pbl_depth -= count;
+	if (pbl_depth) {
+
+		vq_req->reply_msg = (unsigned long) NULL;
+		atomic_set(&vq_req->reply_ready, 0);
+		err = send_pbl_messages(c2dev,
+					cpu_to_be32(mr->ibmr.lkey),
+					(unsigned long) &addr_list[i],
+					pbl_depth, vq_req, PBL_PHYS);
+		if (err) {
+			goto bail1;
+		}
+	}
+
+	vq_req_free(c2dev, vq_req);
+	kfree(wr);
+
+	return err;
+
+      bail2:
+	vq_repbuf_free(c2dev, reply);
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Ask the adapter to release the stag identified by stag_index and wait
+ * for its reply.  Returns 0 on success or a negative errno.
+ */
+int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
+{
+	struct c2_vq_req *vq_req;	/* verbs request object */
+	struct c2wr_stag_dealloc_req wr;	/* work request */
+	struct c2wr_stag_dealloc_rep *reply;	/* WR reply  */
+	int err;
+
+	/* allocate verbs request object */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Build the WR, zeroing the fields that are not set explicitly
+	 */
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
+	wr.hdr.context = (u64) (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.stag_index = cpu_to_be32(stag_index);
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	/* Send WR to adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply
+	 */
+	reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c
new file mode 100644
index 0000000..b88a755
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_mq.h"
+
+/* Return the next free message slot of an adapter-target MQ, NULL if full. */
+void *c2_mq_alloc(struct c2_mq *q)
+{
+	u8 *slot;
+
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+	if (c2_mq_full(q))
+		return NULL;
+
+	slot = q->msg_pool.host + q->priv * q->msg_size;
+#if defined(DEBUG) && defined(CCMSGMAGIC)
+	{
+		struct c2wr_hdr *hdr = (struct c2wr_hdr *) slot;
+
+		BUG_ON(hdr->magic != be32_to_cpu(~CCWR_MAGIC));
+		hdr->magic = cpu_to_be32(CCWR_MAGIC);
+	}
+#endif
+	return slot;
+}
+
+/* Advance the producer index and publish it to the peer; no-op when full. */
+void c2_mq_produce(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+	if (c2_mq_full(q))
+		return;
+
+	q->priv = (q->priv + 1) % q->q_size;
+	q->hint_count++;
+	__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+}
+
+/*
+ * Peek at the message at the consumer index of a host-target MQ without
+ * advancing the index (c2_mq_free() does that).  Returns NULL when empty.
+ */
+void *c2_mq_consume(struct c2_mq *q)
+{
+	u8 *msg;
+
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+	if (c2_mq_empty(q))
+		return NULL;
+
+	msg = q->msg_pool.host + q->priv * q->msg_size;
+#if defined(DEBUG) && defined(CCMSGMAGIC)
+	BUG_ON(((struct c2wr_hdr *) msg)->magic != be32_to_cpu(CCWR_MAGIC));
+#endif
+	return msg;
+}
+
+/*
+ * Retire the message at the consumer index and tell the peer; no-op if empty.
+ */
+void c2_mq_free(struct c2_mq *q)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_HOST_TARGET);
+	if (c2_mq_empty(q))
+		return;
+#ifdef CCMSGMAGIC
+	{
+		struct c2wr_hdr __iomem *hdr = (struct c2wr_hdr __iomem *)
+		    (q->msg_pool.adapter + q->priv * q->msg_size);
+		__raw_writel(cpu_to_be32(~CCWR_MAGIC), &hdr->magic);
+	}
+#endif
+	q->priv = (q->priv + 1) % q->q_size;
+	__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+}
+
+
+/* Step the index stored in *q->shared forward by wqe_count entries. */
+void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
+{
+	BUG_ON(q->magic != C2_MQ_MAGIC);
+	BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+	for (; wqe_count; wqe_count--) {
+		BUG_ON(c2_mq_empty(q));
+		*q->shared = cpu_to_be16((be16_to_cpu(*q->shared) + 1) % q->q_size);
+	}
+}
+
+#if 0	/* compiled out: distance between the private and shared indices */
+u32 c2_mq_count(struct c2_mq *q)
+{
+	s32 count;
+
+	if (q->type == C2_MQ_HOST_TARGET)
+		count = be16_to_cpu(*q->shared) - q->priv;
+	else
+		count = q->priv - be16_to_cpu(*q->shared);
+
+	if (count < 0)	/* the difference wrapped around the ring */
+		count += q->q_size;
+
+	return (u32) count;
+}
+#endif  /*  0  */
+
+/* Initialise q with a message pool given as an __iomem (adapter) pointer. */
+void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.adapter = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->priv = 0;
+	q->hint_count = 0;
+}
+/* Initialise q with a message pool given as a host memory pointer. */
+void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		    u8 *pool_start, u16 __iomem *peer, u32 type)
+{
+	BUG_ON(!q->shared);
+
+	/* This code assumes the byte swapping has already been done! */
+	q->magic = C2_MQ_MAGIC;
+	q->type = type;
+	q->index = index;
+	q->q_size = q_size;
+	q->msg_size = msg_size;
+	q->msg_pool.host = pool_start;
+	q->peer = (struct c2_mq_shared __iomem *) peer;
+	q->priv = 0;
+	q->hint_count = 0;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h
new file mode 100644
index 0000000..9185bbb
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _C2_MQ_H_
+#define _C2_MQ_H_
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include "c2_wr.h"
+
+enum c2_shared_regs {
+	/* NOTE(review): values equal the bit offsets of the matching c2_mq_shared fields -- confirm */
+	C2_SHARED_ARMED = 0x10,
+	C2_SHARED_NOTIFY = 0x18,
+	C2_SHARED_SHARED = 0x40,
+};
+
+struct c2_mq_shared {
+	u16 unused1;
+	u8 armed;
+	u8 notification_type;
+	u32 unused2;
+	u16 shared;	/* index written big-endian by c2_mq_produce()/c2_mq_free() */
+	/* Pad to 64 bytes. */
+	u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
+};
+
+enum c2_mq_type {
+	C2_MQ_HOST_TARGET = 1,	/* required by c2_mq_consume()/c2_mq_free() */
+	C2_MQ_ADAPTER_TARGET = 2,	/* required by c2_mq_alloc()/c2_mq_produce() */
+};
+
+/*
+ * c2_mq_t is for kernel-mode MQs like the VQs and the AEQ.
+ * c2_user_mq_t (which is the same format) is for user-mode MQs...
+ */
+#define C2_MQ_MAGIC 0x4d512020	/* 'MQ  ' */
+struct c2_mq {
+	u32 magic;	/* C2_MQ_MAGIC once initialised; checked with BUG_ON() */
+	union {
+		u8 *host;
+		u8 __iomem *adapter;
+	} msg_pool;	/* base of the message slots */
+	dma_addr_t host_dma;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	u16 hint_count;	/* bumped by c2_mq_produce() */
+	u16 priv;	/* this side's slot index */
+	struct c2_mq_shared __iomem *peer;	/* priv is published to peer->shared */
+	u16 *shared;	/* big-endian index that priv is compared against */
+	dma_addr_t shared_dma;
+	u32 q_size;	/* number of slots */
+	u32 msg_size;	/* bytes per slot */
+	u32 index;
+	enum c2_mq_type type;
+};
+
+static __inline__ int c2_mq_empty(struct c2_mq *q)
+{
+	return q->priv == be16_to_cpu(*q->shared);	/* indices equal: empty */
+}
+
+static __inline__ int c2_mq_full(struct c2_mq *q)
+{
+	/* full when priv sits one slot behind the shared index (mod q_size) */ return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
+}
+
+extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
+extern void *c2_mq_alloc(struct c2_mq *q);
+extern void c2_mq_produce(struct c2_mq *q);
+extern void *c2_mq_consume(struct c2_mq *q);
+extern void c2_mq_free(struct c2_mq *q);
+extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+		       u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
+extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+			   u8 *pool_start, u16 __iomem *peer, u32 type);
+
+#endif				/* _C2_MQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c
new file mode 100644
index 0000000..00c7099
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_pd.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include "c2.h"
+#include "c2_provider.h"
+
+/* Reserve a free PD id, searching from just past the previous grant. */
+int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
+{
+	int status = -ENOMEM;
+	u32 id;
+
+	spin_lock(&c2dev->pd_table.lock);
+	id = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
+				c2dev->pd_table.last);
+	if (id >= c2dev->pd_table.max)	/* nothing free at or above 'last' */
+		id = find_first_zero_bit(c2dev->pd_table.table,
+					 c2dev->pd_table.max);
+	if (id < c2dev->pd_table.max) {
+		__set_bit(id, c2dev->pd_table.table);
+		pd->pd_id = id;
+		c2dev->pd_table.last =
+		    (id + 1 >= c2dev->pd_table.max) ? 0 : id + 1;
+		status = 0;
+	}
+	spin_unlock(&c2dev->pd_table.lock);
+	return status;
+}
+
+void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
+{
+	spin_lock(&c2dev->pd_table.lock);
+	__clear_bit(pd->pd_id, c2dev->pd_table.table);	/* id becomes allocatable again */
+	spin_unlock(&c2dev->pd_table.lock);
+}
+
+/* Allocate an all-clear PD id bitmap holding props.max_pd entries. */
+int __devinit c2_init_pd_table(struct c2_dev *c2dev)
+{
+	spin_lock_init(&c2dev->pd_table.lock);
+	c2dev->pd_table.last = 0;
+	c2dev->pd_table.max = c2dev->props.max_pd;
+	c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
+					sizeof(long), GFP_KERNEL);
+	if (c2dev->pd_table.table == NULL)
+		return -ENOMEM;
+	bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
+	return 0;
+}
+
+void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
+{
+	kfree(c2dev->pd_table.table);	/* bitmap set up by c2_init_pd_table() */
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
new file mode 100644
index 0000000..dd6af55
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -0,0 +1,870 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_arp.h>
+#include <linux/vmalloc.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include "c2.h"
+#include "c2_provider.h"
+#include "c2_user.h"
+
+/*
+ * query_device verb: copy the device attributes cached in the c2_dev
+ * into the caller's buffer.  Always returns 0.
+ */
+static int c2_query_device(struct ib_device *ibdev,
+			   struct ib_device_attr *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	*props = to_c2dev(ibdev)->props;
+
+	return 0;
+}
+
+/*
+ * query_port verb: fill in a fixed set of port attributes.  The port
+ * number is ignored and only the fields assigned below are written.
+ * Always returns 0.
+ */
+static int c2_query_port(struct ib_device *ibdev,
+			 u8 port, struct ib_port_attr *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Link state and capacity. */
+	props->state = IB_PORT_ACTIVE;
+	props->phys_state = 0;
+	props->max_mtu = IB_MTU_4096;
+	props->active_width = 1;
+	props->active_speed = 1;
+
+	/* LID / subnet-manager fields are all reported as zero. */
+	props->lid = 0;
+	props->lmc = 0;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+
+	/* One-entry GID and P_Key tables, no Q_Key violations. */
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->qkey_viol_cntr = 0;
+
+	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+				IB_PORT_VENDOR_CLASS_SUP |
+				IB_PORT_BOOT_MGMT_SUP;
+
+	return 0;
+}
+
+/*
+ * modify_port verb: nothing is modified; the call is logged and
+ * success (0) is returned regardless of the arguments.
+ */
+static int c2_modify_port(struct ib_device *ibdev,
+			  u8 port, int port_modify_mask,
+			  struct ib_port_modify *props)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return 0;
+}
+
+/*
+ * query_pkey verb: stores 0 in *pkey for every port and index and
+ * returns 0.
+ */
+static int c2_query_pkey(struct ib_device *ibdev,
+			 u8 port, u16 index, u16 * pkey)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	*pkey = 0;
+	return 0;
+}
+
+/*
+ * query_gid verb: the GID is the 6-byte hardware address of the pseudo
+ * netdev placed in the first bytes of gid->raw, with every remaining
+ * byte zero.  port and index are ignored.  Always returns 0.
+ */
+static int c2_query_gid(struct ib_device *ibdev, u8 port,
+			int index, union ib_gid *gid)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	memset(gid->raw, 0, sizeof(gid->raw));
+	memcpy(gid->raw, c2dev->pseudo_netdev->dev_addr, 6);
+
+	return 0;
+}
+
+/*
+ * alloc_ucontext verb: allocate the per-client context structure that
+ * tracks the objects belonging to one user-mode client.
+ *
+ * Returns the embedded ib_ucontext, or ERR_PTR(-ENOMEM) when the
+ * allocation fails.  The memory is not zeroed here.
+ */
+static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
+					     struct ib_udata *udata)
+{
+	struct c2_ucontext *context;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	context = kmalloc(sizeof(*context), GFP_KERNEL);
+
+	return context ? &context->ibucontext : ERR_PTR(-ENOMEM);
+}
+
+/*
+ * dealloc_ucontext verb: free a context created by c2_alloc_ucontext().
+ * The ib_ucontext pointer is handed to kfree() as-is; that is the
+ * address kmalloc() returned because ibucontext is the first member of
+ * struct c2_ucontext.  Always returns 0.
+ */
+static int c2_dealloc_ucontext(struct ib_ucontext *context)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	kfree(context);
+	return 0;
+}
+
+/* mmap verb: not implemented; always returns -ENOSYS. */
+static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+/*
+ * alloc_pd verb: allocate a c2_pd, reserve a PD id for it and, when the
+ * request comes from a user context, copy the id back through udata.
+ *
+ * Returns the embedded ib_pd on success.  On failure returns
+ * ERR_PTR(-ENOMEM) (no memory), the error from c2_pd_alloc(), or
+ * ERR_PTR(-EFAULT) if the copy to udata fails; everything acquired up
+ * to that point is released again.
+ */
+static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
+				 struct ib_ucontext *context,
+				 struct ib_udata *udata)
+{
+	struct c2_dev *c2dev = to_c2dev(ibdev);
+	struct c2_pd *pd;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	/* A request without a user context is flagged as privileged. */
+	err = c2_pd_alloc(c2dev, !context, pd);
+	if (err)
+		goto free_pd;
+
+	if (context &&
+	    ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
+		err = -EFAULT;
+		goto release_id;
+	}
+
+	return &pd->ibpd;
+
+release_id:
+	c2_pd_free(c2dev, pd);
+free_pd:
+	kfree(pd);
+	return ERR_PTR(err);
+}
+
+/*
+ * dealloc_pd verb: give the PD id back to the device's table and free
+ * the structure.  kfree() receives the ib_pd pointer, which is also the
+ * start of the enclosing c2_pd because ibpd is its first member.
+ * Always returns 0.
+ */
+static int c2_dealloc_pd(struct ib_pd *pd)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
+	kfree(pd);
+
+	return 0;
+}
+
+/* create_ah verb: not implemented; always returns ERR_PTR(-ENOSYS). */
+static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return ERR_PTR(-ENOSYS);
+}
+
+/* destroy_ah verb: not implemented; always returns -ENOSYS. */
+static int c2_ah_destroy(struct ib_ah *ah)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+/*
+ * iWARP CM add_ref callback: take one reference on the QP.
+ * A NULL ibqp triggers BUG().
+ */
+static void c2_add_ref(struct ib_qp *ibqp)
+{
+	BUG_ON(!ibqp);
+
+	atomic_inc(&to_c2qp(ibqp)->refcount);
+}
+
+/*
+ * iWARP CM rem_ref callback: drop one reference on the QP and, when
+ * the count reaches zero, wake up whoever sleeps on qp->wait.
+ * A NULL ibqp triggers BUG().
+ */
+static void c2_rem_ref(struct ib_qp *ibqp)
+{
+	struct c2_qp *qp;
+
+	BUG_ON(!ibqp);
+
+	qp = to_c2qp(ibqp);
+	if (!atomic_dec_and_test(&qp->refcount))
+		return;
+
+	wake_up(&qp->wait);
+}
+
+/*
+ * iWARP CM get_qp callback: look up a QP by number on this device.
+ * Returns the embedded ib_qp, or NULL when c2_find_qpn() finds nothing.
+ * The reference count is only read for the debug message, not changed.
+ */
+struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
+{
+	struct c2_qp *qp = c2_find_qpn(to_c2dev(device), qpn);
+
+	pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
+		__FUNCTION__, qp, qpn, device,
+		(qp?atomic_read(&qp->refcount):0));
+
+	if (!qp)
+		return NULL;
+
+	return &qp->ibqp;
+}
+
+/*
+ * create_qp verb.  Only IB_QPT_RC queue pairs are supported.
+ *
+ * Returns the embedded ib_qp on success, ERR_PTR(-EINVAL) for any
+ * other QP type, ERR_PTR(-ENOMEM) when the c2_qp cannot be allocated,
+ * or the error reported by c2_alloc_qp() (the c2_qp is freed first).
+ * No user-space specific handling is performed.
+ */
+static struct ib_qp *c2_create_qp(struct ib_pd *pd,
+				  struct ib_qp_init_attr *init_attr,
+				  struct ib_udata *udata)
+{
+	struct c2_qp *qp;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	if (init_attr->qp_type != IB_QPT_RC) {
+		pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__,
+			init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp) {
+		pr_debug("%s: Unable to allocate QP\n", __FUNCTION__);
+		return ERR_PTR(-ENOMEM);
+	}
+	spin_lock_init(&qp->lock);
+
+	err = c2_alloc_qp(to_c2dev(pd->device), to_c2pd(pd), init_attr, qp);
+	if (err) {
+		kfree(qp);
+		return ERR_PTR(err);
+	}
+
+	return &qp->ibqp;
+}
+
+/*
+ * destroy_qp verb: release the QP's resources through c2_free_qp() and
+ * then free the c2_qp itself.  Always returns 0.
+ */
+static int c2_destroy_qp(struct ib_qp *ib_qp)
+{
+	struct c2_qp *qp = to_c2qp(ib_qp);
+
+	pr_debug("%s:%u qp=%p,qp->state=%d\n",
+		__FUNCTION__, __LINE__,ib_qp,qp->state);
+	c2_free_qp(to_c2dev(ib_qp->device), qp);
+	kfree(qp);
+	return 0;
+}
+
+/*
+ * create_cq verb: allocate a c2_cq and initialise it for the requested
+ * number of entries.  The user context and udata are not used;
+ * c2_init_cq() is always given a NULL context.
+ *
+ * Returns the embedded ib_cq, ERR_PTR(-ENOMEM) on allocation failure,
+ * or the error from c2_init_cq().
+ */
+static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries,
+				  struct ib_ucontext *context,
+				  struct ib_udata *udata)
+{
+	struct c2_cq *cq;
+	int err;
+
+	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq) {
+		pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
+	if (!err)
+		return &cq->ibcq;
+
+	pr_debug("%s: error initializing CQ\n", __FUNCTION__);
+	kfree(cq);
+	return ERR_PTR(err);
+}
+
+/*
+ * destroy_cq verb: release the CQ's resources through c2_free_cq() and
+ * then free the c2_cq itself.  Always returns 0.
+ */
+static int c2_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct c2_cq *cq = to_c2cq(ib_cq);
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	c2_free_cq(to_c2dev(ib_cq->device), cq);
+	kfree(cq);
+
+	return 0;
+}
+
+/*
+ * Translate IB_ACCESS_* bits into the adapter's C2_ACF_* bits.
+ * Local read and window bind are always granted; remote write, remote
+ * read and local write are granted only when requested in @acc.
+ */
+static inline u32 c2_convert_access(int acc)
+{
+	u32 flags = C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
+
+	if (acc & IB_ACCESS_REMOTE_WRITE)
+		flags |= C2_ACF_REMOTE_WRITE;
+	if (acc & IB_ACCESS_REMOTE_READ)
+		flags |= C2_ACF_REMOTE_READ;
+	if (acc & IB_ACCESS_LOCAL_WRITE)
+		flags |= C2_ACF_LOCAL_WRITE;
+
+	return flags;
+}
+
+/*
+ * reg_phys_mr verb: register a list of physical buffers as one memory
+ * region.
+ *
+ * @ib_pd:        protection domain that will own the region
+ * @buffer_list:  array of physical buffers; each address must be
+ *                page aligned and each size non-zero
+ * @num_phys_buf: number of entries in @buffer_list
+ * @acc:          IB_ACCESS_* flags, translated by c2_convert_access()
+ * @iova_start:   I/O virtual address, handed on to
+ *                c2_nsmr_register_phys_kern()
+ *
+ * The buffers are expanded into a flat list of page addresses.  The
+ * page size is PAGE_SIZE, or 8 * PAGE_SIZE when a single buffer is
+ * given.
+ *
+ * Returns the embedded ib_mr on success, ERR_PTR(-EINVAL) for an
+ * unaligned or empty buffer, ERR_PTR(-ENOMEM) on allocation failure,
+ * or the error from c2_nsmr_register_phys_kern().
+ */
+static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
+				    struct ib_phys_buf *buffer_list,
+				    int num_phys_buf, int acc, u64 * iova_start)
+{
+	struct c2_mr *mr;
+	u64 *page_list;
+	u32 total_len;
+	int err, i, j, k, page_shift, pbl_depth;
+
+	pbl_depth = 0;
+	total_len = 0;
+
+	page_shift = PAGE_SHIFT;
+	/*
+	 * If there is only 1 buffer we assume this could
+	 * be a map of all phy mem...use a 32k page_shift.
+	 */
+	if (num_phys_buf == 1)
+		page_shift += 3;
+
+	/* Validate every buffer and count the page entries needed. */
+	for (i = 0; i < num_phys_buf; i++) {
+
+		if (buffer_list[i].addr & ~PAGE_MASK) {
+			pr_debug("Unaligned Memory Buffer: 0x%x\n",
+				(unsigned int) buffer_list[i].addr);
+			return ERR_PTR(-EINVAL);
+		}
+
+		if (!buffer_list[i].size) {
+			pr_debug("Invalid Buffer Size\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		total_len += buffer_list[i].size;
+		pbl_depth += ALIGN(buffer_list[i].size,
+				   (1 << page_shift)) >> page_shift;
+	}
+
+	page_list = vmalloc(sizeof(u64) * pbl_depth);
+	if (!page_list) {
+		pr_debug("couldn't vmalloc page_list of size %zd\n",
+			(sizeof(u64) * pbl_depth));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 0, j = 0; i < num_phys_buf; i++) {
+
+		int naddrs;
+
+		naddrs = ALIGN(buffer_list[i].size,
+			       (1 << page_shift)) >> page_shift;
+		/*
+		 * Widen k before shifting: for a large buffer the byte
+		 * offset k << page_shift does not fit in an int.
+		 */
+		for (k = 0; k < naddrs; k++)
+			page_list[j++] = buffer_list[i].addr +
+					 ((u64) k << page_shift);
+	}
+
+	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr) {
+		/* Do not leak the page list on this error path. */
+		vfree(page_list);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	mr->pd = to_c2pd(ib_pd);
+	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
+		"*iova_start %llx, first pa %llx, last pa %llx\n",
+		__FUNCTION__, page_shift, pbl_depth, total_len,
+		*iova_start, page_list[0], page_list[pbl_depth-1]);
+	err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+					 (1 << page_shift), pbl_depth,
+					 total_len, 0, iova_start,
+					 c2_convert_access(acc), mr);
+	/* The page list is only needed for the registration call. */
+	vfree(page_list);
+	if (err) {
+		kfree(mr);
+		return ERR_PTR(err);
+	}
+
+	return &mr->ibmr;
+}
+
+/*
+ * get_dma_mr verb: register one physical buffer that starts at address
+ * 0 and is 0xffffffff bytes long (the AMSO1100 limit), with an I/O
+ * virtual address of 0.  Returns whatever c2_reg_phys_mr() returns.
+ */
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	/* AMSO1100 limit */
+	struct ib_phys_buf bl = {
+		.addr = 0,
+		.size = 0xffffffff,
+	};
+	u64 kva = 0;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
+}
+
+/*
+ * reg_user_mr verb: register a pinned user memory region.
+ *
+ * The region's chunk list is flattened into an array holding one DMA
+ * address per region->page_size page, which is then registered through
+ * c2_nsmr_register_phys_kern() with the region's virtual base as the
+ * I/O virtual address.
+ *
+ * Returns the embedded ib_mr on success, ERR_PTR(-ENOMEM) on
+ * allocation failure, or the error from the registration call.
+ */
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+				    int acc, struct ib_udata *udata)
+{
+	struct ib_umem_chunk *chunk;
+	struct c2_mr *c2mr;
+	u64 *pages;
+	u64 kva;
+	int shift, total_ents, npages;
+	int ent, pg, cnt;
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	shift = ffs(region->page_size) - 1;
+
+	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
+	if (!c2mr)
+		return ERR_PTR(-ENOMEM);
+	c2mr->pd = to_c2pd(pd);
+
+	/* Size the page array from the entry count of every chunk. */
+	total_ents = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list)
+		total_ents += chunk->nents;
+
+	pages = kmalloc(total_ents * sizeof(u64), GFP_KERNEL);
+	if (!pages) {
+		err = -ENOMEM;
+		goto free_mr;
+	}
+
+	/* Expand each mapped scatterlist entry into per-page addresses. */
+	npages = 0;
+	list_for_each_entry(chunk, &region->chunk_list, list) {
+		for (ent = 0; ent < chunk->nmap; ++ent) {
+			cnt = sg_dma_len(&chunk->page_list[ent]) >> shift;
+			for (pg = 0; pg < cnt; ++pg)
+				pages[npages++] =
+					sg_dma_address(&chunk->page_list[ent]) +
+					(region->page_size * pg);
+		}
+	}
+
+	kva = (u64)region->virt_base;
+	err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), pages,
+					 region->page_size, npages,
+					 region->length, region->offset,
+					 &kva, c2_convert_access(acc), c2mr);
+	kfree(pages);
+	if (err)
+		goto free_mr;
+
+	return &c2mr->ibmr;
+
+free_mr:
+	kfree(c2mr);
+	return ERR_PTR(err);
+}
+
+/*
+ * dereg_mr verb: deallocate the region's STag (its lkey) on the
+ * adapter.  The c2_mr is freed only when that succeeds; on failure it
+ * is left allocated and the error is returned.
+ */
+static int c2_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct c2_mr *mr = to_c2mr(ib_mr);
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
+	if (err) {
+		pr_debug("c2_stag_dealloc failed: %d\n", err);
+		return err;
+	}
+
+	kfree(mr);
+	return 0;
+}
+
+/* Show handler for the hw_rev attribute: prints props.hw_ver in hex. */
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x\n", dev->props.hw_ver);
+}
+
+/*
+ * Show handler for the fw_ver attribute: prints props.fw_ver as three
+ * dot-separated hex fields taken from bits 32 and up, bits 16-31 and
+ * bits 0-15 respectively.
+ */
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%x.%x.%x\n",
+		       (int) (dev->props.fw_ver >> 32),
+		       (int) (dev->props.fw_ver >> 16) & 0xffff,
+		       (int) (dev->props.fw_ver & 0xffff));
+}
+
+/* Show handler for the hca_type attribute: a fixed "AMSO1100" string. */
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "AMSO1100\n");
+}
+
+/*
+ * Show handler for the board_id attribute: prints a fixed string,
+ * limited to at most 32 characters by the %.*s precision.
+ */
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
+}
+
+/* Read-only (S_IRUGO) class-device attributes; none has a store handler. */
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+/* Attribute files created by c2_register_device() for the ib device. */
+static struct class_device_attribute *c2_class_attributes[] = {
+	&class_device_attr_hw_rev,
+	&class_device_attr_fw_ver,
+	&class_device_attr_hca_type,
+	&class_device_attr_board_id
+};
+
+/*
+ * modify_qp verb: forward the attribute change to c2_qp_modify() and
+ * return its result.  udata is not used.
+ */
+static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+			int attr_mask, struct ib_udata *udata)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+
+	return c2_qp_modify(c2dev, to_c2qp(ibqp), attr, attr_mask);
+}
+
+/* attach_mcast verb: not implemented; always returns -ENOSYS. */
+static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+/* detach_mcast verb: not implemented; always returns -ENOSYS. */
+static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+/* process_mad verb: not implemented; always returns -ENOSYS. */
+static int c2_process_mad(struct ib_device *ibdev,
+			  int mad_flags,
+			  u8 port_num,
+			  struct ib_wc *in_wc,
+			  struct ib_grh *in_grh,
+			  struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return -ENOSYS;
+}
+
+/*
+ * iWARP CM connect callback: thin wrapper that returns the result of
+ * c2_llp_connect().
+ */
+static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Request a connection */
+	return c2_llp_connect(cm_id, iw_param);
+}
+
+/*
+ * iWARP CM accept callback: thin wrapper that returns the result of
+ * c2_llp_accept().
+ */
+static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	/* Accept the new connection */
+	return c2_llp_accept(cm_id, iw_param);
+}
+
+/*
+ * iWARP CM reject callback: thin wrapper that returns the result of
+ * c2_llp_reject(), passing the private data through unchanged.
+ */
+static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	return c2_llp_reject(cm_id, pdata, pdata_len);
+}
+
+/*
+ * iWARP CM create_listen callback: calls c2_llp_service_create() with
+ * the given backlog, logs the result and returns it.
+ */
+static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+	int err;
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	err = c2_llp_service_create(cm_id, backlog);
+	pr_debug("%s:%u err=%d\n",
+		__FUNCTION__, __LINE__,
+		err);
+	return err;
+}
+
+/*
+ * iWARP CM destroy_listen callback: thin wrapper that returns the
+ * result of c2_llp_service_destroy().
+ */
+static int c2_service_destroy(struct iw_cm_id *cm_id)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+
+	return c2_llp_service_destroy(cm_id);
+}
+
+/*
+ * open handler of the pseudo netdev: pass the address and mask of
+ * every interface address on the device to c2_add_addr().  The result
+ * of c2_add_addr() is ignored.
+ *
+ * Returns 0 in all cases, including when in_dev_get() yields no
+ * in_device for the netdev.
+ */
+static int c2_pseudo_up(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("adding...\n");
+	/* for_ifa() supplies the loop variable "ifa". */
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	/* Pairs with the in_dev_get() above. */
+	in_dev_put(ind);
+
+	return 0;
+}
+
+/*
+ * stop handler of the pseudo netdev: pass the address and mask of
+ * every interface address on the device to c2_del_addr().  The result
+ * of c2_del_addr() is ignored.
+ *
+ * Returns 0 in all cases, including when in_dev_get() yields no
+ * in_device for the netdev.
+ */
+static int c2_pseudo_down(struct net_device *netdev)
+{
+	struct in_device *ind;
+	struct c2_dev *c2dev = netdev->priv;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	pr_debug("deleting...\n");
+	/* for_ifa() supplies the loop variable "ifa". */
+	for_ifa(ind) {
+#ifdef DEBUG
+		u8 *ip = (u8 *) & ifa->ifa_address;
+
+		pr_debug("%s: %d.%d.%d.%d\n",
+		       ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+		c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+	}
+	endfor_ifa(ind);
+	/* Pairs with the in_dev_get() above. */
+	in_dev_put(ind);
+
+	return 0;
+}
+
+/*
+ * hard_start_xmit handler of the pseudo netdev: the packet is freed
+ * without being sent and NETDEV_TX_OK is returned.
+ */
+static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+/*
+ * change_mtu handler of the pseudo netdev.  Accepts values in the
+ * range ETH_ZLEN..ETH_JUMBO_MTU inclusive and stores them in
+ * netdev->mtu; anything else is rejected with -EINVAL.
+ */
+static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	if (new_mtu >= ETH_ZLEN && new_mtu <= ETH_JUMBO_MTU) {
+		netdev->mtu = new_mtu;
+
+		/* TODO: Tell rnic about new rmda interface mtu */
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * alloc_netdev() setup callback for the pseudo netdev: install the
+ * c2_pseudo_* handlers and Ethernet-style link parameters.  The device
+ * gets no stats, tx-timeout or set-mac handlers, no transmit queue and
+ * has ARP disabled.
+ */
+static void setup(struct net_device *netdev)
+{
+	SET_MODULE_OWNER(netdev);
+
+	/* Operations implemented by this driver. */
+	netdev->open = c2_pseudo_up;
+	netdev->stop = c2_pseudo_down;
+	netdev->hard_start_xmit = c2_pseudo_xmit_frame;
+	netdev->change_mtu = c2_pseudo_change_mtu;
+
+	/* Operations deliberately left unset. */
+	netdev->get_stats = NULL;
+	netdev->tx_timeout = NULL;
+	netdev->set_mac_address = NULL;
+
+	/* Link-layer parameters. */
+	netdev->type = ARPHRD_ETHER;
+	netdev->mtu = 1500;
+	netdev->hard_header_len = ETH_HLEN;
+	netdev->addr_len = ETH_ALEN;
+	netdev->tx_queue_len = 0;
+	netdev->watchdog_timeo = 0;
+	netdev->flags |= IFF_NOARP;
+}
+
+/*
+ * Allocate (but do not register) the pseudo network device that
+ * accompanies the adapter.
+ *
+ * The device name is "iw" followed by whatever comes after the first
+ * three characters of the real netdev's name (ethN -> iwN).  The
+ * hardware address is read from the adapter's register space at
+ * C2_REGS_RDMA_ENADDR, and netdev->priv is pointed at @c2dev.
+ *
+ * Returns the new net_device, or NULL if alloc_netdev() fails.
+ */
+static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
+{
+	char name[IFNAMSIZ];
+	struct net_device *netdev;
+
+	/*
+	 * change ethxxx to iwxxx
+	 *
+	 * snprintf() keeps the result inside name[] and always
+	 * NUL-terminates it.
+	 * NOTE(review): assumes the real netdev name has at least three
+	 * characters - confirm against the caller.
+	 */
+	snprintf(name, sizeof(name), "iw%s", &c2dev->netdev->name[3]);
+	netdev = alloc_netdev(sizeof(*netdev), name, setup);
+	if (!netdev) {
+		printk(KERN_ERR PFX "%s -  etherdev alloc failed\n",
+			__FUNCTION__);
+		return NULL;
+	}
+
+	netdev->priv = c2dev;
+
+	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+	memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
+
+	/* Print out the MAC address */
+	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
+		netdev->name,
+		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+
+#if 0
+	/* Disable network packets */
+	netif_stop_queue(netdev);
+#endif
+	return netdev;
+}
+
+/*
+ * Create and register the pseudo netdev, fill in the ib_device verbs
+ * and iWARP CM callback tables, register the device with the IB core
+ * and create its class attribute files.
+ *
+ * Returns 0 on success.  On failure returns -ENOMEM (pseudo netdev or
+ * iwcm table could not be allocated) or the error from
+ * register_netdev(), ib_register_device() or
+ * class_device_create_file(); everything set up before the failure is
+ * torn down again in reverse order.
+ */
+int c2_register_device(struct c2_dev *dev)
+{
+	int ret;
+	int i;
+
+	/* Register pseudo network device */
+	dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
+	if (!dev->pseudo_netdev)
+		return -ENOMEM;	/* its dev_addr is needed below */
+
+	ret = register_netdev(dev->pseudo_netdev);
+	if (ret) {
+		printk(KERN_ERR PFX
+			"Unable to register netdev, ret = %d\n", ret);
+		goto err_free_netdev;
+	}
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
+	dev->ibdev.owner = THIS_MODULE;
+	dev->ibdev.uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+
+	dev->ibdev.node_type = RDMA_NODE_RNIC;
+	/* Node GUID: the pseudo netdev's 6-byte address, zero padded. */
+	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+	memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
+	dev->ibdev.phys_port_cnt = 1;
+	dev->ibdev.dma_device = &dev->pcidev->dev;
+	dev->ibdev.class_dev.dev = &dev->pcidev->dev;
+	dev->ibdev.query_device = c2_query_device;
+	dev->ibdev.query_port = c2_query_port;
+	dev->ibdev.modify_port = c2_modify_port;
+	dev->ibdev.query_pkey = c2_query_pkey;
+	dev->ibdev.query_gid = c2_query_gid;
+	dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
+	dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
+	dev->ibdev.mmap = c2_mmap_uar;
+	dev->ibdev.alloc_pd = c2_alloc_pd;
+	dev->ibdev.dealloc_pd = c2_dealloc_pd;
+	dev->ibdev.create_ah = c2_ah_create;
+	dev->ibdev.destroy_ah = c2_ah_destroy;
+	dev->ibdev.create_qp = c2_create_qp;
+	dev->ibdev.modify_qp = c2_modify_qp;
+	dev->ibdev.destroy_qp = c2_destroy_qp;
+	dev->ibdev.create_cq = c2_create_cq;
+	dev->ibdev.destroy_cq = c2_destroy_cq;
+	dev->ibdev.poll_cq = c2_poll_cq;
+	dev->ibdev.get_dma_mr = c2_get_dma_mr;
+	dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
+	dev->ibdev.reg_user_mr = c2_reg_user_mr;
+	dev->ibdev.dereg_mr = c2_dereg_mr;
+
+	/* Fast memory regions are not supported. */
+	dev->ibdev.alloc_fmr = NULL;
+	dev->ibdev.unmap_fmr = NULL;
+	dev->ibdev.dealloc_fmr = NULL;
+	dev->ibdev.map_phys_fmr = NULL;
+
+	dev->ibdev.attach_mcast = c2_multicast_attach;
+	dev->ibdev.detach_mcast = c2_multicast_detach;
+	dev->ibdev.process_mad = c2_process_mad;
+
+	dev->ibdev.req_notify_cq = c2_arm_cq;
+	dev->ibdev.post_send = c2_post_send;
+	dev->ibdev.post_recv = c2_post_receive;
+
+	/* iWARP connection-manager callback table. */
+	dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+	if (!dev->ibdev.iwcm) {
+		ret = -ENOMEM;
+		goto err_unregister_netdev;
+	}
+	dev->ibdev.iwcm->add_ref = c2_add_ref;
+	dev->ibdev.iwcm->rem_ref = c2_rem_ref;
+	dev->ibdev.iwcm->get_qp = c2_get_qp;
+	dev->ibdev.iwcm->connect = c2_connect;
+	dev->ibdev.iwcm->accept = c2_accept;
+	dev->ibdev.iwcm->reject = c2_reject;
+	dev->ibdev.iwcm->create_listen = c2_service_create;
+	dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
+
+	ret = ib_register_device(&dev->ibdev);
+	if (ret)
+		goto err_free_iwcm;
+
+	for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) {
+		ret = class_device_create_file(&dev->ibdev.class_dev,
+					       c2_class_attributes[i]);
+		if (ret)
+			goto err_unregister_ibdev;
+	}
+
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	return 0;
+
+	/* Unwind in the reverse order of setup. */
+err_unregister_ibdev:
+	ib_unregister_device(&dev->ibdev);
+err_free_iwcm:
+	kfree(dev->ibdev.iwcm);
+err_unregister_netdev:
+	unregister_netdev(dev->pseudo_netdev);
+err_free_netdev:
+	free_netdev(dev->pseudo_netdev);
+	return ret;
+}
+
+/*
+ * Undo c2_register_device(): unregister and free the pseudo netdev,
+ * then unregister the ib device.
+ * NOTE(review): the ibdev.iwcm table allocated at registration is not
+ * freed here - confirm whether it is released elsewhere.
+ */
+void c2_unregister_device(struct c2_dev *dev)
+{
+	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	unregister_netdev(dev->pseudo_netdev);
+	free_netdev(dev->pseudo_netdev);
+	ib_unregister_device(&dev->ibdev);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
new file mode 100644
index 0000000..fc90622
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_PROVIDER_H
+#define C2_PROVIDER_H
+#include <linux/inetdevice.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+
+#include "c2_mq.h"
+#include <rdma/iw_cm.h>
+
+/*
+ * Single-bit flags occupying bits 10 through 14.
+ * NOTE(review): presumably access-permission bits of a memory
+ * protection table entry; no user is visible in this header - confirm.
+ */
+#define C2_MPT_FLAG_ATOMIC        (1 << 14)
+#define C2_MPT_FLAG_REMOTE_WRITE  (1 << 13)
+#define C2_MPT_FLAG_REMOTE_READ   (1 << 12)
+#define C2_MPT_FLAG_LOCAL_WRITE   (1 << 11)
+#define C2_MPT_FLAG_LOCAL_READ    (1 << 10)
+
+/*
+ * A buffer pointer together with the "mapping" field declared by
+ * DECLARE_PCI_UNMAP_ADDR().
+ */
+struct c2_buf_list {
+	void *buf;
+	 DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+
+/* The user context keeps track of objects allocated for a
+ * particular user-mode client.  It currently holds nothing beyond the
+ * embedded ib_ucontext, which must remain the first member because
+ * c2_dealloc_ucontext() passes the ib_ucontext pointer to kfree(). */
+struct c2_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+/* Forward declaration only; the type is not defined in this header. */
+struct c2_mtt;
+
+/* All objects associated with a PD are kept in the
+ * associated user context if present.
+ *
+ * ibpd must remain the first member: c2_dealloc_pd() passes the ib_pd
+ * pointer to kfree().
+ */
+struct c2_pd {
+	struct ib_pd ibpd;
+	u32 pd_id;	/* id reserved from the device's PD bitmap */
+};
+
+/* Driver-private memory region: the core ib_mr plus its owning PD. */
+struct c2_mr {
+	struct ib_mr ibmr;
+	struct c2_pd *pd;	/* protection domain the region was registered in */
+};
+
+/* Forward declaration only; the type is not defined in this header. */
+struct c2_av;
+
+/*
+ * NOTE(review): presumably identifies where an address handle's storage
+ * lives; no user is visible in this header - confirm before relying on
+ * it.
+ */
+enum c2_ah_type {
+	C2_AH_ON_HCA,
+	C2_AH_PCI_POOL,
+	C2_AH_KMALLOC
+};
+
+/* Driver-private address handle: wraps the core ib_ah, nothing more. */
+struct c2_ah {
+	struct ib_ah ibah;
+};
+
+/* Driver-private completion queue wrapping the core ib_cq. */
+struct c2_cq {
+	struct ib_cq ibcq;
+	spinlock_t lock;
+	atomic_t refcount;
+	int cqn;		/* NOTE(review): presumably the CQ number - confirm */
+	int is_kernel;
+	wait_queue_head_t wait;
+
+	u32 adapter_handle;
+	struct c2_mq mq;	/* message queue (see c2_mq.h) */
+};
+
+/* Work-queue placeholder; currently holds only a lock. */
+struct c2_wq {
+	spinlock_t lock;
+};
+struct iw_cm_id;
+/* Driver-private queue pair wrapping the core ib_qp. */
+struct c2_qp {
+	struct ib_qp ibqp;
+	struct iw_cm_id *cm_id;
+	spinlock_t lock;
+	atomic_t refcount;	/* changed by c2_add_ref() / c2_rem_ref() */
+	wait_queue_head_t wait;	/* woken when refcount drops to zero */
+	int qpn;
+
+	u32 adapter_handle;
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u8 state;
+
+	struct c2_mq sq_mq;	/* NOTE(review): presumably the send queue - confirm */
+	struct c2_mq rq_mq;	/* NOTE(review): presumably the receive queue - confirm */
+};
+
+/*
+ * Local/remote address and port pair.
+ * NOTE(review): presumably the result of querying a connection
+ * request; byte order is not visible here - confirm.
+ */
+struct c2_cr_query_attrs {
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+};
+
+/* Map a core ib_pd back to the c2_pd that embeds it. */
+static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct c2_pd, ibpd);
+}
+
+/* Map a core ib_ucontext back to the c2_ucontext that embeds it. */
+static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct c2_ucontext, ibucontext);
+}
+
+/* Map a core ib_mr back to the c2_mr that embeds it. */
+static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct c2_mr, ibmr);
+}
+
+
+/* Map a core ib_ah back to the c2_ah that embeds it. */
+static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct c2_ah, ibah);
+}
+
+/* Map a core ib_cq back to the c2_cq that embeds it. */
+static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct c2_cq, ibcq);
+}
+
+/* Map a core ib_qp back to the c2_qp that embeds it. */
+static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct c2_qp, ibqp);
+}
+
+/*
+ * Report whether @addr is one of the interface addresses configured on
+ * @netdev.  @addr is compared directly with ifa->ifa_address, so it
+ * must use the same representation.
+ *
+ * Returns 1 on a match, 0 otherwise (including when in_dev_get()
+ * yields no in_device for the netdev).
+ */
+static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
+{
+	struct in_device *ind;
+	int ret = 0;
+
+	ind = in_dev_get(netdev);
+	if (!ind)
+		return 0;
+
+	/* for_ifa() supplies the loop variable "ifa". */
+	for_ifa(ind) {
+		if (ifa->ifa_address == addr) {
+			ret = 1;
+			break;
+		}
+	}
+	endfor_ifa(ind);
+	/* Pairs with the in_dev_get() above. */
+	in_dev_put(ind);
+	return ret;
+}
+#endif				/* C2_PROVIDER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
new file mode 100644
index 0000000..1226113
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -0,0 +1,975 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_MAX_ORD_PER_QP 128	/* ORD requested by c2_alloc_qp() */
+#define C2_MAX_IRD_PER_QP 128	/* IRD requested by c2_alloc_qp() */
+
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | (hint_count))
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16) /* bits 30..16 */
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF) /* bits 15..0 */
+
+#define NO_SUPPORT -1	/* becomes 255 once stored in the u8 table below */
+static const u8 c2_opcode[] = {	/* indexed by IB work-request opcode */
+	[IB_WR_SEND] = C2_WR_TYPE_SEND,
+	[IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
+	[IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
+	[IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
+	[IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
+	[IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
+};
+
+/* Map an IB QP state onto the adapter's state code; -1 if there is none. */
+static int to_c2_state(enum ib_qp_state ib_state)
+{
+	int c2_state = -1;
+
+	if (ib_state == IB_QPS_RESET)
+		c2_state = C2_QP_STATE_IDLE;
+	else if (ib_state == IB_QPS_RTS)
+		c2_state = C2_QP_STATE_RTS;
+	/* SQD and SQE both collapse to the adapter's CLOSING state. */
+	else if (ib_state == IB_QPS_SQD || ib_state == IB_QPS_SQE)
+		c2_state = C2_QP_STATE_CLOSING;
+	else if (ib_state == IB_QPS_ERR)
+		c2_state = C2_QP_STATE_ERROR;
+
+	return c2_state;
+}
+
+/* Map an adapter QP state onto the matching IB_QPS_* value; -1 if unknown. */
+static int to_ib_state(enum c2_qp_state c2_state)
+{
+	int ib_state = -1;
+
+	if (c2_state == C2_QP_STATE_IDLE)
+		ib_state = IB_QPS_RESET;
+	else if (c2_state == C2_QP_STATE_CONNECTING)
+		ib_state = IB_QPS_RTR;
+	else if (c2_state == C2_QP_STATE_RTS)
+		ib_state = IB_QPS_RTS;
+	else if (c2_state == C2_QP_STATE_CLOSING)
+		ib_state = IB_QPS_SQD;
+	else if (c2_state == C2_QP_STATE_ERROR)
+		ib_state = IB_QPS_ERR;
+	else if (c2_state == C2_QP_STATE_TERMINATE)
+		ib_state = IB_QPS_SQE;
+	return ib_state;
+}
+
+/* Return the printable name of an IB QP state (used in debug messages). */
+static const char *to_ib_state_str(int ib_state)
+{
+	static const char *names[] = {
+		"IB_QPS_RESET",
+		"IB_QPS_INIT",
+		"IB_QPS_RTR",
+		"IB_QPS_RTS",
+		"IB_QPS_SQD",
+		"IB_QPS_SQE",
+		"IB_QPS_ERR"
+	};
+
+	/* Anything outside IB_QPS_RESET..IB_QPS_ERR has no table entry. */
+	if (ib_state >= IB_QPS_RESET && ib_state <= IB_QPS_ERR)
+		return names[ib_state - IB_QPS_RESET];
+	return "<invalid IB QP state>";
+}
+
+/* Store the IB_QPS_* equivalent of adapter state c2_state in qp->state. */
+void c2_set_qp_state(struct c2_qp *qp, int c2_state)
+{
+	const char *old_name = to_ib_state_str(qp->state);
+	int ib_state = to_ib_state(c2_state);
+
+	/* NOTE: state is a u8, so an unmapped (-1) result is stored as 255. */
+	pr_debug("%s: qp[%p] state modify %s --> %s\n",
+		 __FUNCTION__, qp, old_name, to_ib_state_str(ib_state));
+	qp->state = ib_state;
+}
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+/*
+ * Send a CCWR_QP_MODIFY request for the state named in attr (qp_state if
+ * IB_QP_STATE is set, else cur_qp_state if IB_QP_CUR_STATE is set) and, on
+ * success, record it in qp->state.  Returns 0 or an error code.
+ */
+int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+		 struct ib_qp_attr *attr, int attr_mask)
+{
+	struct c2wr_qp_modify_req wr;
+	struct c2wr_qp_modify_rep *reply;
+	struct c2_vq_req *vq_req;
+	unsigned long flags;
+	u8 next_state;
+	int err;
+
+	pr_debug("%s:%d qp=%p, %s --> %s\n",
+		__FUNCTION__, __LINE__, qp,
+		to_ib_state_str(qp->state), to_ib_state_str(attr->qp_state));
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+	wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	if (attr_mask & IB_QP_STATE) {
+		/* Ensure the state is valid; bail0 frees vq_req on failure */
+		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
+			err = -EINVAL;
+			goto bail0;
+		}
+		wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
+		if (attr->qp_state == IB_QPS_ERR) {
+			spin_lock_irqsave(&qp->lock, flags);
+			if (qp->cm_id && qp->state == IB_QPS_RTS) {
+				pr_debug("Generating CLOSE event for QP-->ERR, "
+					"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+				/* Generate an CLOSE event */
+				vq_req->cm_id = qp->cm_id;
+				vq_req->event = IW_CM_EVENT_CLOSE;
+			}
+			spin_unlock_irqrestore(&qp->lock, flags);
+		}
+		next_state = attr->qp_state;
+	} else if (attr_mask & IB_QP_CUR_STATE) {
+		if (attr->cur_qp_state != IB_QPS_RTR &&
+		    attr->cur_qp_state != IB_QPS_RTS &&
+		    attr->cur_qp_state != IB_QPS_SQD &&
+		    attr->cur_qp_state != IB_QPS_SQE) {
+			err = -EINVAL;	/* not a plain return: vq_req would leak */
+			goto bail0;
+		}
+		wr.next_qp_state =
+		    cpu_to_be32(to_c2_state(attr->cur_qp_state));
+		next_state = attr->cur_qp_state;
+	} else {
+		err = 0;
+		goto bail0;
+	}
+
+	/* reference the request struct */
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+	if (!err)
+		qp->state = next_state;
+#ifdef DEBUG
+	else
+		pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err);
+#endif
+	/*
+	 * If we're going to error and generating the event here, then
+	 * we need to remove the reference because there will be no
+	 * close event generated by the adapter
+	*/
+	spin_lock_irqsave(&qp->lock, flags);
+	if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+
+	pr_debug("%s:%d qp=%p, cur_state=%s\n",
+		__FUNCTION__, __LINE__,
+		qp,
+		to_ib_state_str(qp->state));
+	return err;
+}
+
+int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+			  int ord, int ird)
+{
+	struct c2wr_qp_modify_req wr;
+	struct c2wr_qp_modify_rep *reply;
+	struct c2_vq_req *vq_req;
+	int err;
+	/* Ask the adapter to change only ORD/IRD; returns 0 or an error code. */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+	wr.ord = cpu_to_be32(ord);
+	wr.ird = cpu_to_be32(ird);
+	wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+	wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+	/* extra reference on the request; put again below if the send fails */
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail0;
+
+	reply = (struct c2wr_qp_modify_rep *) (unsigned long)
+		vq_req->reply_msg;
+	if (!reply) {
+		err = -ENOMEM;	/* the wait succeeded but left no reply message */
+		goto bail0;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_qp_destroy_req wr;
+	struct c2wr_qp_destroy_rep *reply;
+	unsigned long flags;
+	int err;
+
+	/*
+	 * Destroy qp on the adapter.  First allocate a verb request message.
+	 */
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req) {
+		return -ENOMEM;
+	}
+
+	/*
+	 * Initialize the WR
+	 */
+	c2_wr_set_id(&wr, CCWR_QP_DESTROY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.qp_handle = qp->adapter_handle;
+
+	/*
+	 * reference the request struct.  dereferenced in the int handler.
+	 */
+	vq_req_get(c2dev, vq_req);
+
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id && qp->state == IB_QPS_RTS) {
+		pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
+			"qp=%p, cm_id=%p\n",qp,qp->cm_id);
+		/* Generate an CLOSE event */
+		vq_req->qp = qp;
+		vq_req->cm_id = qp->cm_id;
+		vq_req->event = IW_CM_EVENT_CLOSE;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	/*
+	 * Send WR to adapter
+	 */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	/*
+	 * Wait for reply from adapter
+	 */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	/*
+	 * Process reply (only its presence is checked, not its status)
+	 */
+	reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	spin_lock_irqsave(&qp->lock, flags);
+	if (qp->cm_id) {
+		qp->cm_id->rem_ref(qp->cm_id);	/* drop the QP's cm_id reference */
+		qp->cm_id = NULL;
+	}
+	spin_unlock_irqrestore(&qp->lock, flags);
+
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	int ret;
+	/* Store qp in the QP idr; the id chosen is written to qp->qpn. */
+        do {
+		spin_lock_irq(&c2dev->qp_table.lock);
+		ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
+					c2dev->qp_table.last++, &qp->qpn);
+		spin_unlock_irq(&c2dev->qp_table.lock);
+        } while ((ret == -EAGAIN) &&	/* retry only after idr_pre_get() */
+	 	 idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
+	return ret;
+}
+
+static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
+{
+	spin_lock_irq(&c2dev->qp_table.lock);
+	idr_remove(&c2dev->qp_table.idr, qpn);	/* drop qpn from the QP idr */
+	spin_unlock_irq(&c2dev->qp_table.lock);	/* NOTE: re-enables IRQs */
+}
+
+struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
+{
+	unsigned long flags;
+	struct c2_qp *qp;
+	/* Return whatever idr_find() has stored under qpn in the QP table. */
+	spin_lock_irqsave(&c2dev->qp_table.lock, flags);
+	qp = idr_find(&c2dev->qp_table.idr, qpn);
+	spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
+	return qp;	/* qp->refcount is not touched here */
+}
+
+int c2_alloc_qp(struct c2_dev *c2dev,
+		struct c2_pd *pd,
+		struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
+{
+	struct c2wr_qp_create_req wr;
+	struct c2wr_qp_create_rep *reply;
+	struct c2_vq_req *vq_req;
+	struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
+	struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
+	unsigned long peer_pa;
+	u32 q_size, msg_size, mmap_size;
+	void __iomem *mmap;
+	int err;
+	/* Create qp on the adapter and map its SQ/RQ message queues. */
+	err = c2_alloc_qpn(c2dev, qp);
+	if (err)
+		return err;
+	qp->ibqp.qp_num = qp->qpn;
+	qp->ibqp.qp_type = IB_QPT_RC;
+
+	/* Allocate the SQ and RQ shared pointers */
+	qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->sq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->sq_mq.shared) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					 &qp->rq_mq.shared_dma, GFP_KERNEL);
+	if (!qp->rq_mq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	/* Allocate the verbs request */
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+
+	/* Initialize the work request */
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_QP_CREATE);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+	wr.sq_cq_handle = send_cq->adapter_handle;
+	wr.rq_cq_handle = recv_cq->adapter_handle;
+	wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
+	wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
+	wr.srq_handle = 0;
+	wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
+			       QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
+	wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
+	wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+	wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
+	wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
+	wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
+	wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
+	wr.pd_id = pd->pd_id;
+	wr.user_context = (unsigned long) qp;
+
+	vq_req_get(c2dev, vq_req);
+
+	/* Send the WR to the adapter */
+	err = vq_send_wr(c2dev, (union c2wr *) & wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail3;
+	}
+
+	/* Wait for the verb reply  */
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail3;
+	}
+
+	/* Process the reply */
+	reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail3;
+	}
+
+	if ((err = c2_wr_get_result(reply)) != 0) {
+		goto bail4;
+	}
+
+	/* Fill in the kernel QP struct */
+	atomic_set(&qp->refcount, 1);
+	qp->adapter_handle = reply->qp_handle;
+	qp->state = IB_QPS_RESET;
+	qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
+	qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
+
+	/* Initialize the SQ MQ */
+	q_size = be32_to_cpu(reply->sq_depth);
+	msg_size = be32_to_cpu(reply->sq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail5;
+	}
+
+	c2_mq_req_init(&qp->sq_mq,
+		       be32_to_cpu(reply->sq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the RQ mq */
+	q_size = be32_to_cpu(reply->rq_depth);
+	msg_size = be32_to_cpu(reply->rq_msg_size);
+	peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
+	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+	mmap = ioremap_nocache(peer_pa, mmap_size);
+	if (!mmap) {
+		err = -ENOMEM;
+		goto bail6;
+	}
+
+	c2_mq_req_init(&qp->rq_mq,
+		       be32_to_cpu(reply->rq_mq_index),
+		       q_size,
+		       msg_size,
+		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
+		       mmap,				/* peer */
+		       C2_MQ_ADAPTER_TARGET);
+
+	vq_repbuf_free(c2dev, reply);
+	vq_req_free(c2dev, vq_req);
+
+	return 0;
+
+      bail6:	/* RQ mapping failed: undo the SQ mapping */
+	iounmap(qp->sq_mq.peer);
+      bail5:	/* SQ mapping failed: the adapter-side QP must go too */
+	destroy_qp(c2dev, qp);
+      bail4:	/* the adapter rejected the create request */
+	vq_repbuf_free(c2dev, reply);
+      bail3:	/* send or wait failed, or no reply arrived */
+	vq_req_free(c2dev, vq_req);
+      bail2:	/* no verbs request could be allocated */
+	c2_free_mqsp(qp->rq_mq.shared);
+      bail1:	/* no RQ shared pointer could be allocated */
+	c2_free_mqsp(qp->sq_mq.shared);
+      bail0:	/* no SQ shared pointer could be allocated */
+	c2_free_qpn(c2dev, qp->qpn);
+	return err;
+}
+
+/* Remove qp from the QPN table, destroy it on the adapter, free its MQs. */
+void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+	struct c2_cq *send_cq;
+	struct c2_cq *recv_cq;
+
+	send_cq = to_c2cq(qp->ibqp.send_cq);
+	recv_cq = to_c2cq(qp->ibqp.recv_cq);
+
+	/*
+	 * Lock CQs here, so that CQ polling code can do QP lookup
+	 * without taking a lock.  IRQs stay disabled for the whole section,
+	 * so the QPN is removed with plain spin_lock(): c2_free_qpn() ends in
+	 * spin_unlock_irq(), which would re-enable IRQs with the CQ locks held.
+	 */
+	spin_lock_irq(&send_cq->lock);
+	if (send_cq != recv_cq)
+		spin_lock(&recv_cq->lock);
+
+	spin_lock(&c2dev->qp_table.lock);
+	idr_remove(&c2dev->qp_table.idr, qp->qpn);
+	spin_unlock(&c2dev->qp_table.lock);
+
+	if (send_cq != recv_cq)
+		spin_unlock(&recv_cq->lock);
+	spin_unlock_irq(&send_cq->lock);
+
+	/* Destroy the QP in the RNIC. */
+	destroy_qp(c2dev, qp);
+
+	/* Mark any unreaped CQEs as null and void. */
+	c2_cq_clean(c2dev, qp, send_cq->cqn);
+	if (send_cq != recv_cq)
+		c2_cq_clean(c2dev, qp, recv_cq->cqn);
+
+	/* Unmap the MQs and return the shared pointers to the message pool. */
+	iounmap(qp->sq_mq.peer);
+	iounmap(qp->rq_mq.peer);
+	c2_free_mqsp(qp->sq_mq.shared);
+	c2_free_mqsp(qp->rq_mq.shared);
+
+	/* Drop the reference set at allocation, then wait for it to reach 0. */
+	atomic_dec(&qp->refcount);
+	wait_event(qp->wait, !atomic_read(&qp->refcount));
+}
+
+/*
+ * Function: move_sgl
+ *
+ * Description:
+ * Move an SGL from the user's work request struct into a CCIL Work Request
+ * message, swapping to WR byte order, skipping zero-length entries and
+ * ensuring the total length doesn't overflow 32 bits.
+ *
+ * IN:
+ * dst		- ptr to CCIL Work Request message SGL memory.
+ * src		- ptr to the consumer's SGL memory.
+ * count	- number of entries in src.
+ *
+ * OUT: p_len - total length; actual_count - number of SGEs written to dst.
+ *
+ * Return: 0, or -EINVAL if the total length would wrap past 2^32-1.
+ */
+static int
+move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
+	 u8 * actual_count)
+{
+	u32 tot = 0;		/* running total */
+	u8 acount = 0;		/* running total non-0 len sge's */
+
+	while (count > 0) {
+		/*
+		 * If the addition of this SGE causes the
+		 * total SGL length to exceed 2^32-1, then
+		 * fail-n-bail.
+		 *
+		 * If the current total plus the next element length
+		 * wraps, then it will go negative and be less than the
+		 * current total...
+		 */
+		if ((tot + src->length) < tot) {
+			return -EINVAL;	/* *p_len and *actual_count stay unset */
+		}
+		/*
+		 * Bug: 1456 (as well as 1498 & 1643)
+		 * Skip over any sge's supplied with len=0
+		 */
+		if (src->length) {
+			tot += src->length;
+			dst->stag = cpu_to_be32(src->lkey);
+			dst->to = cpu_to_be64(src->addr);
+			dst->length = cpu_to_be32(src->length);
+			dst++;
+			acount++;
+		}
+		src++;
+		count--;
+	}
+
+	if (acount == 0) {
+		/*
+		 * Bug: 1476 (as well as 1498, 1456 and 1643)
+		 * Setup the SGL in the WR to make it easier for the RNIC.
+		 * This way, the FW doesn't have to deal with special cases.
+		 * Setting length=0 should be sufficient.
+		 */
+		dst->stag = 0;
+		dst->to = 0;
+		dst->length = 0;
+	}
+
+	*p_len = tot;
+	*actual_count = acount;
+	return 0;
+}
+
+/*
+ * Function: c2_activity (private function)
+ *
+ * Description:
+ * Post an mq index to the host->adapter activity fifo.
+ *
+ * IN:
+ * c2dev	- ptr to c2dev structure
+ * mq_index	- mq index to post
+ * shared	- value most recently written to shared
+ *
+ * OUT:
+ *
+ * Return:
+ * none
+ */
+static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
+{
+	/*
+	 * First read the register to see if the FIFO is full (bit 31 set),
+	 * and if so, wait until it's not.  This isn't perfect -- there is no
+	 * synchronization among the clients of the register, but in
+	 * practice it prevents multiple CPUs from hammering the bus
+	 * with PCI RETRY. Note that when this does happen, the card
+	 * cannot get on the bus and the card and system hang in a
+	 * deadlock -- thus the need for this code. [TOT]
+	 */
+	while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(0);	/* give up the CPU between polls */
+	}
+
+	__raw_writel(C2_HINT_MAKE(mq_index, shared),
+		     c2dev->regs + PCI_BAR0_ADAPTER_HINT);
+}
+
+/*
+ * Function: qp_wr_post
+ *
+ * Description:
+ * This in-line function allocates a MQ msg, then moves the host-copy of
+ * the completed WR into msg.  Then it posts the message.
+ *
+ * IN:
+ * q		- ptr to user MQ.
+ * wr		- ptr to host-copy of the WR.
+ * qp		- ptr to user qp (not used by this function)
+ * size		- Number of bytes to post.  Assumed to be divisible by 4.
+ *
+ * OUT: none
+ *
+ * Return:
+ * 0, or -EINVAL if c2_mq_alloc() could not supply a message.
+ */
+static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
+{
+	union c2wr *msg;
+
+	msg = c2_mq_alloc(q);
+	if (msg == NULL) {
+		return -EINVAL;
+	}
+#ifdef CCMSGMAGIC
+	((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+
+	/*
+	 * Since all header fields in the WR are the same as the
+	 * CQE, set the following so the adapter need not.
+	 */
+	c2_wr_set_result(wr, CCERR_PENDING);
+
+	/*
+	 * Copy the wr down to the adapter
+	 */
+	memcpy((void *) msg, (void *) wr, size);
+
+	c2_mq_produce(q);
+	return 0;
+}
+
+
+int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+		 struct ib_send_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	int err = 0;
+	/* Post each WR in the ib_wr chain to the SQ; stop at the first failure. */
+	u32 flags;
+	u32 tot_len;
+	u8 actual_sge_count;
+	u32 msg_size;
+
+	if (qp->state > IB_QPS_RTS)
+		return -EINVAL;
+
+	while (ib_wr) {
+
+		flags = 0;
+		wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		if (ib_wr->send_flags & IB_SEND_SIGNALED) {
+			flags |= SQ_SIGNALED;
+		}
+
+		switch (ib_wr->opcode) {
+		case IB_WR_SEND:
+			if (ib_wr->send_flags & IB_SEND_SOLICITED) {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
+				msg_size = sizeof(struct c2wr_send_req);
+			} else {
+				c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
+				msg_size = sizeof(struct c2wr_send_req);
+			}
+
+			wr.sqwr.send.remote_stag = 0;
+			msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge;
+			if (ib_wr->num_sge > qp->send_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_WRITE:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
+			msg_size = sizeof(struct c2wr_rdma_write_req) +
+			    (sizeof(struct c2_data_addr) * ib_wr->num_sge);
+			if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->send_flags & IB_SEND_FENCE) {
+				flags |= SQ_READ_FENCE;
+			}
+			wr.sqwr.rdma_write.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_write.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			err = move_sgl((struct c2_data_addr *)
+				       & (wr.sqwr.rdma_write.data),
+				       ib_wr->sg_list,
+				       ib_wr->num_sge,
+				       &tot_len, &actual_sge_count);
+			wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
+			c2_wr_set_sge_count(&wr, actual_sge_count);
+			break;
+		case IB_WR_RDMA_READ:
+			c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
+			msg_size = sizeof(struct c2wr_rdma_read_req);
+
+			/* iWARP only supports 1 sge for RDMA reads */
+			if (ib_wr->num_sge > 1) {
+				err = -EINVAL;
+				break;
+			}
+
+			/* Move the local and remote stag/to/len into the WR.
+			 * NOTE(review): reads sg_list[0] even if num_sge == 0.
+			 */
+			wr.sqwr.rdma_read.local_stag =
+			    cpu_to_be32(ib_wr->sg_list->lkey);
+			wr.sqwr.rdma_read.local_to =
+			    cpu_to_be64(ib_wr->sg_list->addr);
+			wr.sqwr.rdma_read.remote_stag =
+			    cpu_to_be32(ib_wr->wr.rdma.rkey);
+			wr.sqwr.rdma_read.remote_to =
+			    cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+			wr.sqwr.rdma_read.length =
+			    cpu_to_be32(ib_wr->sg_list->length);
+			break;
+		default:
+			/* error */
+			msg_size = 0;
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Possible errors include bogus WR
+		 * type, and a bogus SGL length...
+		 */
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Store flags
+		 */
+		c2_wr_set_flags(&wr, flags);
+
+		/*
+		 * Post the puppy!
+		 */
+		err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO.
+		 */
+		c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
+
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;	/* the WR that failed; later ones are unposted */
+	return err;
+}
+
+int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+		    struct ib_recv_wr **bad_wr)
+{
+	struct c2_dev *c2dev = to_c2dev(ibqp->device);
+	struct c2_qp *qp = to_c2qp(ibqp);
+	union c2wr wr;
+	int err = 0;
+	/* Post each WR in the chain to the RQ; *bad_wr gets the first failure. */
+	if (qp->state > IB_QPS_RTS)
+		return -EINVAL;
+
+	/*
+	 * Try and post each work request
+	 */
+	while (ib_wr) {
+		u32 tot_len;
+		u8 actual_sge_count;
+
+		if (ib_wr->num_sge > qp->recv_sgl_depth) {
+			err = -EINVAL;
+			break;
+		}
+
+		/*
+		 * Create local host-copy of the WR
+		 */
+		wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+		c2_wr_set_id(&wr, CCWR_RECV);
+		c2_wr_set_flags(&wr, 0);
+
+		/* sge_count is limited to eight bits. */
+		BUG_ON(ib_wr->num_sge >= 256);
+		err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
+			       ib_wr->sg_list,
+			       ib_wr->num_sge, &tot_len, &actual_sge_count);
+		c2_wr_set_sge_count(&wr, actual_sge_count);
+
+		/*
+		 * If we had an error on the last wr build, then
+		 * break out.  Here that can only be move_sgl()
+		 * rejecting an SGL whose total length wraps.
+		 */
+		if (err) {
+			break;
+		}
+
+		err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
+		if (err) {
+			break;
+		}
+
+		/*
+		 * Enqueue mq index to activity FIFO
+		 */
+		c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
+
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+	return err;
+}
+
+void __devinit c2_init_qp_table(struct c2_dev *c2dev)
+{
+	spin_lock_init(&c2dev->qp_table.lock);	/* taken around every idr call */
+	idr_init(&c2dev->qp_table.idr);		/* maps a QPN to its c2_qp */
+}
+
+void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
+{
+	idr_destroy(&c2dev->qp_table.idr);	/* undoes c2_init_qp_table() */
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
new file mode 100644
index 0000000..f49a32b
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -0,0 +1,664 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/inet.h>
+#include <linux/vmalloc.h>
+
+#include <linux/route.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_vq.h"
+
+/* Device capabilities (c2_rnic_query() reports the page size and SGE ones) */
+#define C2_MIN_PAGESIZE  1024
+
+#define C2_MAX_MRS       32768
+#define C2_MAX_QPS       16000
+#define C2_MAX_WQE_SZ    256
+#define C2_MAX_QP_WR     ((128*1024)/C2_MAX_WQE_SZ)
+#define C2_MAX_SGES      4
+#define C2_MAX_SGE_RD    1
+#define C2_MAX_CQS       32768
+#define C2_MAX_CQES      4096
+#define C2_MAX_PDS       16384
+
+/*
+ * Send the adapter INIT message to the amso1100.
+ *
+ * The request carries the DMA addresses of the hint counter and of the
+ * host-side queues; the value returned is whatever vq_send_wr() reports.
+ */
+static int c2_adapter_init(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req init_wr;
+
+	memset(&init_wr, 0, sizeof(init_wr));
+	c2_wr_set_id(&init_wr, CCWR_INIT);
+	init_wr.hdr.context = 0;
+	init_wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
+	init_wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
+	init_wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
+	init_wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
+	init_wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
+	init_wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
+
+	/* Post the init message and hand its status straight back. */
+	return vq_send_wr(c2dev, (union c2wr *) &init_wr);
+}
+
+/*
+ * Send the adapter TERM message to the amso1100
+ */
+static void c2_adapter_term(struct c2_dev *c2dev)
+{
+	struct c2wr_init_req wr;	/* TERM is built in an INIT-request struct */
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_TERM);
+	wr.hdr.context = 0;
+
+	/* Post the term message; the result of the send is not checked */
+	vq_send_wr(c2dev, (union c2wr *) & wr);
+	c2dev->init = 0;
+
+	return;
+}
+
+/*
+ * Query the adapter and fill in *props; returns 0 or an error code.
+ */
+static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_query_req wr;
+	struct c2wr_rnic_query_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
+	wr.hdr.context = (unsigned long) vq_req;
+	wr.rnic_handle = c2dev->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply = (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {	/* nothing to decode or free: skip c2_errno() */
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	if (err)
+		goto bail2;
+
+	/* fw_ver: major in bits 63..32, minor in 31..16, patch in 15..0 */
+	props->fw_ver =
+		((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
+		((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
+		(be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
+	memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
+	props->max_mr_size         = 0xFFFFFFFF;
+	props->page_size_cap       = ~(C2_MIN_PAGESIZE-1);
+	props->vendor_id           = be32_to_cpu(reply->vendor_id);
+	props->vendor_part_id      = be32_to_cpu(reply->part_number);
+	props->hw_ver              = be32_to_cpu(reply->hw_version);
+	props->max_qp              = be32_to_cpu(reply->max_qps);
+	props->max_qp_wr           = be32_to_cpu(reply->max_qp_depth);
+	props->device_cap_flags    = c2dev->device_cap_flags;
+	props->max_sge             = C2_MAX_SGES;
+	props->max_sge_rd          = C2_MAX_SGE_RD;
+	props->max_cq              = be32_to_cpu(reply->max_cqs);
+	props->max_cqe             = be32_to_cpu(reply->max_cq_depth);
+	props->max_mr              = be32_to_cpu(reply->max_mrs);
+	props->max_pd              = be32_to_cpu(reply->max_pds);
+	props->max_qp_rd_atom      = be32_to_cpu(reply->max_qp_ird);
+	props->max_ee_rd_atom      = 0;
+	props->max_res_rd_atom     = be32_to_cpu(reply->max_global_ird);
+	props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
+	props->max_ee_init_rd_atom = 0;
+	props->atomic_cap          = IB_ATOMIC_NONE;
+	props->max_ee              = 0;
+	props->max_rdd             = 0;
+	props->max_mw              = be32_to_cpu(reply->max_mws);
+	props->max_raw_ipv6_qp     = 0;
+	props->max_raw_ethy_qp     = 0;
+	props->max_mcast_grp       = 0;
+	props->max_mcast_qp_attach = 0;
+	props->max_total_mcast_qp_attach = 0;
+	props->max_ah              = 0;
+	props->max_fmr             = 0;
+	props->max_map_per_fmr     = 0;
+	props->max_srq             = 0;
+	props->max_srq_wr          = 0;
+	props->max_srq_sge         = 0;
+	props->max_pkeys           = 0;
+	props->local_ca_ack_delay  = 0;
+ bail2:
+	vq_repbuf_free(c2dev, reply);
+ bail1:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Add an IP address (and netmask) to the RNIC interface; 0 or an error code
+ */
+int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_setconfig_req *wr;
+	struct c2wr_rnic_setconfig_rep *reply;
+	struct c2_netaddr netaddr;
+	int err, len;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	len = sizeof(struct c2_netaddr);
+	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);	/* not zeroed */
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
+
+	netaddr.ip_addr = inaddr;	/* stored as passed in, no byte swap */
+	netaddr.netmask = inmask;	/* stored as passed in, no byte swap */
+	netaddr.mtu = 0;
+
+	memcpy(wr->data, &netaddr, len);	/* the payload is the c2_netaddr */
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Delete an IP address from the RNIC interface.  Returns 0 or an error code.
+ */
+int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+{
+	struct c2_vq_req *vq_req;
+	struct c2wr_rnic_setconfig_req *wr;
+	struct c2wr_rnic_setconfig_rep *reply;
+	struct c2_netaddr netaddr;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (!vq_req)
+		return -ENOMEM;
+
+	/* Zeroed: vq_send_wr() copies the full msg_size to the adapter */
+	wr = kzalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+	if (!wr) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+	wr->hdr.context = (unsigned long) vq_req;
+	wr->rnic_handle = c2dev->adapter_handle;
+	wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
+
+	netaddr.ip_addr = inaddr;
+	netaddr.netmask = inmask;
+	netaddr.mtu = 0;
+
+	memcpy(wr->data, &netaddr, sizeof(netaddr));
+
+	vq_req_get(c2dev, vq_req);	/* 2nd reference, for the reply path */
+
+	err = vq_send_wr(c2dev, (union c2wr *) wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);	/* send failed: drop that ref */
+		goto bail1;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err)
+		goto bail1;
+
+	reply =
+	    (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	err = c2_errno(reply);
+	vq_repbuf_free(c2dev, reply);
+
+      bail1:
+	kfree(wr);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Open the single RNIC instance (privileged mode, port 0) used by all
+ * low level openib calls and cache its handle in c2dev->adapter_handle.
+ */
+static int c2_rnic_open(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	struct c2wr_rnic_open_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		return -ENOMEM;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
+	wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
+	wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
+	wr.rnic_open.req.port_num = cpu_to_be16(0);
+	wr.rnic_open.req.user_context = (unsigned long) c2dev;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	if ((err = c2_errno(reply)) != 0) {
+		goto bail1;
+	}
+
+	c2dev->adapter_handle = reply->rnic_handle;
+
+      bail1:
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Close the RNIC instance; on success c2dev->adapter_handle is reset to 0.
+ */
+static int c2_rnic_close(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *vq_req;
+	union c2wr wr;
+	struct c2wr_rnic_close_rep *reply;
+	int err;
+
+	vq_req = vq_req_alloc(c2dev);
+	if (vq_req == NULL) {
+		return -ENOMEM;
+	}
+
+	memset(&wr, 0, sizeof(wr));
+	c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
+	wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
+	wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
+
+	vq_req_get(c2dev, vq_req);
+
+	err = vq_send_wr(c2dev, &wr);
+	if (err) {
+		vq_req_put(c2dev, vq_req);
+		goto bail0;
+	}
+
+	err = vq_wait_for_reply(c2dev, vq_req);
+	if (err) {
+		goto bail0;
+	}
+
+	reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
+	if (!reply) {
+		err = -ENOMEM;
+		goto bail0;
+	}
+
+	if ((err = c2_errno(reply)) != 0) {
+		goto bail1;
+	}
+
+	c2dev->adapter_handle = 0;
+
+      bail1:
+	vq_repbuf_free(c2dev, reply);
+      bail0:
+	vq_req_free(c2dev, vq_req);
+	return err;
+}
+
+/*
+ * Called by c2_probe to initialize the RNIC. This principally
+ * involves initializing the various limits and resource pools that
+ * comprise the RNIC instance.  Returns 0, or an error after unwinding.
+ */
+int c2_rnic_init(struct c2_dev *c2dev)
+{
+	int err;
+	u32 qsize, msgsize;
+	void *q1_pages;
+	void *q2_pages;
+	void __iomem *mmio_regs;
+
+	/* Device capabilities */
+	c2dev->device_cap_flags =
+	    (IB_DEVICE_RESIZE_MAX_WR |
+	     IB_DEVICE_CURR_QP_STATE_MOD |
+	     IB_DEVICE_SYS_IMAGE_GUID |
+	     IB_DEVICE_ZERO_STAG |
+	     IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+
+	/* Allocate the qptr_array */
+	c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+	if (!c2dev->qptr_array) {
+		return -ENOMEM;
+	}
+
+	/* Initialize the qptr_array; slots 0-2 are the req, rep and AE queues */
+	memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+	c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
+	c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
+	c2dev->qptr_array[2] = (void *) &c2dev->aeq;
+
+	/* Initialize data structures */
+	init_waitqueue_head(&c2dev->req_vq_wo);
+	spin_lock_init(&c2dev->vqlock);
+	spin_lock_init(&c2dev->lock);
+
+	/* Allocate MQ shared pointer pool for kernel clients. User
+	 * mode client pools are hung off the user context
+	 */
+	err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
+	if (err) {
+		goto bail0;
+	}
+
+	/* Allocate shared pointers for Q0, Q1, and Q2 from
+	 * the shared pointer pool.
+	 */
+
+	c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->hint_count_dma,
+					     GFP_KERNEL);
+	c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->req_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					     &c2dev->rep_vq.shared_dma,
+					     GFP_KERNEL);
+	c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+					  &c2dev->aeq.shared_dma, GFP_KERNEL);
+	if (!c2dev->hint_count || !c2dev->req_vq.shared ||
+	    !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+
+	mmio_regs = c2dev->kva;
+	/* Initialize the Verbs Request Queue */
+	c2_mq_req_init(&c2dev->req_vq, 0,
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)),
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)),
+		       C2_MQ_ADAPTER_TARGET);
+
+	/* Initialize the Verbs Reply Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
+	q1_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+	if (!q1_pages) {
+		err = -ENOMEM;
+		goto bail1;
+	}
+	c2dev->rep_vq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+					        (void *)q1_pages, qsize * msgsize,
+				      		DMA_FROM_DEVICE);
+	pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
+	pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
+		 (u64)c2dev->rep_vq.host_dma);
+	c2_mq_rep_init(&c2dev->rep_vq,
+		   1,
+		   qsize,
+		   msgsize,
+		   q1_pages,
+		   mmio_regs +
+		   be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)),
+		   C2_MQ_HOST_TARGET);
+
+	/* Initialize the Asynchronous Event Queue */
+	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE));
+	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
+	q2_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
+	if (!q2_pages) {
+		err = -ENOMEM;
+		goto bail2;
+	}
+	c2dev->aeq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
+					        (void *)q2_pages, qsize * msgsize,
+				      		DMA_FROM_DEVICE);
+	pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
+	pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q2_pages,
+		 (u64)c2dev->aeq.host_dma);
+	c2_mq_rep_init(&c2dev->aeq,
+		       2,
+		       qsize,
+		       msgsize,
+		       q2_pages,
+		       mmio_regs +
+		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)),
+		       C2_MQ_HOST_TARGET);
+
+	/* Initialize the verbs request allocator */
+	err = vq_init(c2dev);
+	if (err)
+		goto bail3;
+
+	/* Enable interrupts on the adapter */
+	writel(0, c2dev->regs + C2_IDIS);
+
+	/* create the WR init message */
+	err = c2_adapter_init(c2dev);
+	if (err)
+		goto bail4;
+	c2dev->init++;
+
+	/* open an adapter instance */
+	err = c2_rnic_open(c2dev);
+	if (err)
+		goto bail4;
+
+	/* Initialize the cached adapter limits; keep err for the caller */
+	if ((err = c2_rnic_query(c2dev, &c2dev->props)) != 0)
+		goto bail5;
+
+	/* Initialize the PD pool */
+	err = c2_init_pd_table(c2dev);
+	if (err)
+		goto bail5;
+
+	/* Initialize the QP pool */
+	c2_init_qp_table(c2dev);
+	return 0;
+
+      bail5:
+	c2_rnic_close(c2dev);
+      bail4:
+	vq_term(c2dev);
+      bail3:
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->aeq, mapping),
+			 c2dev->aeq.q_size * c2dev->aeq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(q2_pages);
+      bail2:
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->rep_vq, mapping),
+			 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(q1_pages);
+      bail1:
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+      bail0:
+	vfree(c2dev->qptr_array);
+
+	return err;
+}
+
+/*
+ * Called by c2_remove to clean up the RNIC resources set up by c2_rnic_init.
+ */
+void c2_rnic_term(struct c2_dev *c2dev)
+{
+
+	/* Close the open adapter instance */
+	c2_rnic_close(c2dev);
+
+	/* Send the TERM message to the adapter */
+	c2_adapter_term(c2dev);
+
+	/* Disable interrupts on the adapter */
+	writel(1, c2dev->regs + C2_IDIS);
+
+	/* Free the QP pool */
+	c2_cleanup_qp_table(c2dev);
+
+	/* Free the PD pool */
+	c2_cleanup_pd_table(c2dev);
+
+	/* Free the verbs request allocator */
+	vq_term(c2dev);
+
+	/* Unmap and free the asynchronous event queue */
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->aeq, mapping),
+			 c2dev->aeq.q_size * c2dev->aeq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(c2dev->aeq.msg_pool.host);
+
+	/* Unmap and free the verbs reply queue */
+	dma_unmap_single(c2dev->ibdev.dma_device,
+			 pci_unmap_addr(&c2dev->rep_vq, mapping),
+			 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+		  	 DMA_FROM_DEVICE);
+	kfree(c2dev->rep_vq.msg_pool.host);
+
+	/* Free the MQ shared pointer pool */
+	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+
+	/* Free the qptr_array */
+	vfree(c2dev->qptr_array);
+
+	return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_status.h b/drivers/infiniband/hw/amso1100/c2_status.h
new file mode 100644
index 0000000..6ee4aa9
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_status.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef	_C2_STATUS_H_
+#define _C2_STATUS_H_
+
+/*
+ * Verbs Status Codes
+ */
+enum c2_status {
+	C2_OK = 0,		/* This must be zero */
+	CCERR_INSUFFICIENT_RESOURCES = 1,
+	CCERR_INVALID_MODIFIER = 2,
+	CCERR_INVALID_MODE = 3,
+	CCERR_IN_USE = 4,
+	CCERR_INVALID_RNIC = 5,
+	CCERR_INTERRUPTED_OPERATION = 6,
+	CCERR_INVALID_EH = 7,
+	CCERR_INVALID_CQ = 8,
+	CCERR_CQ_EMPTY = 9,
+	CCERR_NOT_IMPLEMENTED = 10,
+	CCERR_CQ_DEPTH_TOO_SMALL = 11,
+	CCERR_PD_IN_USE = 12,
+	CCERR_INVALID_PD = 13,
+	CCERR_INVALID_SRQ = 14,
+	CCERR_INVALID_ADDRESS = 15,
+	CCERR_INVALID_NETMASK = 16,
+	CCERR_INVALID_QP = 17,
+	CCERR_INVALID_QP_STATE = 18,
+	CCERR_TOO_MANY_WRS_POSTED = 19,
+	CCERR_INVALID_WR_TYPE = 20,
+	CCERR_INVALID_SGL_LENGTH = 21,
+	CCERR_INVALID_SQ_DEPTH = 22,
+	CCERR_INVALID_RQ_DEPTH = 23,
+	CCERR_INVALID_ORD = 24,
+	CCERR_INVALID_IRD = 25,
+	CCERR_QP_ATTR_CANNOT_CHANGE = 26,
+	CCERR_INVALID_STAG = 27,
+	CCERR_QP_IN_USE = 28,
+	CCERR_OUTSTANDING_WRS = 29,
+	CCERR_STAG_IN_USE = 30,
+	CCERR_INVALID_STAG_INDEX = 31,
+	CCERR_INVALID_SGL_FORMAT = 32,
+	CCERR_ADAPTER_TIMEOUT = 33,
+	CCERR_INVALID_CQ_DEPTH = 34,
+	CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
+	CCERR_INVALID_EP = 36,
+	CCERR_MR_IN_USE = CCERR_STAG_IN_USE,	/* alias (30); value 37 is unused */
+	CCERR_FLUSHED = 38,
+	CCERR_INVALID_WQE = 39,
+	CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
+	CCERR_REMOTE_TERMINATION_ERROR = 41,
+	CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
+	CCERR_ACCESS_VIOLATION = 43,
+	CCERR_INVALID_PD_ID = 44,
+	CCERR_WRAP_ERROR = 45,
+	CCERR_INV_STAG_ACCESS_ERROR = 46,
+	CCERR_ZERO_RDMA_READ_RESOURCES = 47,
+	CCERR_QP_NOT_PRIVILEGED = 48,
+	CCERR_STAG_STATE_NOT_INVALID = 49,
+	CCERR_INVALID_PAGE_SIZE = 50,
+	CCERR_INVALID_BUFFER_SIZE = 51,
+	CCERR_INVALID_PBE = 52,
+	CCERR_INVALID_FBO = 53,
+	CCERR_INVALID_LENGTH = 54,
+	CCERR_INVALID_ACCESS_RIGHTS = 55,
+	CCERR_PBL_TOO_BIG = 56,
+	CCERR_INVALID_VA = 57,
+	CCERR_INVALID_REGION = 58,
+	CCERR_INVALID_WINDOW = 59,
+	CCERR_TOTAL_LENGTH_TOO_BIG = 60,
+	CCERR_INVALID_QP_ID = 61,
+	CCERR_ADDR_IN_USE = 62,
+	CCERR_ADDR_NOT_AVAIL = 63,
+	CCERR_NET_DOWN = 64,
+	CCERR_NET_UNREACHABLE = 65,
+	CCERR_CONN_ABORTED = 66,
+	CCERR_CONN_RESET = 67,
+	CCERR_NO_BUFS = 68,
+	CCERR_CONN_TIMEDOUT = 69,
+	CCERR_CONN_REFUSED = 70,
+	CCERR_HOST_UNREACHABLE = 71,
+	CCERR_INVALID_SEND_SGL_DEPTH = 72,
+	CCERR_INVALID_RECV_SGL_DEPTH = 73,
+	CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
+	CCERR_INSUFFICIENT_PRIVILEGES = 75,
+	CCERR_STACK_ERROR = 76,
+	CCERR_INVALID_VERSION = 77,
+	CCERR_INVALID_MTU = 78,
+	CCERR_INVALID_IMAGE = 79,
+	CCERR_PENDING = 98,	/* not an error; used internally by adapter */
+	CCERR_DEFER = 99,	/* not an error; used internally by adapter */
+	CCERR_FAILED_WRITE = 100,
+	CCERR_FAILED_ERASE = 101,
+	CCERR_FAILED_VERIFICATION = 102,
+	CCERR_NOT_FOUND = 103,
+
+};
+
+/*
+ * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
+ */
+enum c2_connect_status {
+	C2_CONN_STATUS_SUCCESS = C2_OK,
+	C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
+	C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
+	C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
+	C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
+	C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
+	C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
+	C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
+	C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
+	C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
+	C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
+};
+
+/*
+ * Flash programming status codes.
+ */
+enum c2_flash_status {
+	C2_FLASH_STATUS_SUCCESS = 0x0000,
+	C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
+	C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
+	C2_FLASH_STATUS_ECLBS = 0x0400,
+	C2_FLASH_STATUS_PSLBS = 0x0800,
+	C2_FLASH_STATUS_VPENS = 0x1000,
+};
+
+#endif				/* _C2_STATUS_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_user.h b/drivers/infiniband/hw/amso1100/c2_user.h
new file mode 100644
index 0000000..7e9e7ad
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_user.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_USER_H
+#define C2_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct c2_alloc_ucontext_resp {
+	__u32 qp_tab_size;
+	__u32 uarc_size;
+};
+
+struct c2_alloc_pd_resp {
+	__u32 pdn;
+	__u32 reserved;
+};
+
+struct c2_create_cq {
+	__u32 lkey;
+	__u32 pdn;
+	__u64 arm_db_page;
+	__u64 set_db_page;
+	__u32 arm_db_index;
+	__u32 set_db_index;
+};
+
+struct c2_create_cq_resp {
+	__u32 cqn;
+	__u32 reserved;
+};
+
+struct c2_create_qp {
+	__u32 lkey;
+	__u32 reserved;
+	__u64 sq_db_page;
+	__u64 rq_db_page;
+	__u32 sq_db_index;
+	__u32 rq_db_index;
+};
+
+#endif				/* C2_USER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
new file mode 100644
index 0000000..40caeb5
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "c2_vq.h"
+#include "c2_provider.h"
+
+/*
+ * Verbs Request Objects:
+ *
+ * VQ Request Objects are allocated by the kernel verbs handlers.
+ * They contain a wait object, a refcnt, an atomic bool indicating that the
+ * adapter has replied, and a copy of the verb reply work request.
+ * A pointer to the VQ Request Object is passed down in the context
+ * field of the work request message, and reflected back by the adapter
+ * in the verbs reply message.  The function handle_vq() in the interrupt
+ * path will use this pointer to:
+ * 	1) append a copy of the verbs reply message
+ * 	2) mark that the reply is ready
+ * 	3) wake up the kernel verbs handler blocked awaiting the reply.
+ *
+ *
+ * The kernel verbs handlers do a "get" to put a 2nd reference on the
+ * VQ Request object.  If the kernel verbs handler exits before the adapter
+ * can respond, this extra reference will keep the VQ Request object around
+ * until the adapter's reply can be processed.  The reason we need this is
+ * because a pointer to this object is stuffed into the context field of
+ * the verbs work request message, and reflected back in the reply message.
+ * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
+ * kernel verb handler that is blocked awaiting the verb reply.
+ * So handle_vq() will do a "put" on the object when it's done accessing it.
+ * NOTE:  If we guarantee that the kernel verb handler will never bail before
+ *        getting the reply, then we don't need these refcnts.
+ *
+ *
+ * VQ Request objects are freed by the kernel verbs handlers only
+ * after the verb has been processed, or when the adapter fails and
+ * does not reply.
+ *
+ *
+ * Verbs Reply Buffers:
+ *
+ * VQ Reply bufs are local host memory copies of an
+ * outstanding Verb Request reply
+ * message.  They are always allocated by the kernel verbs handlers, and _may_ be
+ * freed by either the kernel verbs handler -or- the interrupt handler.  The
+ * kernel verbs handler _must_ free the repbuf, then free the vq request object
+ * in that order.
+ */
+
+int vq_init(struct c2_dev *c2dev)
+{
+	sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
+		(char) ('0' + c2dev->devnum));
+	c2dev->host_msg_cache =
+	    kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
+			      SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (c2dev->host_msg_cache == NULL) {
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void vq_term(struct c2_dev *c2dev)
+{
+	kmem_cache_destroy(c2dev->host_msg_cache);
+}
+
+/* vq_req_alloc - allocate a VQ Request Object and initialize it.
+ * The refcnt is set to 1.
+ */
+struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
+{
+	struct c2_vq_req *r;
+
+	r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
+	if (r) {
+		init_waitqueue_head(&r->wait_object);
+		r->reply_msg = (u64) NULL;
+		r->event = 0;
+		r->cm_id = NULL;
+		r->qp = NULL;
+		atomic_set(&r->refcnt, 1);
+		atomic_set(&r->reply_ready, 0);
+	}
+	return r;
+}
+
+
+/* vq_req_free - free the VQ Request Object.  It is assumed the verbs handler
+ * has already freed the VQ Reply Buffer if it existed.
+ */
+void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	r->reply_msg = (u64) NULL;
+	if (atomic_dec_and_test(&r->refcnt)) {
+		kfree(r);
+	}
+}
+
+/* vq_req_get - reference a VQ Request Object.  Done
+ * only in the kernel verbs handlers.
+ */
+void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	atomic_inc(&r->refcnt);
+}
+
+
+/* vq_req_put - dereference and potentially free a VQ Request Object.
+ *
+ * This is called by handle_vq() on the interrupt when it is done
+ * processing a verb reply message, and by the kernel verbs handlers
+ * to drop their extra reference when vq_send_wr() fails.
+ * If this put drops the last reference (e.g. the
+ * kernel verbs handler has already bailed), it frees the VQ
+ * Request object _and_ the VQ Reply Buffer
+ * if it exists.
+ */
+void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+	if (atomic_dec_and_test(&r->refcnt)) {
+		if (r->reply_msg != (u64) NULL)
+			vq_repbuf_free(c2dev,
+				       (void *) (unsigned long) r->reply_msg);
+		kfree(r);
+	}
+}
+
+
+/*
+ * vq_repbuf_alloc - allocate a VQ Reply Buffer.
+ */
+void *vq_repbuf_alloc(struct c2_dev *c2dev)
+{
+	return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC);
+}
+
+/*
+ * vq_send_wr - post a verbs request message to the Verbs Request Queue.
+ * If a message is not available in the MQ, then block until one is available.
+ * NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
+ * When the adapter drains the Verbs Request Queue, it inserts MQ index 0
+ * into the adapter->host activity fifo and interrupts the host.
+ * Returns 0 once posted, or -EINTR if a signal is pending while waiting.
+ */
+int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
+{
+	void *msg;
+	wait_queue_t __wait;
+
+	/*
+	 * grab adapter vq lock
+	 */
+	spin_lock(&c2dev->vqlock);
+
+	/*
+	 * allocate msg
+	 */
+	msg = c2_mq_alloc(&c2dev->req_vq);
+
+	/*
+	 * If we cannot get a msg, then we'll wait.  When messages are
+	 * available, the int handler will wake_up() any waiters; the queue
+	 * is also re-checked after each one second schedule_timeout().
+	 */
+	while (msg == NULL) {
+		pr_debug("%s:%d no available msg in VQ, waiting...\n",
+		       __FUNCTION__, __LINE__);
+		init_waitqueue_entry(&__wait, current);
+		add_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_unlock(&c2dev->vqlock);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (!c2_mq_full(&c2dev->req_vq)) {
+				break;
+			}
+			if (!signal_pending(current)) {
+				schedule_timeout(1 * HZ);	/* 1 second... */
+				continue;
+			}
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+			return -EINTR;	/* vqlock was already dropped above */
+		}
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+		spin_lock(&c2dev->vqlock);
+		msg = c2_mq_alloc(&c2dev->req_vq);
+	}
+
+	/*
+	 * copy wr into adapter msg (always a full req_vq.msg_size bytes)
+	 */
+	memcpy(msg, wr, c2dev->req_vq.msg_size);
+
+	/*
+	 * post msg
+	 */
+	c2_mq_produce(&c2dev->req_vq);
+
+	/*
+	 * release adapter vq lock
+	 */
+	spin_unlock(&c2dev->vqlock);
+	return 0;
+}
+
+
+/*
+ * vq_wait_for_reply - wait up to 60s for a Verb Reply Message, else -ETIMEDOUT.
+ */
+int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
+{
+	if (!wait_event_timeout(req->wait_object,
+				atomic_read(&req->reply_ready),
+				60*HZ))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/*
+ * vq_repbuf_free - Free a Verbs Reply Buffer.
+ */
+void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
+{
+	kmem_cache_free(c2dev->host_msg_cache, reply);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.h b/drivers/infiniband/hw/amso1100/c2_vq.h
new file mode 100644
index 0000000..3380562
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_VQ_H_
+#define _C2_VQ_H_
+#include <linux/sched.h>
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_provider.h"
+
+struct c2_vq_req {
+	u64 reply_msg;		/* ptr to reply msg */
+	wait_queue_head_t wait_object;	/* wait object for vq reqs */
+	atomic_t reply_ready;	/* set when reply is ready */
+	atomic_t refcnt;	/* used to cancel WRs... */
+	int event;
+	struct iw_cm_id *cm_id;
+	struct c2_qp *qp;
+};
+
+extern int vq_init(struct c2_dev *c2dev);
+extern void vq_term(struct c2_dev *c2dev);
+
+extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
+extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
+
+extern void *vq_repbuf_alloc(struct c2_dev *c2dev);
+extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
+
+extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
+#endif				/* _C2_VQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h
new file mode 100644
index 0000000..3ec6c43
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_wr.h
@@ -0,0 +1,1520 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_WR_H_
+#define _C2_WR_H_
+
+#ifdef CCDEBUG
+#define CCWR_MAGIC		0xb07700b0
+#endif
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+/* Maximum allowed size in bytes of private_data exchange
+ * on connect.
+ */
+#define C2_MAX_PRIVATE_DATA_SIZE 200
+
+/*
+ * These types are shared among the adapter, host, and CCIL consumer.
+ */
+enum c2_cq_notification_type {
+	C2_CQ_NOTIFICATION_TYPE_NONE = 1,
+	C2_CQ_NOTIFICATION_TYPE_NEXT,
+	C2_CQ_NOTIFICATION_TYPE_NEXT_SE
+};
+
+enum c2_setconfig_cmd {
+	C2_CFG_ADD_ADDR = 1,
+	C2_CFG_DEL_ADDR = 2,
+	C2_CFG_ADD_ROUTE = 3,
+	C2_CFG_DEL_ROUTE = 4
+};
+
+enum c2_getconfig_cmd {
+	C2_GETCONFIG_ROUTES = 1,
+	C2_GETCONFIG_ADDRS
+};
+
+/*
+ *  CCIL Work Request Identifiers
+ */
+enum c2wr_ids {
+	CCWR_RNIC_OPEN = 1,
+	CCWR_RNIC_QUERY,
+	CCWR_RNIC_SETCONFIG,
+	CCWR_RNIC_GETCONFIG,
+	CCWR_RNIC_CLOSE,
+	CCWR_CQ_CREATE,
+	CCWR_CQ_QUERY,
+	CCWR_CQ_MODIFY,
+	CCWR_CQ_DESTROY,
+	CCWR_QP_CONNECT,
+	CCWR_PD_ALLOC,
+	CCWR_PD_DEALLOC,
+	CCWR_SRQ_CREATE,
+	CCWR_SRQ_QUERY,
+	CCWR_SRQ_MODIFY,
+	CCWR_SRQ_DESTROY,
+	CCWR_QP_CREATE,
+	CCWR_QP_QUERY,
+	CCWR_QP_MODIFY,
+	CCWR_QP_DESTROY,
+	CCWR_NSMR_STAG_ALLOC,
+	CCWR_NSMR_REGISTER,
+	CCWR_NSMR_PBL,
+	CCWR_STAG_DEALLOC,
+	CCWR_NSMR_REREGISTER,
+	CCWR_SMR_REGISTER,
+	CCWR_MR_QUERY,
+	CCWR_MW_ALLOC,
+	CCWR_MW_QUERY,
+	CCWR_EP_CREATE,
+	CCWR_EP_GETOPT,
+	CCWR_EP_SETOPT,
+	CCWR_EP_DESTROY,
+	CCWR_EP_BIND,
+	CCWR_EP_CONNECT,
+	CCWR_EP_LISTEN,
+	CCWR_EP_SHUTDOWN,
+	CCWR_EP_LISTEN_CREATE,
+	CCWR_EP_LISTEN_DESTROY,
+	CCWR_EP_QUERY,
+	CCWR_CR_ACCEPT,
+	CCWR_CR_REJECT,
+	CCWR_CONSOLE,
+	CCWR_TERM,
+	CCWR_FLASH_INIT,
+	CCWR_FLASH,
+	CCWR_BUF_ALLOC,
+	CCWR_BUF_FREE,
+	CCWR_FLASH_WRITE,
+	CCWR_INIT,		/* WARNING: Don't move this ever again! */
+
+
+
+	/* Add new IDs here */
+
+
+
+	/*
+	 * WARNING: CCWR_LAST must always be the last verbs id defined!
+	 *          All the preceding IDs are fixed, and must not change.
+	 *          You can add new IDs, but must not remove or reorder
+	 *          any IDs. If you do, YOU will ruin any hope of
+	 *          compatibility between versions.
+	 */
+	CCWR_LAST,
+
+	/*
+	 * Start over at 1 so that arrays indexed by user wr id's
+	 * begin at 1.  This is OK since the verbs and user wr id's
+	 * are always used on disjoint sets of queues.
+	 */
+	/*
+	 * The order of the CCWR_SEND_XX verbs must
+	 * match the order of the RDMA_OPs
+	 */
+	CCWR_SEND = 1,
+	CCWR_SEND_INV,
+	CCWR_SEND_SE,
+	CCWR_SEND_SE_INV,
+	CCWR_RDMA_WRITE,
+	CCWR_RDMA_READ,
+	CCWR_RDMA_READ_INV,
+	CCWR_MW_BIND,
+	CCWR_NSMR_FASTREG,
+	CCWR_STAG_INVALIDATE,
+	CCWR_RECV,
+	CCWR_NOP,
+	CCWR_UNIMPL,
+/* WARNING: This must always be the last user wr id defined! */
+};
+#define RDMA_SEND_OPCODE_FROM_WR_ID(x)   ((x) + 2)	/* maps a CCWR_SEND_XX id to its RDMA send opcode (id + 2) */
+
+/*
+ * SQ/RQ Work Request Types
+ */
+enum c2_wr_type {
+	C2_WR_TYPE_SEND = CCWR_SEND,
+	C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
+	C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
+	C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
+	C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
+	C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
+	C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
+	C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
+	C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
+	C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
+	C2_WR_TYPE_RECV = CCWR_RECV,
+	C2_WR_TYPE_NOP = CCWR_NOP,
+};
+
+struct c2_netaddr {
+	u32 ip_addr;
+	u32 netmask;
+	u32 mtu;
+};
+
+struct c2_route {
+	u32 ip_addr;		/* 0 indicates the default route */
+	u32 netmask;		/* netmask associated with dst */
+	u32 flags;
+	union {
+		u32 ipaddr;	/* address of the nexthop interface */
+		u8 enaddr[6];
+	} nexthop;
+};
+
+/*
+ * A Scatter Gather Entry.
+ */
+struct c2_data_addr {
+	u32 stag;
+	u32 length;
+	u64 to;
+};
+
+/*
+ * MR and MW flags used by the consumer, RI, and RNIC.
+ */
+enum c2_mm_flags {
+	MEM_REMOTE = 0x0001,	/* allow mw binds with remote access. */
+	MEM_VA_BASED = 0x0002,	/* Not Zero-based */
+	MEM_PBL_COMPLETE = 0x0004,	/* PBL array is complete in this msg */
+	MEM_LOCAL_READ = 0x0008,	/* allow local reads */
+	MEM_LOCAL_WRITE = 0x0010,	/* allow local writes */
+	MEM_REMOTE_READ = 0x0020,	/* allow remote reads */
+	MEM_REMOTE_WRITE = 0x0040,	/* allow remote writes */
+	MEM_WINDOW_BIND = 0x0080,	/* binds allowed */
+	MEM_SHARED = 0x0100,	/* set if MR is shared */
+	MEM_STAG_VALID = 0x0200	/* set if STAG is in valid state */
+};
+
+/*
+ * CCIL API ACF flags defined in terms of the low level mem flags.
+ * This minimizes translation needed in the user API
+ */
+enum c2_acf {
+	C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
+	C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
+	C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
+	C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
+	C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
+};
+
+/*
+ * Image types of objects written to flash
+ */
+#define C2_FLASH_IMG_BITFILE 1
+#define C2_FLASH_IMG_OPTION_ROM 2
+#define C2_FLASH_IMG_VPD 3
+
+/*
+ *  To fix bug 1815 we define the maximum allowable size of the
+ *  terminate message (per the IETF spec; refer to the IETF
+ *  protocol specification, section 12.1.6, page 64).
+ *  The message is prefixed by 20 bytes of DDP info.
+ *
+ *  Then the message has 6 bytes for the terminate control
+ *  and DDP segment length info plus a DDP header (either
+ *  14 or 18 bytes) plus 28 bytes for the RDMA header.
+ *  Thus the max size is:
+ *  20 + (6 + 18 + 28) = 72
+ */
+#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
+
+/*
+ * Build String Length.  It must be the same as C2_BUILD_STR_LEN in ccil_api.h
+ */
+#define WR_BUILD_STR_LEN 64
+
+/*
+ * WARNING:  All of these structs need to align any 64bit types on
+ * 64 bit boundaries!  64bit types include u64 and s64.
+ */
+
+/*
+ * Clustercore Work Request Header.  Be sensitive to field layout
+ * and alignment.
+ */
+struct c2wr_hdr {
+	/* wqe_count is part of the cqe.  It is put here so the
+	 * adapter can write to it while the wr is pending without
+	 * clobbering part of the wr.  This word need not be dma'd
+	 * from the host to adapter by libccil, but we copy it anyway
+	 * to make the memcpy to the adapter better aligned.
+	 */
+	u32 wqe_count;
+
+	/* Put these fields next so that later 32- and 64-bit
+	 * quantities are naturally aligned.
+	 */
+	u8 id;
+	u8 result;		/* adapter -> host */
+	u8 sge_count;		/* host -> adapter */
+	u8 flags;		/* host -> adapter */
+
+	u64 context;
+#ifdef CCMSGMAGIC
+	u32 magic;
+	u32 pad;
+#endif
+} __attribute__((packed));
+
+/*
+ *------------------------ RNIC ------------------------
+ */
+
+/*
+ * WR_RNIC_OPEN
+ */
+
+/*
+ * Flags for the RNIC WRs
+ */
+enum c2_rnic_flags {
+	RNIC_IRD_STATIC = 0x0001,
+	RNIC_ORD_STATIC = 0x0002,
+	RNIC_QP_STATIC = 0x0004,
+	RNIC_SRQ_SUPPORTED = 0x0008,
+	RNIC_PBL_BLOCK_MODE = 0x0010,
+	RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
+	RNIC_CQ_OVF_DETECTED = 0x0040,
+	RNIC_PRIV_MODE = 0x0080
+};
+
+struct c2wr_rnic_open_req {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u16 flags;		/* See enum c2_rnic_flags */
+	u16 port_num;
+} __attribute__((packed));
+
+struct c2wr_rnic_open_rep {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+union c2wr_rnic_open {
+	struct c2wr_rnic_open_req req;
+	struct c2wr_rnic_open_rep rep;
+} __attribute__((packed));
+
+struct c2wr_rnic_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_QUERY
+ */
+struct c2wr_rnic_query_rep {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u32 vendor_id;
+	u32 part_number;
+	u32 hw_version;
+	u32 fw_ver_major;
+	u32 fw_ver_minor;
+	u32 fw_ver_patch;
+	char fw_ver_build_str[WR_BUILD_STR_LEN];
+	u32 max_qps;
+	u32 max_qp_depth;
+	u32 max_srq_depth;
+	u32 max_send_sgl_depth;
+	u32 max_rdma_sgl_depth;
+	u32 max_cqs;
+	u32 max_cq_depth;
+	u32 max_cq_event_handlers;
+	u32 max_mrs;
+	u32 max_pbl_depth;
+	u32 max_pds;
+	u32 max_global_ird;
+	u32 max_global_ord;
+	u32 max_qp_ird;
+	u32 max_qp_ord;
+	u32 flags;
+	u32 max_mws;
+	u32 pbe_range_low;
+	u32 pbe_range_high;
+	u32 max_srqs;
+	u32 page_size;
+} __attribute__((packed));
+
+union c2wr_rnic_query {
+	struct c2wr_rnic_query_req req;
+	struct c2wr_rnic_query_rep rep;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_GETCONFIG
+ */
+
+struct c2wr_rnic_getconfig_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 option;		/* see enum c2_getconfig_cmd */
+	u64 reply_buf;
+	u32 reply_buf_len;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_getconfig_rep {
+	struct c2wr_hdr hdr;
+	u32 option;		/* see enum c2_getconfig_cmd */
+	u32 count_len;		/* length of the number of addresses configured */
+} __attribute__((packed)) ;
+
+union c2wr_rnic_getconfig {
+	struct c2wr_rnic_getconfig_req req;
+	struct c2wr_rnic_getconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_SETCONFIG
+ */
+struct c2wr_rnic_setconfig_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 option;		/* See enum c2_setconfig_cmd */
+	/* variable data and pad. See struct c2_netaddr and struct c2_route */
+	u8 data[0];
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_setconfig_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_setconfig {
+	struct c2wr_rnic_setconfig_req req;
+	struct c2wr_rnic_setconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_CLOSE
+ */
+struct c2wr_rnic_close_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_close_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_close {
+	struct c2wr_rnic_close_req req;
+	struct c2wr_rnic_close_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ CQ ------------------------
+ */
+struct c2wr_cq_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_ht;
+	u64 user_context;
+	u64 msg_pool;
+	u32 rnic_handle;
+	u32 msg_size;
+	u32 depth;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_create_rep {
+	struct c2wr_hdr hdr;
+	u32 mq_index;
+	u32 adapter_shared;
+	u32 cq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_cq_create {
+	struct c2wr_cq_create_req req;
+	struct c2wr_cq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 cq_handle;
+	u32 new_depth;
+	u64 new_msg_pool;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_modify {
+	struct c2wr_cq_modify_req req;
+	struct c2wr_cq_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 cq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_destroy {
+	struct c2wr_cq_destroy_req req;
+	struct c2wr_cq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ PD ------------------------
+ */
+struct c2wr_pd_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_alloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_alloc {
+	struct c2wr_pd_alloc_req req;
+	struct c2wr_pd_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_dealloc {
+	struct c2wr_pd_dealloc_req req;
+	struct c2wr_pd_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ SRQ ------------------------
+ */
+struct c2wr_srq_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_ht;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 srq_depth;
+	u32 srq_limit;
+	u32 sgl_depth;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_create_rep {
+	struct c2wr_hdr hdr;
+	u32 srq_depth;
+	u32 sgl_depth;
+	u32 msg_size;
+	u32 mq_index;
+	u32 mq_start;
+	u32 srq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_srq_create {
+	struct c2wr_srq_create_req req;
+	struct c2wr_srq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 srq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_srq_destroy {
+	struct c2wr_srq_destroy_req req;
+	struct c2wr_srq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ QP ------------------------
+ */
+enum c2wr_qp_flags {
+	QP_RDMA_READ = 0x00000001,	/* RDMA read enabled? */
+	QP_RDMA_WRITE = 0x00000002,	/* RDMA write enabled? */
+	QP_MW_BIND = 0x00000004,	/* MWs enabled */
+	QP_ZERO_STAG = 0x00000008,	/* enabled? */
+	QP_REMOTE_TERMINATION = 0x00000010,	/* remote end terminated */
+	QP_RDMA_READ_RESPONSE = 0x00000020	/* Remote RDMA read  */
+	    /* enabled? */
+};
+
+struct c2wr_qp_create_req {
+	struct c2wr_hdr hdr;
+	u64 shared_sq_ht;
+	u64 shared_rq_ht;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 sq_cq_handle;
+	u32 rq_cq_handle;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 srq_handle;
+	u32 srq_limit;
+	u32 flags;		/* see enum c2wr_qp_flags */
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_create_rep {
+	struct c2wr_hdr hdr;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 send_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u32 sq_msg_size;
+	u32 sq_mq_index;
+	u32 sq_mq_start;
+	u32 rq_msg_size;
+	u32 rq_mq_index;
+	u32 rq_mq_start;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+union c2wr_qp_create {
+	struct c2wr_qp_create_req req;
+	struct c2wr_qp_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_rep {
+	struct c2wr_hdr hdr;
+	u64 user_context;
+	u32 rnic_handle;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 send_sgl_depth;
+	u32 rdma_write_sgl_depth;
+	u32 recv_sgl_depth;
+	u32 ord;
+	u32 ird;
+	u16 qp_state;
+	u16 flags;		/* see enum c2wr_qp_flags */
+	u32 qp_id;
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+	u32 terminate_msg_length;	/* 0 if not present */
+	u8 data[0];
+	/* Terminate Message in-line here. */
+} __attribute__((packed)) ;
+
+union c2wr_qp_query {
+	struct c2wr_qp_query_req req;
+	struct c2wr_qp_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_req {
+	struct c2wr_hdr hdr;
+	u64 stream_msg;
+	u32 stream_msg_length;
+	u32 rnic_handle;
+	u32 qp_handle;
+	u32 next_qp_state;
+	u32 ord;
+	u32 ird;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 llp_ep_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_rep {
+	struct c2wr_hdr hdr;
+	u32 ord;
+	u32 ird;
+	u32 sq_depth;
+	u32 rq_depth;
+	u32 sq_msg_size;
+	u32 sq_mq_index;
+	u32 sq_mq_start;
+	u32 rq_msg_size;
+	u32 rq_mq_index;
+	u32 rq_mq_start;
+} __attribute__((packed)) ;
+
+union c2wr_qp_modify {
+	struct c2wr_qp_modify_req req;
+	struct c2wr_qp_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_qp_destroy {
+	struct c2wr_qp_destroy_req req;
+	struct c2wr_qp_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * The CCWR_QP_CONNECT msg is posted on the verbs request queue.  It can
+ * only be posted when a QP is in IDLE state.  After the connect request is
+ * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
+ * No synchronous reply from adapter to this WR.  The results of
+ * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS.
+ * See struct c2wr_ae_active_connect_results.
+ */
+struct c2wr_qp_connect_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;
+	u32 remote_addr;
+	u16 remote_port;
+	u16 pad;
+	u32 private_data_length;
+	u8 private_data[0];	/* Private data in-line. */
+} __attribute__((packed)) ;
+
+struct c2wr_qp_connect {
+	struct c2wr_qp_connect_req req;
+	/* no synchronous reply.         */
+} __attribute__((packed)) ;
+
+
+/*
+ *------------------------ MM ------------------------
+ */
+
+struct c2wr_nsmr_stag_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pbl_depth;
+	u32 pd_id;
+	u32 flags;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_stag_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_stag_alloc {
+	struct c2wr_nsmr_stag_alloc_req req;
+	struct c2wr_nsmr_stag_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 pd_id;
+	u32 pbl_depth;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_register {
+	struct c2wr_nsmr_register_req req;
+	struct c2wr_nsmr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 flags;
+	u32 stag_index;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_pbl {
+	struct c2wr_nsmr_pbl_req req;
+	struct c2wr_nsmr_pbl_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_rep {
+	struct c2wr_hdr hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 pd_id;
+	u32 flags;
+	u32 pbl_depth;
+} __attribute__((packed)) ;
+
+union c2wr_mr_query {
+	struct c2wr_mr_query_req req;
+	struct c2wr_mr_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_rep {
+	struct c2wr_hdr hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 pd_id;
+	u32 flags;
+} __attribute__((packed)) ;
+
+union c2wr_mw_query {
+	struct c2wr_mw_query_req req;
+	struct c2wr_mw_query_rep rep;
+} __attribute__((packed)) ;
+
+
+struct c2wr_stag_dealloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_stag_dealloc_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_stag_dealloc {
+	struct c2wr_stag_dealloc_req req;
+	struct c2wr_stag_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 stag_index;
+	u32 pd_id;
+	u32 pbl_depth;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	u32 pad1;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_rep {
+	struct c2wr_hdr hdr;
+	u32 pbl_depth;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_reregister {
+	struct c2wr_nsmr_reregister_req req;
+	struct c2wr_nsmr_reregister_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_req {
+	struct c2wr_hdr hdr;
+	u64 va;
+	u32 rnic_handle;
+	u16 flags;
+	u8 stag_key;
+	u8 pad;
+	u32 stag_index;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_rep {
+	struct c2wr_hdr hdr;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_smr_register {
+	struct c2wr_smr_register_req req;
+	struct c2wr_smr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_mw_alloc {
+	struct c2wr_mw_alloc_req req;
+	struct c2wr_mw_alloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ WRs -----------------------
+ */
+
+struct c2wr_user_hdr {
+	struct c2wr_hdr hdr;		/* Has status and WR Type */
+} __attribute__((packed)) ;
+
+enum c2_qp_state {
+	C2_QP_STATE_IDLE = 0x01,
+	C2_QP_STATE_CONNECTING = 0x02,
+	C2_QP_STATE_RTS = 0x04,
+	C2_QP_STATE_CLOSING = 0x08,
+	C2_QP_STATE_TERMINATE = 0x10,
+	C2_QP_STATE_ERROR = 0x20,
+};
+
+/* Completion queue entry. */
+struct c2wr_ce {
+	struct c2wr_hdr hdr;		/* Has status and WR Type */
+	u64 qp_user_context;	/* c2_user_qp_t * */
+	u32 qp_state;		/* Current QP State */
+	u32 handle;		/* QPID or EP Handle */
+	u32 bytes_rcvd;		/* valid for RECV WCs */
+	u32 stag;
+} __attribute__((packed)) ;
+
+
+/*
+ * Flags used for all post-sq WRs.  These must fit in the flags
+ * field of the struct c2wr_hdr (eight bits).
+ */
+enum {
+	SQ_SIGNALED = 0x01,
+	SQ_READ_FENCE = 0x02,
+	SQ_FENCE = 0x04,
+};
+
+/*
+ * Common fields for all post-sq WRs.  Namely the standard header and a
+ * secondary header with fields common to all post-sq WRs.
+ */
+struct c2_sq_hdr {
+	struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * Same as above but for post-rq WRs.
+ */
+struct c2_rq_hdr {
+	struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * use the same struct for all sends.
+ */
+struct c2wr_send_req {
+	struct c2_sq_hdr sq_hdr;
+	u32 sge_len;
+	u32 remote_stag;
+	u8 data[0];		/* SGE array */
+} __attribute__((packed));
+
+union c2wr_send {
+	struct c2wr_send_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_write_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 remote_to;
+	u32 remote_stag;
+	u32 sge_len;
+	u8 data[0];		/* SGE array */
+} __attribute__((packed));
+
+union c2wr_rdma_write {
+	struct c2wr_rdma_write_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_read_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 local_to;
+	u64 remote_to;
+	u32 local_stag;
+	u32 remote_stag;
+	u32 length;
+} __attribute__((packed));
+
+union c2wr_rdma_read {
+	struct c2wr_rdma_read_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_mw_bind_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 va;
+	u8 stag_key;
+	u8 pad[3];
+	u32 mw_stag_index;
+	u32 mr_stag_index;
+	u32 length;
+	u32 flags;
+} __attribute__((packed));
+
+union c2wr_mw_bind {
+	struct c2wr_mw_bind_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_nsmr_fastreg_req {
+	struct c2_sq_hdr sq_hdr;
+	u64 va;
+	u8 stag_key;
+	u8 pad[3];
+	u32 stag_index;
+	u32 pbe_size;
+	u32 fbo;
+	u32 length;
+	u32 addrs_length;
+	/* array of paddrs (must be aligned on a 64bit boundary) */
+	u64 paddrs[0];
+} __attribute__((packed));
+
+union c2wr_nsmr_fastreg {
+	struct c2wr_nsmr_fastreg_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_stag_invalidate_req {
+	struct c2_sq_hdr sq_hdr;
+	u8 stag_key;
+	u8 pad[3];
+	u32 stag_index;
+} __attribute__((packed));
+
+union c2wr_stag_invalidate {
+	struct c2wr_stag_invalidate_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+union c2wr_sqwr {
+	struct c2_sq_hdr sq_hdr;
+	struct c2wr_send_req send;
+	struct c2wr_send_req send_se;
+	struct c2wr_send_req send_inv;
+	struct c2wr_send_req send_se_inv;
+	struct c2wr_rdma_write_req rdma_write;
+	struct c2wr_rdma_read_req rdma_read;
+	struct c2wr_mw_bind_req mw_bind;
+	struct c2wr_nsmr_fastreg_req nsmr_fastreg;
+	struct c2wr_stag_invalidate_req stag_inv;
+} __attribute__((packed));
+
+
+/*
+ * RQ WRs
+ */
+struct c2wr_rqwr {
+	struct c2_rq_hdr rq_hdr;
+	u8 data[0];		/* array of SGEs */
+} __attribute__((packed));
+
+union c2wr_recv {
+	struct c2wr_rqwr req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+/*
+ * All AEs start with this header.  Most AEs only need to convey the
+ * information in the header.  Some, like LLP connection events, need
+ * more info.  The union c2wr_ae has all the possible AEs.
+ *
+ * hdr.context is the user_context from the rnic_open WR.  NULL if this
+ * is not affiliated with an rnic.
+ *
+ * hdr.id is the AE identifier (e.g. CCAE_REMOTE_SHUTDOWN,
+ * CCAE_LLP_CLOSE_COMPLETE).
+ *
+ * resource_type is one of:  C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
+ *
+ * user_context is the context passed down when the host created the resource.
+ */
+struct c2wr_ae_hdr {
+	struct c2wr_hdr hdr;
+	u64 user_context;	/* user context for this res. */
+	u32 resource_type;	/* see enum c2_resource_indicator */
+	u32 resource;		/* handle for resource */
+	u32 qp_state;		/* current QP State */
+} __attribute__((packed));
+
+/*
+ * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
+ * the adapter moves the QP into RTS state
+ */
+struct c2wr_ae_active_connect_results {
+	struct c2wr_ae_hdr ae_hdr;
+	u32 laddr;
+	u32 raddr;
+	u16 lport;
+	u16 rport;
+	u32 private_data_length;
+	u8 private_data[0];	/* data is in-line in the msg. */
+} __attribute__((packed));
+
+/*
+ * When connections are established by the stack (and the private data
+ * MPA frame is received), the adapter will generate an event to the host.
+ * The details of the connection, any private data, and the new connection
+ * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the
+ * AE queue:
+ */
+struct c2wr_ae_connection_request {
+	struct c2wr_ae_hdr ae_hdr;
+	u32 cr_handle;		/* connreq handle (sock ptr) */
+	u32 laddr;
+	u32 raddr;
+	u16 lport;
+	u16 rport;
+	u32 private_data_length;
+	u8 private_data[0];	/* data is in-line in the msg. */
+} __attribute__((packed));
+
+union c2wr_ae {
+	struct c2wr_ae_hdr ae_generic;
+	struct c2wr_ae_active_connect_results ae_active_connect_results;
+	struct c2wr_ae_connection_request ae_connection_request;
+} __attribute__((packed));
+
+struct c2wr_init_req {
+	struct c2wr_hdr hdr;
+	u64 hint_count;
+	u64 q0_host_shared;
+	u64 q1_host_shared;
+	u64 q1_host_msg_pool;
+	u64 q2_host_shared;
+	u64 q2_host_msg_pool;
+} __attribute__((packed));
+
+struct c2wr_init_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_init {
+	struct c2wr_init_req req;
+	struct c2wr_init_rep rep;
+} __attribute__((packed));
+
+/*
+ * For upgrading flash.
+ */
+
+struct c2wr_flash_init_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+} __attribute__((packed));
+
+struct c2wr_flash_init_rep {
+	struct c2wr_hdr hdr;
+	u32 adapter_flash_buf_offset;
+	u32 adapter_flash_len;
+} __attribute__((packed));
+
+union c2wr_flash_init {
+	struct c2wr_flash_init_req req;
+	struct c2wr_flash_init_rep rep;
+} __attribute__((packed));
+
+struct c2wr_flash_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 len;
+} __attribute__((packed));
+
+struct c2wr_flash_rep {
+	struct c2wr_hdr hdr;
+	u32 status;
+} __attribute__((packed));
+
+union c2wr_flash {
+	struct c2wr_flash_req req;
+	struct c2wr_flash_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 size;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_rep {
+	struct c2wr_hdr hdr;
+	u32 offset;		/* 0 if mem not available */
+	u32 size;		/* 0 if mem not available */
+} __attribute__((packed));
+
+union c2wr_buf_alloc {
+	struct c2wr_buf_alloc_req req;
+	struct c2wr_buf_alloc_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_free_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 offset;		/* Must match value from alloc */
+	u32 size;		/* Must match value from alloc */
+} __attribute__((packed));
+
+struct c2wr_buf_free_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_buf_free {
+	struct c2wr_buf_free_req req;
+	struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_flash_write_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 offset;
+	u32 size;
+	u32 type;
+	u32 flags;
+} __attribute__((packed));
+
+struct c2wr_flash_write_rep {
+	struct c2wr_hdr hdr;
+	u32 status;
+} __attribute__((packed));
+
+union c2wr_flash_write {
+	struct c2wr_flash_write_req req;
+	struct c2wr_flash_write_rep rep;
+} __attribute__((packed));
+
+/*
+ * Messages for LLP connection setup.
+ */
+
+/*
+ * Listen Request.  This allocates a listening endpoint to allow passive
+ * connection setup.  Newly established LLP connections are passed up
+ * via an AE.  See struct c2wr_ae_connection_request.
+ */
+struct c2wr_ep_listen_create_req {
+	struct c2wr_hdr hdr;
+	u64 user_context;	/* returned in AEs. */
+	u32 rnic_handle;
+	u32 local_addr;		/* local addr, or 0  */
+	u16 local_port;		/* 0 means "pick one" */
+	u16 pad;
+	u32 backlog;		/* traditional TCP listen backlog */
+} __attribute__((packed));
+
+struct c2wr_ep_listen_create_rep {
+	struct c2wr_hdr hdr;
+	u32 ep_handle;		/* handle to new listening ep */
+	u16 local_port;		/* resulting port... */
+	u16 pad;
+} __attribute__((packed));
+
+union c2wr_ep_listen_create {
+	struct c2wr_ep_listen_create_req req;
+	struct c2wr_ep_listen_create_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_ep_listen_destroy {
+	struct c2wr_ep_listen_destroy_req req;
+	struct c2wr_ep_listen_destroy_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_query_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_query_rep {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 local_addr;
+	u32 remote_addr;
+	u16 local_port;
+	u16 remote_port;
+} __attribute__((packed));
+
+union c2wr_ep_query {
+	struct c2wr_ep_query_req req;
+	struct c2wr_ep_query_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * The host passes this down to indicate acceptance of a pending iWARP
+ * connection.  The cr_handle was obtained from the CONNECTION_REQUEST
+ * AE passed up by the adapter.  See struct c2wr_ae_connection_request.
+ */
+struct c2wr_cr_accept_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 qp_handle;		/* QP to bind to this LLP conn */
+	u32 ep_handle;		/* LLP  handle to accept */
+	u32 private_data_length;
+	u8 private_data[0];	/* data in-line in msg. */
+} __attribute__((packed));
+
+/*
+ * adapter sends reply when private data is successfully submitted to
+ * the LLP.
+ */
+struct c2wr_cr_accept_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_accept {
+	struct c2wr_cr_accept_req req;
+	struct c2wr_cr_accept_rep rep;
+} __attribute__((packed));
+
+/*
+ * The host sends this down if a given iWARP connection request was
+ * rejected by the consumer.  The cr_handle was obtained from a
+ * previous c2wr_ae_connection_request_t AE sent by the adapter.
+ */
+struct  c2wr_cr_reject_req {
+	struct c2wr_hdr hdr;
+	u32 rnic_handle;
+	u32 ep_handle;		/* LLP handle to reject */
+} __attribute__((packed));
+
+/*
+ * Dunno if this is needed, but we'll add it for now.  The adapter will
+ * send the reject_reply after the LLP endpoint has been destroyed.
+ */
+struct  c2wr_cr_reject_rep {
+	struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_reject {
+	struct c2wr_cr_reject_req req;
+	struct c2wr_cr_reject_rep rep;
+} __attribute__((packed));
+
+/*
+ * console command.  Used to implement a debug console over the verbs
+ * request and reply queues.
+ */
+
+/*
+ * Console request message.  It contains:
+ *	- message hdr with id = CCWR_CONSOLE
+ *	- the physaddr/len of host memory to be used for the reply.
+ *	- the command string.  eg:  "netstat -s" or "zoneinfo"
+ */
+struct c2wr_console_req {
+	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
+	u64 reply_buf;		/* pinned host buf for reply */
+	u32 reply_buf_len;	/* length of reply buffer */
+	u8 command[0];		/* NUL terminated ascii string */
+	/* containing the command req */
+} __attribute__((packed));
+
+/*
+ * flags used in the console reply.
+ */
+enum c2_console_flags {
+	CONS_REPLY_TRUNCATED = 0x00000001	/* reply was truncated */
+} __attribute__((packed));
+
+/*
+ * Console reply message.
+ * hdr.result contains the c2_status_t error if the reply was _not_ generated,
+ * or C2_OK if the reply was generated.
+ */
+struct c2wr_console_rep {
+	struct c2wr_hdr hdr;		/* id = CCWR_CONSOLE */
+	u32 flags;
+} __attribute__((packed));
+
+union c2wr_console {
+	struct c2wr_console_req req;
+	struct c2wr_console_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * Giant union with all WRs.  Makes life easier...
+ */
+union c2wr {
+	struct c2wr_hdr hdr;
+	struct c2wr_user_hdr user_hdr;
+	union c2wr_rnic_open rnic_open;
+	union c2wr_rnic_query rnic_query;
+	union c2wr_rnic_getconfig rnic_getconfig;
+	union c2wr_rnic_setconfig rnic_setconfig;
+	union c2wr_rnic_close rnic_close;
+	union c2wr_cq_create cq_create;
+	union c2wr_cq_modify cq_modify;
+	union c2wr_cq_destroy cq_destroy;
+	union c2wr_pd_alloc pd_alloc;
+	union c2wr_pd_dealloc pd_dealloc;
+	union c2wr_srq_create srq_create;
+	union c2wr_srq_destroy srq_destroy;
+	union c2wr_qp_create qp_create;
+	union c2wr_qp_query qp_query;
+	union c2wr_qp_modify qp_modify;
+	union c2wr_qp_destroy qp_destroy;
+	struct c2wr_qp_connect qp_connect;
+	union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
+	union c2wr_nsmr_register nsmr_register;
+	union c2wr_nsmr_pbl nsmr_pbl;
+	union c2wr_mr_query mr_query;
+	union c2wr_mw_query mw_query;
+	union c2wr_stag_dealloc stag_dealloc;
+	union c2wr_sqwr sqwr;
+	struct c2wr_rqwr rqwr;
+	struct c2wr_ce ce;
+	union c2wr_ae ae;
+	union c2wr_init init;
+	union c2wr_ep_listen_create ep_listen_create;
+	union c2wr_ep_listen_destroy ep_listen_destroy;
+	union c2wr_cr_accept cr_accept;
+	union c2wr_cr_reject cr_reject;
+	union c2wr_console console;
+	union c2wr_flash_init flash_init;
+	union c2wr_flash flash;
+	union c2wr_buf_alloc buf_alloc;
+	union c2wr_buf_free buf_free;
+	union c2wr_flash_write flash_write;
+} __attribute__((packed));
+
+
+/*
+ * Accessors for the wr fields that are packed together tightly to
+ * reduce the wr message size.  The wr arguments are void* so that
+ * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types
+ * in the struct c2wr union can be passed in.
+ */
+static __inline__ u8 c2_wr_get_id(const void *wr)
+{
+	return ((const struct c2wr_hdr *) wr)->id;	/* wr is only read */
+}
+static __inline__ void c2_wr_set_id(void *wr, u8 id)
+{
+	((struct c2wr_hdr *) wr)->id = id;
+}
+static __inline__ u8 c2_wr_get_result(const void *wr)
+{
+	return ((const struct c2wr_hdr *) wr)->result;	/* wr is only read */
+}
+static __inline__ void c2_wr_set_result(void *wr, u8 result)
+{
+	((struct c2wr_hdr *) wr)->result = result;
+}
+static __inline__ u8 c2_wr_get_flags(const void *wr)
+{
+	return ((const struct c2wr_hdr *) wr)->flags;	/* wr is only read */
+}
+static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
+{
+	((struct c2wr_hdr *) wr)->flags = flags;
+}
+static __inline__ u8 c2_wr_get_sge_count(const void *wr)
+{
+	return ((const struct c2wr_hdr *) wr)->sge_count; /* wr is only read */
+}
+static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
+{
+	((struct c2wr_hdr *) wr)->sge_count = sge_count;
+}
+static __inline__ u32 c2_wr_get_wqe_count(const void *wr)
+{
+	return ((const struct c2wr_hdr *) wr)->wqe_count; /* wr is only read */
+}
+static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
+{
+	((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
+}
+
+#endif				/* _C2_WR_H_ */
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
new file mode 100644
index 0000000..922389b
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -0,0 +1,16 @@
+config INFINIBAND_EHCA
+	tristate "eHCA support"
+	depends on IBMEBUS && INFINIBAND
+	---help---
+	This driver supports the IBM pSeries eHCA InfiniBand adapter.
+
+	To compile the driver as a module, choose M here. The module
+	will be called ib_ehca.
+
+config INFINIBAND_EHCA_SCALING
+	bool "Scaling support (EXPERIMENTAL)"
+	depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
+	---help---
+	eHCA scaling support schedules the CQ callbacks to different CPUs.
+
+	To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile
new file mode 100644
index 0000000..74d284e
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Makefile
@@ -0,0 +1,16 @@
+#  Authors: Heiko J Schick <schickhj@de.ibm.com>
+#           Christoph Raisch <raisch@de.ibm.com>
+#           Joachim Fenkes <fenkes@de.ibm.com>
+#
+#  Copyright (c) 2005 IBM Corporation
+#
+#  All rights reserved.
+#
+#  This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
+
+obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
+
+ib_ehca-objs  = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
+		ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
+		ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
+
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
new file mode 100644
index 0000000..3bac197
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -0,0 +1,271 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  address vector functions
+ *
+ *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *av_cache;
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+	int ret;
+	struct ehca_av *av;
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+
+	av = kmem_cache_alloc(av_cache, SLAB_KERNEL);
+	if (!av) {
+		ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
+			 pd, ah_attr);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	av->av.sl = ah_attr->sl;
+	av->av.dlid = ah_attr->dlid;
+	av->av.slid_path_bits = ah_attr->src_path_bits;
+
+	if (ehca_static_rate < 0) {
+		int ah_mult = ib_rate_to_mult(ah_attr->static_rate);
+		int ehca_mult =
+			ib_rate_to_mult(shca->sport[ah_attr->port_num].rate );
+
+		if (ah_mult >= ehca_mult)
+			av->av.ipd = 0;
+		else
+			av->av.ipd = (ah_mult > 0) ?
+				((ehca_mult - 1) / ah_mult) : 0;
+	} else
+	        av->av.ipd = ehca_static_rate;
+
+	av->av.lnh = ah_attr->ah_flags;
+	av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
+					    ah_attr->grh.traffic_class);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+					    ah_attr->grh.flow_label);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+					    ah_attr->grh.hop_limit);
+	av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
+	/* set sgid in grh.word_1 */
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		int rc;
+		struct ib_port_attr port_attr;
+		union ib_gid gid;
+		memset(&port_attr, 0, sizeof(port_attr));
+		rc = ehca_query_port(pd->device, ah_attr->port_num,
+				     &port_attr);
+		if (rc) { /* invalid port number */
+			ret = -EINVAL;
+			ehca_err(pd->device, "Invalid port number "
+				 "ehca_query_port() returned %x "
+				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
+			goto create_ah_exit1;
+		}
+		memset(&gid, 0, sizeof(gid));
+		rc = ehca_query_gid(pd->device,
+				    ah_attr->port_num,
+				    ah_attr->grh.sgid_index, &gid);
+		if (rc) {
+			ret = -EINVAL;
+			ehca_err(pd->device, "Failed to retrieve sgid "
+				 "ehca_query_gid() returned %x "
+				 "pd=%p ah_attr=%p", rc, pd, ah_attr);
+			goto create_ah_exit1;
+		}
+		memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
+	}
+	/* for the time being we use a hard coded PMTU of 2048 Bytes */
+	av->av.pmtu = 4;
+
+	/* dgid comes in grh.word_3 */
+	memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
+	       sizeof(ah_attr->grh.dgid));
+
+	return &av->ib_ah;
+
+create_ah_exit1:
+	kmem_cache_free(av_cache, av);
+
+	return ERR_PTR(ret);
+}
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+	struct ehca_av *av;
+	struct ehca_ud_av new_ehca_av;
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;	/* caller, checked against PD owner */
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+	/* build the new vector from ah_attr locally; committed at the end */
+	memset(&new_ehca_av, 0, sizeof(new_ehca_av));
+	new_ehca_av.sl = ah_attr->sl;
+	new_ehca_av.dlid = ah_attr->dlid;
+	new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
+	new_ehca_av.ipd = ah_attr->static_rate;
+	new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
+					 (ah_attr->ah_flags & IB_AH_GRH) > 0);
+	new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
+						ah_attr->grh.traffic_class);
+	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+						 ah_attr->grh.flow_label);
+	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+						 ah_attr->grh.hop_limit);
+	new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
+	/* NOTE(review): unlike ehca_create_ah(), ipd is the raw static_rate */
+	/* set sgid in grh.word_1 */
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		int rc;
+		struct ib_port_attr port_attr;
+		union ib_gid gid;
+		memset(&port_attr, 0, sizeof(port_attr));
+		rc = ehca_query_port(ah->device, ah_attr->port_num,
+				     &port_attr);
+		if (rc) { /* invalid port number */
+			ehca_err(ah->device, "Invalid port number "
+				 "ehca_query_port() returned %x "
+				 "ah=%p ah_attr=%p port_num=%x",
+				 rc, ah, ah_attr, ah_attr->port_num);
+			return -EINVAL;
+		}
+		memset(&gid, 0, sizeof(gid));
+		rc = ehca_query_gid(ah->device,
+				    ah_attr->port_num,
+				    ah_attr->grh.sgid_index, &gid);
+		if (rc) {
+			ehca_err(ah->device, "Failed to retrieve sgid "
+				 "ehca_query_gid() returned %x "
+				 "ah=%p ah_attr=%p port_num=%x "
+				 "sgid_index=%x",
+				 rc, ah, ah_attr, ah_attr->port_num,
+				 ah_attr->grh.sgid_index);
+			return -EINVAL;
+		}
+		memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
+	}
+
+	new_ehca_av.pmtu = 4; /* see also comment in create_ah() */
+
+	memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
+	       sizeof(ah_attr->grh.dgid));
+	/* all lookups succeeded: commit the staged vector to the AH */
+	av = container_of(ah, struct ehca_av, ib_ah);
+	av->av = new_ehca_av;
+
+	return 0;
+}
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+	struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;	/* caller, checked against PD owner */
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+	/* translate the stored address vector back into verbs attributes */
+	memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
+	       sizeof(ah_attr->grh.dgid));
+	ah_attr->sl = av->av.sl;
+	/* NOTE(review): port_num and grh.sgid_index are not reported */
+	ah_attr->dlid = av->av.dlid;
+	/* NOTE(review): static_rate returns the stored ipd; confirm */
+	ah_attr->src_path_bits = av->av.slid_path_bits;
+	ah_attr->static_rate = av->av.ipd;
+	ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
+	ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
+						    av->av.grh.word_0);
+	ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
+						av->av.grh.word_0);
+	ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
+						 av->av.grh.word_0);
+
+	return 0;
+}
+
+int ehca_destroy_ah(struct ib_ah *ah)
+{
+	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	u32 cur_pid = current->tgid;	/* caller, checked against PD owner */
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+	/* return the enclosing ehca_av to the cache it was allocated from */
+	kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
+
+	return 0;
+}
+
+int ehca_init_av_cache(void)
+{
+	/*
+	 * Slab cache that ehca_create_ah() takes struct ehca_av objects from.
+	 */
+	av_cache = kmem_cache_create("ehca_cache_av", sizeof(struct ehca_av),
+				     0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+	return av_cache ? 0 : -ENOMEM;
+}
+
+void ehca_cleanup_av_cache(void)
+{
+	if (av_cache)	/* still NULL if ehca_init_av_cache() failed */
+		kmem_cache_destroy(av_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
new file mode 100644
index 0000000..1c72203
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -0,0 +1,346 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Struct definition for eHCA internal structures
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_H__
+#define __EHCA_CLASSES_H__
+
+#include "ehca_classes.h"
+#include "ipz_pt_fn.h"
+
+struct ehca_module;
+struct ehca_qp;
+struct ehca_cq;
+struct ehca_eq;
+struct ehca_mr;
+struct ehca_mw;
+struct ehca_pd;
+struct ehca_av;
+
+#ifdef CONFIG_PPC64
+#include "ehca_classes_pSeries.h"
+#endif
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ehca_irq.h"
+
+struct ehca_eq {
+	u32 length;
+	struct ipz_queue ipz_queue;
+	struct ipz_eq_handle ipz_eq_handle;
+	struct work_struct work;
+	struct h_galpas galpas;
+	int is_initialized;
+	struct ehca_pfeq pf;
+	spinlock_t spinlock;
+	struct tasklet_struct interrupt_task;
+	u32 ist;
+};
+
+struct ehca_sport {	/* per-port state, held in ehca_shca.sport[] */
+	struct ib_cq *ibcq_aqp1;
+	struct ib_qp *ibqp_aqp1;
+	enum ib_rate  rate;	/* input to ipd calc in ehca_create_ah() */
+	enum ib_port_state port_state;
+};
+
+struct ehca_shca {
+	struct ib_device ib_device;
+	struct ibmebus_dev *ibmebus_dev;
+	u8 num_ports;
+	int hw_level;
+	struct list_head shca_list;
+	struct ipz_adapter_handle ipz_hca_handle;
+	struct ehca_sport sport[2];
+	struct ehca_eq eq;
+	struct ehca_eq neq;
+	struct ehca_mr *maxmr;
+	struct ehca_pd *pd;
+	struct h_galpas galpas;
+};
+
+struct ehca_pd {
+	struct ib_pd ib_pd;	/* embedded verbs PD; container_of() target */
+	struct ipz_pd fw_pd;
+	u32 ownpid;		/* compared with current->tgid in AH verbs */
+};
+
+struct ehca_qp {
+	struct ib_qp ib_qp;
+	u32 qp_type;
+	struct ipz_queue ipz_squeue;
+	struct ipz_queue ipz_rqueue;
+	struct h_galpas galpas;
+	u32 qkey;
+	u32 real_qp_num;
+	u32 token;
+	spinlock_t spinlock_s;
+	spinlock_t spinlock_r;
+	u32 sq_max_inline_data_size;
+	struct ipz_qp_handle ipz_qp_handle;
+	struct ehca_pfqp pf;
+	struct ib_qp_init_attr init_attr;
+	u64 uspace_squeue;
+	u64 uspace_rqueue;
+	u64 uspace_fwh;
+	struct ehca_cq *send_cq;
+	struct ehca_cq *recv_cq;
+	unsigned int sqerr_purgeflag;
+	struct hlist_node list_entries;
+};
+
+/* must be power of 2 */
+#define QP_HASHTAB_LEN 8
+
+struct ehca_cq {
+	struct ib_cq ib_cq;
+	struct ipz_queue ipz_queue;
+	struct h_galpas galpas;
+	spinlock_t spinlock;
+	u32 cq_number;
+	u32 token;
+	u32 nr_of_entries;
+	struct ipz_cq_handle ipz_cq_handle;
+	struct ehca_pfcq pf;
+	spinlock_t cb_lock;
+	u64 uspace_queue;
+	u64 uspace_fwh;
+	struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
+	struct list_head entry;
+	u32 nr_callbacks;
+	spinlock_t task_lock;
+	u32 ownpid;
+};
+
+enum ehca_mr_flag {
+	EHCA_MR_FLAG_FMR = 0x80000000,	 /* FMR, created with ehca_alloc_fmr */
+	EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR                           */
+};
+
+struct ehca_mr {
+	union {
+		struct ib_mr ib_mr;	/* must always be first in ehca_mr */
+		struct ib_fmr ib_fmr;	/* must always be first in ehca_mr */
+	} ib;
+	spinlock_t mrlock;
+
+	enum ehca_mr_flag flags;
+	u32 num_pages;		/* number of MR pages */
+	u32 num_4k;		/* number of 4k "page" portions to form MR */
+	int acl;		/* ACL (stored here for usage in reregister) */
+	u64 *start;		/* virtual start address (stored here for */
+	                        /* usage in reregister) */
+	u64 size;		/* size (stored here for usage in reregister) */
+	u32 fmr_page_size;	/* page size for FMR */
+	u32 fmr_max_pages;	/* max pages for FMR */
+	u32 fmr_max_maps;	/* max outstanding maps for FMR */
+	u32 fmr_map_cnt;	/* map counter for FMR */
+	/* fw specific data */
+	struct ipz_mrmw_handle ipz_mr_handle;	/* MR handle for h-calls */
+	struct h_galpas galpas;
+	/* data for userspace bridge */
+	u32 nr_of_pages;
+	void *pagearray;
+};
+
+struct ehca_mw {
+	struct ib_mw ib_mw;	/* gen2 mw, must always be first in ehca_mw */
+	spinlock_t mwlock;
+
+	u8 never_bound;		/* indication MW was never bound */
+	struct ipz_mrmw_handle ipz_mw_handle;	/* MW handle for h-calls */
+	struct h_galpas galpas;
+};
+
+enum ehca_mr_pgi_type {
+	EHCA_MR_PGI_PHYS   = 1,  /* type of ehca_reg_phys_mr,
+				  * ehca_rereg_phys_mr,
+				  * ehca_reg_internal_maxmr */
+	EHCA_MR_PGI_USER   = 2,  /* type of ehca_reg_user_mr */
+	EHCA_MR_PGI_FMR    = 3   /* type of ehca_map_phys_fmr */
+};
+
+struct ehca_mr_pginfo {
+	enum ehca_mr_pgi_type type;
+	u64 num_pages;
+	u64 page_cnt;
+	u64 num_4k;       /* number of 4k "page" portions */
+	u64 page_4k_cnt;  /* counter for 4k "page" portions */
+	u64 next_4k;      /* next 4k "page" portion in buffer/chunk/listelem */
+
+	/* type EHCA_MR_PGI_PHYS section */
+	int num_phys_buf;
+	struct ib_phys_buf *phys_buf_array;
+	u64 next_buf;
+
+	/* type EHCA_MR_PGI_USER section */
+	struct ib_umem *region;
+	struct ib_umem_chunk *next_chunk;
+	u64 next_nmap;
+
+	/* type EHCA_MR_PGI_FMR section */
+	u64 *page_list;
+	u64 next_listelem;
+	/* next_4k also used within EHCA_MR_PGI_FMR */
+};
+
+/* output parameters for MR/FMR hipz calls */
+struct ehca_mr_hipzout_parms {
+	struct ipz_mrmw_handle handle;
+	u32 lkey;
+	u32 rkey;
+	u64 len;
+	u64 vaddr;
+	u32 acl;
+};
+
+/* output parameters for MW hipz calls */
+struct ehca_mw_hipzout_parms {
+	struct ipz_mrmw_handle handle;
+	u32 rkey;
+};
+
+struct ehca_av {
+	struct ib_ah ib_ah;	/* embedded verbs AH; container_of() target */
+	struct ehca_ud_av av;	/* address vector set by create/modify_ah */
+};
+
+struct ehca_ucontext {
+	struct ib_ucontext ib_ucontext;
+};
+
+struct ehca_module *ehca_module_new(void);
+
+int ehca_module_delete(struct ehca_module *me);
+
+int ehca_eq_ctor(struct ehca_eq *eq);
+
+int ehca_eq_dtor(struct ehca_eq *eq);
+
+struct ehca_shca *ehca_shca_new(void);
+
+int ehca_shca_delete(struct ehca_shca *me);
+
+struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
+
+int ehca_init_pd_cache(void);
+void ehca_cleanup_pd_cache(void);
+int ehca_init_cq_cache(void);
+void ehca_cleanup_cq_cache(void);
+int ehca_init_qp_cache(void);
+void ehca_cleanup_qp_cache(void);
+int ehca_init_av_cache(void);
+void ehca_cleanup_av_cache(void);
+int ehca_init_mrmw_cache(void);
+void ehca_cleanup_mrmw_cache(void);
+
+extern spinlock_t ehca_qp_idr_lock;
+extern spinlock_t ehca_cq_idr_lock;
+extern struct idr ehca_qp_idr;
+extern struct idr ehca_cq_idr;
+
+extern int ehca_static_rate;
+extern int ehca_port_act_time;
+extern int ehca_use_hp_mr;
+
+struct ipzu_queue_resp {
+	u64 queue;        /* points to first queue entry */
+	u32 qe_size;      /* queue entry size */
+	u32 act_nr_of_sg;
+	u32 queue_length; /* queue length allocated in bytes */
+	u32 pagesize;
+	u32 toggle_state;
+	u32 dummy; /* padding for 8 byte alignment */
+};
+
+struct ehca_create_cq_resp {
+	u32 cq_number;
+	u32 token;
+	struct ipzu_queue_resp ipz_queue;
+	struct h_galpas galpas;
+};
+
+struct ehca_create_qp_resp {
+	u32 qp_num;
+	u32 token;
+	u32 qp_type;
+	u32 qkey;
+	/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
+	u32 real_qp_num;
+	u32 dummy; /* padding for 8 byte alignment */
+	struct ipzu_queue_resp ipz_squeue;
+	struct ipzu_queue_resp ipz_rqueue;
+	struct h_galpas galpas;
+};
+
+struct ehca_alloc_cq_parms {
+	u32 nr_cqe;
+	u32 act_nr_of_entries;
+	u32 act_pages;
+	struct ipz_eq_handle eq_handle;
+};
+
+struct ehca_alloc_qp_parms {
+	int servicetype;
+	int sigtype;
+	int daqp_ctrl;
+	int max_send_sge;
+	int max_recv_sge;
+	int ud_av_l_key_ctl;
+
+	u16 act_nr_send_wqes;
+	u16 act_nr_recv_wqes;
+	u8  act_nr_recv_sges;
+	u8  act_nr_send_sges;
+
+	u32 nr_rq_pages;
+	u32 nr_sq_pages;
+
+	struct ipz_eq_handle ipz_eq_handle;
+	struct ipz_pd pd;
+};
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
+struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
new file mode 100644
index 0000000..5665f21
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -0,0 +1,236 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  pSeries interface definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_PSERIES_H__
+#define __EHCA_CLASSES_PSERIES_H__
+
+#include "hcp_phyp.h"
+#include "ipz_pt_fn.h"
+
+
+struct ehca_pfqp {
+	struct ipz_qpt sqpt;
+	struct ipz_qpt rqpt;
+};
+
+struct ehca_pfcq {
+	struct ipz_qpt qpt;
+	u32 cqnr;
+};
+
+struct ehca_pfeq {
+	struct ipz_qpt qpt;
+	struct h_galpa galpa;
+	u32 eqnr;
+};
+
+struct ipz_adapter_handle {
+	u64 handle;
+};
+
+struct ipz_cq_handle {
+	u64 handle;
+};
+
+struct ipz_eq_handle {
+	u64 handle;
+};
+
+struct ipz_qp_handle {
+	u64 handle;
+};
+struct ipz_mrmw_handle {
+	u64 handle;
+};
+
+struct ipz_pd {
+	u32 value;
+};
+
+struct hcp_modify_qp_control_block {
+	u32 qkey;                      /* 00 */
+	u32 rdd;                       /* reliable datagram domain */
+	u32 send_psn;                  /* 02 */
+	u32 receive_psn;               /* 03 */
+	u32 prim_phys_port;            /* 04 */
+	u32 alt_phys_port;             /* 05 */
+	u32 prim_p_key_idx;            /* 06 */
+	u32 alt_p_key_idx;             /* 07 */
+	u32 rdma_atomic_ctrl;          /* 08 */
+	u32 qp_state;                  /* 09 */
+	u32 reserved_10;               /* 10 */
+	u32 rdma_nr_atomic_resp_res;   /* 11 */
+	u32 path_migration_state;      /* 12 */
+	u32 rdma_atomic_outst_dest_qp; /* 13 */
+	u32 dest_qp_nr;                /* 14 */
+	u32 min_rnr_nak_timer_field;   /* 15 */
+	u32 service_level;             /* 16 */
+	u32 send_grh_flag;             /* 17 */
+	u32 retry_count;               /* 18 */
+	u32 timeout;                   /* 19 */
+	u32 path_mtu;                  /* 20 */
+	u32 max_static_rate;           /* 21 */
+	u32 dlid;                      /* 22 */
+	u32 rnr_retry_count;           /* 23 */
+	u32 source_path_bits;          /* 24 */
+	u32 traffic_class;             /* 25 */
+	u32 hop_limit;                 /* 26 */
+	u32 source_gid_idx;            /* 27 */
+	u32 flow_label;                /* 28 */
+	u32 reserved_29;               /* 29 */
+	union {                        /* 30 */
+		u64 dw[2];
+		u8 byte[16];
+	} dest_gid;
+	u32 service_level_al;          /* 34 */
+	u32 send_grh_flag_al;          /* 35 */
+	u32 retry_count_al;            /* 36 */
+	u32 timeout_al;                /* 37 */
+	u32 max_static_rate_al;        /* 38 */
+	u32 dlid_al;                   /* 39 */
+	u32 rnr_retry_count_al;        /* 40 */
+	u32 source_path_bits_al;       /* 41 */
+	u32 traffic_class_al;          /* 42 */
+	u32 hop_limit_al;              /* 43 */
+	u32 source_gid_idx_al;         /* 44 */
+	u32 flow_label_al;             /* 45 */
+	u32 reserved_46;               /* 46 */
+	u32 reserved_47;               /* 47 */
+	union {                        /* 48 */
+		u64 dw[2];
+		u8 byte[16];
+	} dest_gid_al;
+	u32 max_nr_outst_send_wr;      /* 52 */
+	u32 max_nr_outst_recv_wr;      /* 53 */
+	u32 disable_ete_credit_check;  /* 54 */
+	u32 qp_number;                 /* 55 */
+	u64 send_queue_handle;         /* 56 */
+	u64 recv_queue_handle;         /* 58 */
+	u32 actual_nr_sges_in_sq_wqe;  /* 60 */
+	u32 actual_nr_sges_in_rq_wqe;  /* 61 */
+	u32 qp_enable;                 /* 62 */
+	u32 curr_srq_limit;            /* 63 */
+	u64 qp_aff_asyn_ev_log_reg;    /* 64 */
+	u64 shared_rq_hndl;            /* 66 */
+	u64 trigg_doorbell_qp_hndl;    /* 68 */
+	u32 reserved_70_127[58];       /* 70 */
+};
+
+#define MQPCB_MASK_QKEY                         EHCA_BMASK_IBM(0,0)
+#define MQPCB_MASK_SEND_PSN                     EHCA_BMASK_IBM(2,2)
+#define MQPCB_MASK_RECEIVE_PSN                  EHCA_BMASK_IBM(3,3)
+#define MQPCB_MASK_PRIM_PHYS_PORT               EHCA_BMASK_IBM(4,4)
+#define MQPCB_PRIM_PHYS_PORT                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_PHYS_PORT                EHCA_BMASK_IBM(5,5)
+#define MQPCB_MASK_PRIM_P_KEY_IDX               EHCA_BMASK_IBM(6,6)
+#define MQPCB_PRIM_P_KEY_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_ALT_P_KEY_IDX                EHCA_BMASK_IBM(7,7)
+#define MQPCB_MASK_RDMA_ATOMIC_CTRL             EHCA_BMASK_IBM(8,8)
+#define MQPCB_MASK_QP_STATE                     EHCA_BMASK_IBM(9,9)
+#define MQPCB_QP_STATE                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES      EHCA_BMASK_IBM(11,11)
+#define MQPCB_MASK_PATH_MIGRATION_STATE         EHCA_BMASK_IBM(12,12)
+#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP    EHCA_BMASK_IBM(13,13)
+#define MQPCB_MASK_DEST_QP_NR                   EHCA_BMASK_IBM(14,14)
+#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD      EHCA_BMASK_IBM(15,15)
+#define MQPCB_MASK_SERVICE_LEVEL                EHCA_BMASK_IBM(16,16)
+#define MQPCB_MASK_SEND_GRH_FLAG                EHCA_BMASK_IBM(17,17)
+#define MQPCB_MASK_RETRY_COUNT                  EHCA_BMASK_IBM(18,18)
+#define MQPCB_MASK_TIMEOUT                      EHCA_BMASK_IBM(19,19)
+#define MQPCB_MASK_PATH_MTU                     EHCA_BMASK_IBM(20,20)
+#define MQPCB_PATH_MTU                          EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_MAX_STATIC_RATE              EHCA_BMASK_IBM(21,21)
+#define MQPCB_MAX_STATIC_RATE                   EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID                         EHCA_BMASK_IBM(22,22)
+#define MQPCB_DLID                              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT              EHCA_BMASK_IBM(23,23)
+#define MQPCB_RNR_RETRY_COUNT                   EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS             EHCA_BMASK_IBM(24,24)
+#define MQPCB_SOURCE_PATH_BITS                  EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS                EHCA_BMASK_IBM(25,25)
+#define MQPCB_TRAFFIC_CLASS                     EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT                    EHCA_BMASK_IBM(26,26)
+#define MQPCB_HOP_LIMIT                         EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX               EHCA_BMASK_IBM(27,27)
+#define MQPCB_SOURCE_GID_IDX                    EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL                   EHCA_BMASK_IBM(28,28)
+#define MQPCB_FLOW_LABEL                        EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID                     EHCA_BMASK_IBM(30,30)
+#define MQPCB_MASK_SERVICE_LEVEL_AL             EHCA_BMASK_IBM(31,31)
+#define MQPCB_SERVICE_LEVEL_AL                  EHCA_BMASK_IBM(28,31)
+#define MQPCB_MASK_SEND_GRH_FLAG_AL             EHCA_BMASK_IBM(32,32)
+#define MQPCB_SEND_GRH_FLAG_AL                  EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_RETRY_COUNT_AL               EHCA_BMASK_IBM(33,33)
+#define MQPCB_RETRY_COUNT_AL                    EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_TIMEOUT_AL                   EHCA_BMASK_IBM(34,34)
+#define MQPCB_TIMEOUT_AL                        EHCA_BMASK_IBM(27,31)
+#define MQPCB_MASK_MAX_STATIC_RATE_AL           EHCA_BMASK_IBM(35,35)
+#define MQPCB_MAX_STATIC_RATE_AL                EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_DLID_AL                      EHCA_BMASK_IBM(36,36)
+#define MQPCB_DLID_AL                           EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_RNR_RETRY_COUNT_AL           EHCA_BMASK_IBM(37,37)
+#define MQPCB_RNR_RETRY_COUNT_AL                EHCA_BMASK_IBM(29,31)
+#define MQPCB_MASK_SOURCE_PATH_BITS_AL          EHCA_BMASK_IBM(38,38)
+#define MQPCB_SOURCE_PATH_BITS_AL               EHCA_BMASK_IBM(25,31)
+#define MQPCB_MASK_TRAFFIC_CLASS_AL             EHCA_BMASK_IBM(39,39)
+#define MQPCB_TRAFFIC_CLASS_AL                  EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_HOP_LIMIT_AL                 EHCA_BMASK_IBM(40,40)
+#define MQPCB_HOP_LIMIT_AL                      EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_SOURCE_GID_IDX_AL            EHCA_BMASK_IBM(41,41)
+#define MQPCB_SOURCE_GID_IDX_AL                 EHCA_BMASK_IBM(24,31)
+#define MQPCB_MASK_FLOW_LABEL_AL                EHCA_BMASK_IBM(42,42)
+#define MQPCB_FLOW_LABEL_AL                     EHCA_BMASK_IBM(12,31)
+#define MQPCB_MASK_DEST_GID_AL                  EHCA_BMASK_IBM(44,44)
+#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR         EHCA_BMASK_IBM(45,45)
+#define MQPCB_MAX_NR_OUTST_SEND_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR         EHCA_BMASK_IBM(46,46)
+#define MQPCB_MAX_NR_OUTST_RECV_WR              EHCA_BMASK_IBM(16,31)
+#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK     EHCA_BMASK_IBM(47,47)
+#define MQPCB_DISABLE_ETE_CREDIT_CHECK          EHCA_BMASK_IBM(31,31)
+#define MQPCB_QP_NUMBER                         EHCA_BMASK_IBM(8,31)
+#define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48,48)
+#define MQPCB_QP_ENABLE                         EHCA_BMASK_IBM(31,31)
+#define MQPCB_MASK_CURR_SQR_LIMIT               EHCA_BMASK_IBM(49,49)
+#define MQPCB_CURR_SQR_LIMIT                    EHCA_BMASK_IBM(15,31)
+#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50,50)
+#define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51,51)
+
+#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
new file mode 100644
index 0000000..458fe19
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -0,0 +1,427 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Completion queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *cq_cache;	/* slab cache for struct ehca_cq objects */
+
+/* Link @qp into the QP hash table of @cq under the CQ spinlock; returns 0. */
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
+{
+	unsigned long irq_state;
+	unsigned int real_num = qp->real_qp_num;
+	struct hlist_head *bucket;
+
+	bucket = &cq->qp_hashtab[real_num & (QP_HASHTAB_LEN-1)];
+	spin_lock_irqsave(&cq->spinlock, irq_state);
+	hlist_add_head(&qp->list_entries, bucket);
+	spin_unlock_irqrestore(&cq->spinlock, irq_state);
+	ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
+		 cq->cq_number, real_num);
+	return 0;
+}
+
+/* Unlink the QP numbered @real_qp_num from @cq; 0 if found, else -EINVAL. */
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
+{
+	unsigned int bucket = real_qp_num & (QP_HASHTAB_LEN-1);
+	struct hlist_node *node;
+	struct ehca_qp *cand;
+	unsigned long irq_state;
+	int found = 0;
+
+	spin_lock_irqsave(&cq->spinlock, irq_state);
+	hlist_for_each(node, &cq->qp_hashtab[bucket]) {
+		cand = hlist_entry(node, struct ehca_qp, list_entries);
+		if (cand->real_qp_num != real_qp_num)
+			continue;
+		hlist_del(node);	/* fine mid-walk: the loop ends right here */
+		ehca_dbg(cq->ib_cq.device,
+			 "removed qp from cq .cq_num=%x real_qp_num=%x",
+			 cq->cq_number, real_qp_num);
+		found = 1;
+		break;
+	}
+	spin_unlock_irqrestore(&cq->spinlock, irq_state);
+	if (found)
+		return 0;
+	ehca_err(cq->ib_cq.device, "qp not found cq_num=%x real_qp_num=%x",
+		 cq->cq_number, real_qp_num);
+	return -EINVAL;
+}
+
+/* Look up the QP numbered @real_qp_num in the hash table of @cq; NULL if
+ * absent.  Takes no lock itself, unlike ehca_cq_assign_qp()/unassign. */
+struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
+{
+	unsigned int bucket = real_qp_num & (QP_HASHTAB_LEN-1);
+	struct hlist_node *node;
+
+	hlist_for_each(node, &cq->qp_hashtab[bucket]) {
+		struct ehca_qp *cand =
+			hlist_entry(node, struct ehca_qp, list_entries);
+		if (cand->real_qp_num == real_qp_num)
+			return cand;
+	}
+	return NULL;
+}
+
+/*
+ * Create a completion queue sized for @cqe entries (plus 20 spare ones):
+ * allocate the ehca_cq, reserve an idr token, allocate the CQ resource,
+ * register all queue pages and, if @context is set, map queue and firmware
+ * handle for user space.  Returns the ib_cq or an ERR_PTR() after cleanup.
+ */
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+			     struct ib_ucontext *context,
+			     struct ib_udata *udata)
+{
+	static const u32 additional_cqe = 20;
+	struct ib_cq *cq;
+	struct ehca_cq *my_cq;
+	struct ehca_shca *shca =
+		container_of(device, struct ehca_shca, ib_device);
+	struct ipz_adapter_handle adapter_handle;
+	struct ehca_alloc_cq_parms param; /* h_call's out parameters */
+	struct h_galpa gal;
+	void *vpage;
+	u32 counter;
+	u64 rpage, cqx_fec, h_ret;
+	int ipz_rc, ret, i;
+	unsigned long flags;
+
+	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
+		return ERR_PTR(-EINVAL);
+
+	my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL);
+	if (!my_cq) {
+		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
+			 device);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(my_cq, 0, sizeof(struct ehca_cq));
+	memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
+
+	spin_lock_init(&my_cq->spinlock);
+	spin_lock_init(&my_cq->cb_lock);
+	spin_lock_init(&my_cq->task_lock);
+	my_cq->ownpid = current->tgid;
+
+	cq = &my_cq->ib_cq;
+	adapter_handle = shca->ipz_hca_handle;
+	param.eq_handle = shca->eq.ipz_eq_handle;
+
+	do {
+		if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
+			cq = ERR_PTR(-ENOMEM);
+			ehca_err(device, "Can't reserve idr nr. device=%p",
+				 device);
+			goto create_cq_exit1;
+		}
+
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+	} while (ret == -EAGAIN);
+
+	if (ret) {
+		cq = ERR_PTR(-ENOMEM);
+		ehca_err(device, "Can't allocate new idr entry. device=%p",
+			 device);
+		goto create_cq_exit1;
+	}
+
+	/*
+	 * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
+	 * for receiving errors CQEs.
+	 */
+	param.nr_cqe = cqe + additional_cqe;
+	h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
+
+	if (h_ret != H_SUCCESS) {
+		ehca_err(device, "hipz_h_alloc_resource_cq() failed "
+			 "h_ret=%lx device=%p", h_ret, device);
+		cq = ERR_PTR(ehca2ib_return_code(h_ret));
+		goto create_cq_exit2;
+	}
+
+	ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
+				EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
+	if (!ipz_rc) {
+		ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
+			 ipz_rc, device);
+		cq = ERR_PTR(-EINVAL);
+		goto create_cq_exit3;
+	}
+
+	for (counter = 0; counter < param.act_pages; counter++) {
+		vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+		if (!vpage) {
+			ehca_err(device, "ipz_qpageit_get_inc() "
+				 "returns NULL device=%p", device);
+			cq = ERR_PTR(-EAGAIN);
+			goto create_cq_exit4;
+		}
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_cq(adapter_handle,
+						 my_cq->ipz_cq_handle,
+						 &my_cq->pf, 0, 0, rpage, 1,
+						 my_cq->galpas.kernel);
+
+		if (h_ret < H_SUCCESS) {
+			ehca_err(device, "hipz_h_register_rpage_cq() failed "
+				 "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i "
+				 "act_pages=%i", my_cq, my_cq->cq_number,
+				 h_ret, counter, param.act_pages);
+			cq = ERR_PTR(-EINVAL);
+			goto create_cq_exit4;
+		}
+
+		if (counter == (param.act_pages - 1)) {
+			vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+			if ((h_ret != H_SUCCESS) || vpage) {
+				ehca_err(device, "Registration of pages not "
+					 "complete ehca_cq=%p cq_num=%x "
+					 "h_ret=%lx", my_cq, my_cq->cq_number,
+					 h_ret);
+				cq = ERR_PTR(-EAGAIN);
+				goto create_cq_exit4;
+			}
+		} else {
+			if (h_ret != H_PAGE_REGISTERED) {
+				ehca_err(device, "Registration of page failed "
+					 "ehca_cq=%p cq_num=%x h_ret=%lx "
+					 "counter=%i act_pages=%i",
+					 my_cq, my_cq->cq_number,
+					 h_ret, counter, param.act_pages);
+				cq = ERR_PTR(-ENOMEM);
+				goto create_cq_exit4;
+			}
+		}
+	}
+
+	ipz_qeit_reset(&my_cq->ipz_queue);
+
+	gal = my_cq->galpas.kernel;
+	cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
+	ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx",
+		 my_cq, my_cq->cq_number, cqx_fec);
+
+	my_cq->ib_cq.cqe = my_cq->nr_of_entries =
+		param.act_nr_of_entries - additional_cqe;
+	my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
+
+	for (i = 0; i < QP_HASHTAB_LEN; i++)
+		INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
+
+	if (context) {
+		struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
+		struct ehca_create_cq_resp resp;
+		struct vm_area_struct *vma;
+		memset(&resp, 0, sizeof(resp));
+		resp.cq_number = my_cq->cq_number;
+		resp.token = my_cq->token;
+		resp.ipz_queue.qe_size = ipz_queue->qe_size;
+		resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
+		resp.ipz_queue.queue_length = ipz_queue->queue_length;
+		resp.ipz_queue.pagesize = ipz_queue->pagesize;
+		resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
+				       ipz_queue->queue_length,
+				       (void**)&resp.ipz_queue.queue, &vma);
+		if (ret) {
+			ehca_err(device, "Could not mmap queue pages");
+			cq = ERR_PTR(ret);
+			goto create_cq_exit4;
+		}
+		my_cq->uspace_queue = resp.ipz_queue.queue;
+		resp.galpas = my_cq->galpas;
+		ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
+					 (void**)&resp.galpas.kernel.fw_handle,
+					 &vma);
+		if (ret) {
+			ehca_err(device, "Could not mmap fw_handle");
+			cq = ERR_PTR(ret);
+			goto create_cq_exit5;
+		}
+		my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+			ehca_err(device, "Copy to udata failed.");
+			/* my_cq is freed below: never hand it back */
+			cq = ERR_PTR(-EFAULT);
+			goto create_cq_exit6;
+		}
+	}
+
+	return cq;
+
+create_cq_exit6:
+	ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
+
+create_cq_exit5:
+	ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
+
+create_cq_exit4:
+	ipz_queue_dtor(&my_cq->ipz_queue);
+
+create_cq_exit3:
+	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+	if (h_ret != H_SUCCESS)
+		ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
+			 "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
+
+create_cq_exit2:
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	idr_remove(&ehca_cq_idr, my_cq->token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+create_cq_exit1:
+	kmem_cache_free(cq_cache, my_cq);
+
+	return cq;
+}
+
+/*
+ * Destroy @cq; a user-space CQ only by its owner (-EINVAL otherwise, checked
+ * before any teardown).  Returns 0 or the converted hcall status.
+ */
+int ehca_destroy_cq(struct ib_cq *cq)
+{
+	u64 h_ret;
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	int cq_num = my_cq->cq_number;
+	struct ib_device *device = cq->device;
+	struct ehca_shca *shca = container_of(device, struct ehca_shca,
+					      ib_device);
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+
+	if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+		ehca_err(device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_cq->ownpid);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	while (my_cq->nr_callbacks) {
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+		yield();	/* may schedule: never with the irq-off lock held */
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	}
+	idr_remove(&ehca_cq_idr, my_cq->token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+	/* un-mmap if vma alloc */
+	if (my_cq->uspace_queue) {
+		if (ehca_munmap(my_cq->uspace_queue,
+				my_cq->ipz_queue.queue_length))
+			ehca_err(device, "Could not munmap queue ehca_cq=%p "
+				 "cq_num=%x", my_cq, cq_num);
+		if (ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE))
+			ehca_err(device, "Could not munmap fwh ehca_cq=%p "
+				 "cq_num=%x", my_cq, cq_num);
+	}
+
+	h_ret = hipz_h_destroy_cq(shca->ipz_hca_handle, my_cq, 0);
+	if (h_ret == H_R_STATE) {
+		/* cq in err: read err data and destroy it forcibly */
+		ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err "
+			 "state. Try to delete it forcibly.",
+			 my_cq, cq_num, my_cq->ipz_cq_handle.handle);
+		ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
+		h_ret = hipz_h_destroy_cq(shca->ipz_hca_handle, my_cq, 1);
+		if (h_ret == H_SUCCESS)
+			ehca_dbg(device, "cq_num=%x deleted successfully.",
+				 cq_num);
+	}
+	if (h_ret != H_SUCCESS) {
+		ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx "
+			 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
+		return ehca2ib_return_code(h_ret);
+	}
+	ipz_queue_dtor(&my_cq->ipz_queue);
+	kmem_cache_free(cq_cache, my_cq);
+	return 0;
+}
+
+/* Resize is unimplemented: -EINVAL for a foreign caller, otherwise -EFAULT. */
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+	u32 caller = current->tgid;
+	struct ehca_cq *ecq = container_of(cq, struct ehca_cq, ib_cq);
+	int foreign = ecq->uspace_queue && ecq->ownpid != caller;
+
+	if (!foreign) {
+		/* TODO: proper resize needs to be done */
+		ehca_err(cq->device, "not implemented yet");
+		return -EFAULT;
+	}
+	ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
+		 caller, ecq->ownpid);
+	return -EINVAL;
+}
+
+/* Create the slab cache ehca_cq objects are allocated from; 0 or -ENOMEM. */
+int ehca_init_cq_cache(void)
+{
+	const size_t obj_size = sizeof(struct ehca_cq);
+
+	cq_cache = kmem_cache_create("ehca_cache_cq", obj_size, 0,
+				     SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+	return cq_cache ? 0 : -ENOMEM;
+}
+
+void ehca_cleanup_cq_cache(void)
+{
+	if (cq_cache != NULL)	/* set only by ehca_init_cq_cache() */
+		kmem_cache_destroy(cq_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
new file mode 100644
index 0000000..5281dec
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -0,0 +1,185 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Event queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_qes.h"
+#include "hcp_if.h"
+#include "ipz_pt_fn.h"
+
+/*
+ * Set up event queue @eq of kind @type (EHCA_EQ or EHCA_NEQ) with @length
+ * entries: allocate the EQ resource, register every queue page, request the
+ * interrupt and initialise the tasklet.  Once the EQ resource exists, a
+ * failure to build or register the queue pages releases it again.
+ * Returns 0 on success and -EINVAL on any failure.
+ */
+int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
+		   const enum ehca_eq_type type, const u32 length)
+{
+	u64 ret;
+	int irq_rc = 0;	/* signed, unlike ret: request failures are < 0 */
+	u32 nr_pages;
+	u32 i;
+	void *vpage;
+	struct ib_device *ib_dev = &shca->ib_device;
+
+	spin_lock_init(&eq->spinlock);
+	eq->is_initialized = 0;
+
+	if (type != EHCA_EQ && type != EHCA_NEQ) {
+		ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
+		return -EINVAL;
+	}
+	if (!length) {
+		ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
+		return -EINVAL;
+	}
+
+	ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, &eq->pf, type,
+				       length, &eq->ipz_eq_handle, &eq->length,
+				       &nr_pages, &eq->ist);
+
+	if (ret != H_SUCCESS) {
+		ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
+		return -EINVAL;
+	}
+
+	ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
+			     EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
+	if (!ret) {
+		ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
+		goto create_eq_exit1;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		u64 rpage;
+
+		if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) {
+			ret = H_RESOURCE;
+			goto create_eq_exit2;
+		}
+
+		rpage = virt_to_abs(vpage);
+		/* every page but the last must answer H_PAGE_REGISTERED */
+		ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+					       eq->ipz_eq_handle, &eq->pf,
+					       0, 0, rpage, 1);
+
+		if (i == (nr_pages - 1)) {
+			/* last page */
+			vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
+			if (ret != H_SUCCESS || vpage)
+				goto create_eq_exit2;
+		} else {
+			if (ret != H_PAGE_REGISTERED || !vpage)
+				goto create_eq_exit2;
+		}
+	}
+
+	ipz_qeit_reset(&eq->ipz_queue);
+
+	/* register interrupt handlers and initialize work queues */
+	if (type == EHCA_EQ) {
+		irq_rc = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq,
+					     SA_INTERRUPT, "ehca_eq",
+					     (void *)shca);
+		tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
+	} else if (type == EHCA_NEQ) {
+		irq_rc = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq,
+					     SA_INTERRUPT, "ehca_neq",
+					     (void *)shca);
+		tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
+	}
+	/* a failed IRQ request is only logged; EQ creation still succeeds */
+	if (irq_rc < 0)
+		ehca_err(ib_dev, "Can't map interrupt handler.");
+
+	eq->is_initialized = 1;
+
+	return 0;
+
+create_eq_exit2:
+	ipz_queue_dtor(&eq->ipz_queue);
+
+create_eq_exit1:
+	hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+	return -EINVAL;
+}
+
+/* Return what ipz_eqit_eq_get_inc_valid() yields for @eq, under its lock. */
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+	void *entry;
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&eq->spinlock, irq_state);
+	entry = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
+	spin_unlock_irqrestore(&eq->spinlock, irq_state);
+	return entry;
+}
+
+/* Free the IRQ, the EQ resource and the queue pages of @eq; 0 or -EINVAL. */
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+	unsigned long flags;
+	u64 h_ret;
+
+	/* ibmebus_free_irq() presumably ends in free_irq(), which may sleep and
+	 * must not run with interrupts off: release the IRQ before locking */
+	ibmebus_free_irq(NULL, eq->ist, (void *)shca);
+
+	spin_lock_irqsave(&eq->spinlock, flags);
+	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+	spin_unlock_irqrestore(&eq->spinlock, flags);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't free EQ resources.");
+		return -EINVAL;
+	}
+	ipz_queue_dtor(&eq->ipz_queue);
+	return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
new file mode 100644
index 0000000..5eae6ac
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -0,0 +1,241 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HCA query functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+/* Fill @props from hipz_h_query_hca(); returns 0, -ENOMEM or -EINVAL. */
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query device properties");
+		ret = -EINVAL;
+		goto query_device1;
+	}
+
+	memset(props, 0, sizeof(struct ib_device_attr));
+	props->fw_ver          = rblock->hw_ver; /* NOTE(review): hw_ver? */
+	props->max_mr_size     = rblock->max_mr_size;
+	props->vendor_id       = rblock->vendor_id >> 8;
+	props->vendor_part_id  = rblock->vendor_part_id >> 16;
+	props->hw_ver          = rblock->hw_ver;
+	/* clamp unsigned; min_t(int, ...) made values > INT_MAX negative */
+	props->max_qp          = min_t(unsigned, rblock->max_qp, INT_MAX);
+	props->max_qp_wr       = min_t(unsigned, rblock->max_wqes_wq, INT_MAX);
+	props->max_sge         = min_t(unsigned, rblock->max_sge, INT_MAX);
+	props->max_sge_rd      = min_t(unsigned, rblock->max_sge_rd, INT_MAX);
+	props->max_cq          = min_t(unsigned, rblock->max_cq, INT_MAX);
+	props->max_cqe         = min_t(unsigned, rblock->max_cqe, INT_MAX);
+	props->max_mr          = min_t(unsigned, rblock->max_mr, INT_MAX);
+	props->max_mw          = min_t(unsigned, rblock->max_mw, INT_MAX);
+	props->max_pd          = min_t(unsigned, rblock->max_pd, INT_MAX);
+	props->max_ah          = min_t(unsigned, rblock->max_ah, INT_MAX);
+	props->max_fmr         = min_t(unsigned, rblock->max_mr, INT_MAX);
+	props->max_srq         = 0;
+	props->max_srq_wr      = 0;
+	props->max_srq_sge     = 0;
+	props->max_pkeys       = 16;
+	props->local_ca_ack_delay = rblock->local_ca_ack_delay;
+	props->max_raw_ipv6_qp
+		= min_t(unsigned, rblock->max_raw_ipv6_qp, INT_MAX);
+	props->max_raw_ethy_qp
+		= min_t(unsigned, rblock->max_raw_ethy_qp, INT_MAX);
+	props->max_mcast_grp
+		= min_t(unsigned, rblock->max_mcast_grp, INT_MAX);
+	props->max_mcast_qp_attach
+		= min_t(unsigned, rblock->max_mcast_qp_attach, INT_MAX);
+	props->max_total_mcast_qp_attach
+		= min_t(unsigned, rblock->max_total_mcast_qp_attach, INT_MAX);
+
+query_device1:
+	kfree(rblock);
+	return ret;
+}
+
+/*
+ * Fill @props with what hipz_h_query_port() reports for @port; width and
+ * speed are constants.  Returns 0, -ENOMEM or -EINVAL (query failed).
+ */
+int ehca_query_port(struct ib_device *ibdev,
+		    u8 port, struct ib_port_attr *props)
+{
+	/* max_mtu code reported by the query -> IB MTU value (codes 1..5) */
+	static const int mtu_of_code[] = {
+		[0x1] = IB_MTU_256,
+		[0x2] = IB_MTU_512,
+		[0x3] = IB_MTU_1024,
+		[0x4] = IB_MTU_2048,
+		[0x5] = IB_MTU_4096,
+	};
+	struct hipz_query_port *rblock;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	int ret = -EINVAL;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		goto free_rblock;
+	}
+
+	memset(props, 0, sizeof(struct ib_port_attr));
+	props->state = rblock->state;
+
+	if (rblock->max_mtu >= 0x1 && rblock->max_mtu <= 0x5) {
+		props->active_mtu = props->max_mtu =
+			mtu_of_code[rblock->max_mtu];
+	} else {
+		/* both MTU fields stay at the 0 written by memset() */
+		ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
+			 rblock->max_mtu);
+	}
+
+	props->gid_tbl_len     = rblock->gid_tbl_len;
+	props->max_msg_sz      = rblock->max_msg_sz;
+	props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
+	props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
+	props->pkey_tbl_len    = rblock->pkey_tbl_len;
+	props->lid             = rblock->lid;
+	props->sm_lid          = rblock->sm_lid;
+	props->lmc             = rblock->lmc;
+	props->sm_sl           = rblock->sm_sl;
+	props->subnet_timeout  = rblock->subnet_timeout;
+	props->init_type_reply = rblock->init_type_reply;
+
+	/* constants, not read from rblock */
+	props->active_width    = IB_WIDTH_12X;
+	props->active_speed    = 0x1;
+	ret = 0;
+
+free_rblock:
+	kfree(rblock);
+
+	return ret;
+}
+
+/* Copy P_Key entry @index of @port to @pkey; 0, -EINVAL or -ENOMEM. */
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+	struct hipz_query_port *rblock;
+
+	if (index >= 16) {	/* max_pkeys is 16: valid indices are 0..15 */
+		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+		return -EINVAL;
+	}
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device,  "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_pkey1;
+	}
+
+	/* index the element; "&array + index" strides by whole arrays */
+	memcpy(pkey, &rblock->pkey_entries[index], sizeof(u16));
+query_pkey1:
+	kfree(rblock);
+	return ret;
+}
+
+/* Build @gid for @port from gid_prefix plus GUID entry @index; 0 or -errno. */
+int ehca_query_gid(struct ib_device *ibdev, u8 port,
+		   int index, union ib_gid *gid)
+{
+	int ret = 0;
+	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+					      ib_device);
+	struct hipz_query_port *rblock;
+
+	/* @index is signed: a negative value would read before guid_entries */
+	if (index < 0 || index > 255) {	/* NOTE(review): is 255 valid? */
+		ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+		return -EINVAL;
+	}
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_gid1;
+	}
+
+	memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
+	memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
+query_gid1:
+	kfree(rblock);
+	return ret;
+}
+
+int ehca_modify_port(struct ib_device *ibdev, u8 port,
+		     int port_modify_mask, struct ib_port_modify *props)
+{
+	/* Stub: modifying port attributes is not implemented yet, so every
+	 * call is rejected with -EFAULT and no argument is examined. */
+	return -EFAULT;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
new file mode 100644
index 0000000..2a65b5b
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -0,0 +1,762 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Functions for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1,1)
+#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8,31)
+#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2,7)
+#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8,31)
+#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32,63)
+#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32,63)
+
+#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1,1)
+#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)
+#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
+
+#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
+#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+static void queue_comp_task(struct ehca_cq *__cq);
+
+static struct ehca_comp_pool* pool;
+static struct notifier_block comp_pool_callback_nb;
+
+#endif
+
+/* Call the CQ's completion handler, if one is set, holding cq->cb_lock. */
+static inline void comp_event_callback(struct ehca_cq *cq)
+{
+	struct ib_cq *ibcq = &cq->ib_cq;
+
+	if (ibcq->comp_handler) {
+		spin_lock(&cq->cb_lock);
+		ibcq->comp_handler(ibcq, ibcq->cq_context);
+		spin_unlock(&cq->cb_lock);
+	}
+}
+
+/*
+ * Log the error data block @rblock of one resource.
+ *
+ * The type field of rblock[2] tells whether @data is an ehca_qp (0x1) or an
+ * ehca_cq (0x4); any other type is reported as unknown.  A QP block whose
+ * rblock[6] is zero is skipped completely.  In all other cases the block is
+ * handed to ehca_dmp() together with @length, framed by begin/end markers.
+ */
+static void print_error_data(struct ehca_shca * shca, void* data,
+			     u64* rblock, int length)
+{
+	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+	u64 resource = rblock[1];
+
+	if (type == 0x1) {		/* Queue Pair */
+		struct ehca_qp *qp = data;
+
+		/* only print error data if AER is set */
+		if (rblock[6] == 0)
+			return;
+
+		ehca_err(&shca->ib_device,
+			 "QP 0x%x (resource=%lx) has errors.",
+			 qp->ib_qp.qp_num, resource);
+	} else if (type == 0x4) {	/* Completion Queue */
+		struct ehca_cq *cq = data;
+
+		ehca_err(&shca->ib_device,
+			 "CQ 0x%x (resource=%lx) has errors.",
+			 cq->cq_number, resource);
+	} else {
+		ehca_err(&shca->ib_device,
+			 "Unknown errror type: %lx on %s.",
+			 type, shca->ib_device.name);
+	}
+
+	/* common tail: announce the block, then dump it between two markers */
+	ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
+
+	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
+		 "---------------------------------------------------");
+	ehca_dmp(rblock, length, "resource=%lx", resource);
+	ehca_err(&shca->ib_device, "EHCA ----- error data end "
+		 "----------------------------------------------------");
+}
+
+/*
+ * Fetch the error data block of @resource with hipz_h_error_data() and
+ * print it.  @data is passed through to print_error_data().  Returns
+ * -ENOMEM if no buffer could be allocated, else the hipz call's result.
+ */
+int ehca_error_data(struct ehca_shca *shca, void *data,
+		    u64 resource)
+{
+	unsigned long block_count;
+	unsigned long h_ret;
+	u64 *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_error_data(shca->ipz_hca_handle, resource, rblock,
+				  &block_count);
+
+	switch (h_ret) {
+	case H_R_STATE:
+		ehca_err(&shca->ib_device,
+			 "No error data is available: %lx.", resource);
+		break;
+	case H_SUCCESS: {
+		int length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
+
+		if (length > PAGE_SIZE)
+			length = PAGE_SIZE;
+
+		print_error_data(shca, data, rblock, length);
+		break;
+	}
+	default:
+		ehca_err(&shca->ib_device,
+			 "Error data could not be fetched: %lx", resource);
+		break;
+	}
+
+	kfree(rblock);
+
+	return h_ret;
+}
+
+/*
+ * Handle a QP event: look the QP up by the token carried in @eqe, log its
+ * error data and pass @event_type to the QP's event handler, if it has one.
+ * Tokens that match no QP are ignored.
+ */
+static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
+			      enum ib_event_type event_type)
+{
+	u32 qp_token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
+	unsigned long irq_flags;
+	struct ib_event ev;
+	struct ehca_qp *qp;
+
+	spin_lock_irqsave(&ehca_qp_idr_lock, irq_flags);
+	qp = idr_find(&ehca_qp_idr, qp_token);
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, irq_flags);
+
+	if (!qp)
+		return;
+
+	ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
+
+	/* the event is only delivered when a handler is registered */
+	if (qp->ib_qp.event_handler) {
+		ev.device     = &shca->ib_device;
+		ev.event      = event_type;
+		ev.element.qp = &qp->ib_qp;
+		qp->ib_qp.event_handler(&ev, qp->ib_qp.qp_context);
+	}
+}
+
+/*
+ * Handle a CQ error event: find the CQ by the token carried in @eqe and
+ * log its error data.
+ */
+static void cq_event_callback(struct ehca_shca *shca, u64 eqe)
+{
+	u32 cq_token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
+	unsigned long irq_flags;
+	struct ehca_cq *cq;
+
+	spin_lock_irqsave(&ehca_cq_idr_lock, irq_flags);
+	cq = idr_find(&ehca_cq_idr, cq_token);
+	spin_unlock_irqrestore(&ehca_cq_idr_lock, irq_flags);
+
+	/* a token that matches no CQ is silently ignored */
+	if (cq)
+		ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
+}
+
+/* Decode @eqe's identifier: QP/CQ events go to callbacks, others are logged. */
+static void parse_identifier(struct ehca_shca *shca, u64 eqe)
+{
+	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
+
+	switch (identifier) {
+	case 0x02: /* path migrated */
+		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
+		break;
+	case 0x03: /* communication established */
+		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
+		break;
+	case 0x04: /* send queue drained */
+		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
+		break;
+	case 0x05: /* QP error */
+	case 0x06: /* QP error */
+		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
+		break;
+	case 0x07: /* CQ error */
+	case 0x08: /* CQ error */
+		cq_event_callback(shca, eqe);
+		break;
+	case 0x09: /* MRMWPTE error */
+		ehca_err(&shca->ib_device, "MRMWPTE error.");
+		break;
+	case 0x0A: /* port event */
+		ehca_err(&shca->ib_device, "Port event.");
+		break;
+	case 0x0B: /* MR access error */
+		ehca_err(&shca->ib_device, "MR access error.");
+		break;
+	case 0x0C: /* EQ error */
+		ehca_err(&shca->ib_device, "EQ error.");
+		break;
+	case 0x0D: /* P/Q_Key mismatch */
+		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
+		break;
+	case 0x10: /* sampling complete */
+		ehca_err(&shca->ib_device, "Sampling complete.");
+		break;
+	case 0x11: /* unaffiliated access error */
+		ehca_err(&shca->ib_device, "Unaffiliated access error.");
+		break;
+	case 0x12: /* path migrating error */
+		ehca_err(&shca->ib_device, "Path migration error.");
+		break;
+	case 0x13: /* interface trace stopped */
+		ehca_err(&shca->ib_device, "Interface trace stopped.");
+		break;
+	case 0x14: /* first error capture info available: logged as unknown */
+	default:
+		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
+			 identifier, shca->ib_device.name);
+		break;
+	}
+	return;
+}
+
+/* Decode @eqe's event code; port changes update sport[] and raise IB events. */
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
+	struct ib_event event;
+	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); /* 1-based below */
+
+	switch (ec) {
+	case 0x30: /* port availability change */
+		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
+			ehca_info(&shca->ib_device,
+				  "port %x is active.", port);
+			event.device = &shca->ib_device;
+			event.event = IB_EVENT_PORT_ACTIVE;
+			event.element.port_num = port;
+			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+			ib_dispatch_event(&event);
+		} else {
+			ehca_info(&shca->ib_device,
+				  "port %x is inactive.", port);
+			event.device = &shca->ib_device;
+			event.event = IB_EVENT_PORT_ERR;
+			event.element.port_num = port;
+			shca->sport[port - 1].port_state = IB_PORT_DOWN;
+			ib_dispatch_event(&event);
+		}
+		break;
+	case 0x31:
+		/* port configuration change
+		 * disruptive change is caused by
+		 * LID, PKEY or SM change;
+		 * reported below as a port error followed by port active
+		 */
+		ehca_warn(&shca->ib_device,
+			  "disruptive port %x configuration change", port);
+
+		ehca_info(&shca->ib_device,
+			 "port %x is inactive.", port);
+		event.device = &shca->ib_device;
+		event.event = IB_EVENT_PORT_ERR;
+		event.element.port_num = port;
+		shca->sport[port - 1].port_state = IB_PORT_DOWN;
+		ib_dispatch_event(&event);
+
+		ehca_info(&shca->ib_device,
+			 "port %x is active.", port);
+		event.device = &shca->ib_device;
+		event.event = IB_EVENT_PORT_ACTIVE;
+		event.element.port_num = port;
+		shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+		ib_dispatch_event(&event);
+		break;
+	case 0x32: /* adapter malfunction */
+		ehca_err(&shca->ib_device, "Adapter malfunction.");
+		break;
+	case 0x33:  /* trace stopped */
+		ehca_err(&shca->ib_device, "Traced stopped.");
+		break;
+	default:
+		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
+			 ec, shca->ib_device.name);
+		break;
+	}
+	return;
+}
+
+/* Store 0 into the CQ's cqx_ep field via its kernel galpa, then load it. */
+static inline void reset_eq_pending(struct ehca_cq *cq)
+{
+	u64 CQx_EP;
+	struct h_galpa gal = cq->galpas.kernel;
+
+	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
+	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); /* value unused */
+	return;
+}
+
+/* NEQ interrupt: schedule the NEQ tasklet of the shca passed as @dev_id. */
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct ehca_shca *hca = dev_id;
+
+	tasklet_hi_schedule(&hca->neq.interrupt_task);
+	return IRQ_HANDLED;
+}
+
+/*
+ * NEQ tasklet (@data is the ehca_shca): poll the notification event queue
+ * until it is empty, decoding every entry that is not a completion event,
+ * then reset the NEQ's events through hipz_h_reset_event().
+ */
+void ehca_tasklet_neq(unsigned long data)
+{
+	struct ehca_shca *hca = (struct ehca_shca *)data;
+	struct ehca_eqe *entry;
+	u64 h_ret;
+
+	for (entry = ehca_poll_eq(hca, &hca->neq); entry;
+	     entry = ehca_poll_eq(hca, &hca->neq)) {
+		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, entry->entry))
+			parse_ec(hca, entry->entry);
+	}
+
+	h_ret = hipz_h_reset_event(hca->ipz_hca_handle,
+				   hca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
+
+	if (h_ret != H_SUCCESS)
+		ehca_err(&hca->ib_device, "Can't clear notification events.");
+}
+
+/* EQ interrupt: schedule the EQ tasklet of the shca passed as @dev_id. */
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct ehca_shca *hca = dev_id;
+
+	tasklet_hi_schedule(&hca->eq.interrupt_task);
+	return IRQ_HANDLED;
+}
+
+/*
+ * EQ tasklet (@data is the ehca_shca): poll the event queue until empty,
+ * handing completion events to their CQ and the rest to parse_identifier().
+ */
+void ehca_tasklet_eq(unsigned long data)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)data;
+	struct ehca_eqe *eqe;
+	int int_state;
+	int query_cnt = 0;
+
+	do {
+		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+		if ((shca->hw_level >= 2) && eqe)
+			int_state = 1;
+		else
+			int_state = 0;
+
+		while ((int_state == 1) || eqe) {
+			while (eqe) {
+				u64 eqe_value = eqe->entry;
+
+				ehca_dbg(&shca->ib_device,
+					 "eqe_value=%lx", eqe_value);
+
+				/* TODO: better structure */
+				if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
+						   eqe_value)) {
+					unsigned long flags;
+					u32 token;
+					struct ehca_cq *cq;
+
+					ehca_dbg(&shca->ib_device,
+						 "... completion event");
+					token = EHCA_BMASK_GET(EQE_CQ_TOKEN,
+							       eqe_value);
+					spin_lock_irqsave(&ehca_cq_idr_lock,
+							  flags);
+					cq = idr_find(&ehca_cq_idr, token);
+					if (cq == NULL) {
+						/* must pair with irqsave */
+						spin_unlock_irqrestore(&ehca_cq_idr_lock,
+								       flags);
+						break;
+					}
+					reset_eq_pending(cq);
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+					queue_comp_task(cq);
+					spin_unlock_irqrestore(&ehca_cq_idr_lock,
+							       flags);
+#else
+					spin_unlock_irqrestore(&ehca_cq_idr_lock,
+							       flags);
+					comp_event_callback(cq);
+#endif
+				} else {
+					ehca_dbg(&shca->ib_device,
+						 "... non completion event");
+					parse_identifier(shca, eqe_value);
+				}
+				eqe =
+					(struct ehca_eqe *)ehca_poll_eq(shca,
+								    &shca->eq);
+			}
+
+			if (shca->hw_level >= 2) {
+				int_state =
+				    hipz_h_query_int_state(shca->ipz_hca_handle,
+							   shca->eq.ist);
+				query_cnt++;
+				iosync();
+				if (query_cnt >= 100) {	/* cap the re-queries */
+					query_cnt = 0;
+					int_state = 0;
+				}
+			}
+			eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+		}
+	} while (int_state != 0);
+}
+
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+
+/* Step pool->last_cpu to the next online CPU, wrapping; return that CPU. */
+static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
+{
+	unsigned long flags_last_cpu;
+	int cpu;
+	if (ehca_debug_level)
+		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
+	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
+	cpu = next_cpu(pool->last_cpu, cpu_online_map);
+	if (cpu == NR_CPUS)
+		cpu = first_cpu(cpu_online_map);
+	pool->last_cpu = cpu;	/* NR_CPUS is never stored in the pool */
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+	return cpu;	/* local copy: last_cpu may change once unlocked */
+}
+
+/*
+ * Count one more pending callback for @__cq.  A CQ that had none pending
+ * is also appended to @cct's list, and the waiter on @cct is woken up.
+ */
+static void __queue_comp_task(struct ehca_cq *__cq,
+			      struct ehca_cpu_comp_task *cct)
+{
+	unsigned long cct_flags;
+	unsigned long cq_flags;
+
+	spin_lock_irqsave(&cct->task_lock, cct_flags);
+	spin_lock_irqsave(&__cq->task_lock, cq_flags);
+
+	if (__cq->nr_callbacks++ == 0) {
+		list_add_tail(&__cq->entry, &cct->cq_list);
+		cct->cq_jobs++;
+		wake_up(&cct->wait_queue);
+	}
+	spin_unlock_irqrestore(&__cq->task_lock, cq_flags);
+	spin_unlock_irqrestore(&cct->task_lock, cct_flags);
+}
+
+/*
+ * Queue @__cq on the comp task of the next online CPU in round-robin
+ * order, or on the one after it if the first pick already has CQ jobs.
+ */
+static void queue_comp_task(struct ehca_cq *__cq)
+{
+	struct ehca_cpu_comp_task *cct;
+	int target;
+
+	(void)get_cpu();	/* the returned CPU id itself is not used */
+	target = find_next_online_cpu(pool);
+
+	BUG_ON(!cpu_online(target));
+
+	cct = per_cpu_ptr(pool->cpu_comp_tasks, target);
+	if (cct->cq_jobs > 0) {
+		/* first pick has work queued: take the following CPU */
+		target = find_next_online_cpu(pool);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, target);
+	}
+
+	__queue_comp_task(__cq, cct);
+	put_cpu();
+}
+
+/*
+ * Run the completion callback for the CQs queued on @cct until the list is
+ * empty.  cct->task_lock is dropped while the callback itself runs.
+ */
+static void run_comp_task(struct ehca_cpu_comp_task* cct)
+{
+	struct ehca_cq *cq;
+	unsigned long flags_cct;
+	unsigned long flags_cq;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	while (!list_empty(&cct->cq_list)) {
+		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+		comp_event_callback(cq);
+		spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+		spin_lock_irqsave(&cq->task_lock, flags_cq);
+		cq->nr_callbacks--;
+		if (cq->nr_callbacks == 0) {	/* nothing pending: unlink */
+			list_del_init(cct->cq_list.next);
+			cct->cq_jobs--;
+		}
+		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
+	}
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+	return;
+}
+
+/*
+ * Thread function of a comp task (@__cct is its ehca_cpu_comp_task): call
+ * schedule() while cq_list is empty, otherwise run the queued CQ callbacks.
+ */
+static int comp_task(void *__cct)
+{
+	struct ehca_cpu_comp_task* cct = __cct;
+	DECLARE_WAITQUEUE(wait, current);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while(!kthread_should_stop()) {
+		add_wait_queue(&cct->wait_queue, &wait);
+		if (list_empty(&cct->cq_list))
+			schedule();
+		else
+			__set_current_state(TASK_RUNNING);
+
+		remove_wait_queue(&cct->wait_queue, &wait);
+		if (!list_empty(&cct->cq_list))
+			run_comp_task(__cct);
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+/* Set up @cpu's comp task slot and thread; NULL (never ERR_PTR) on failure. */
+static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
+					    int cpu)
+{
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	spin_lock_init(&cct->task_lock);
+	INIT_LIST_HEAD(&cct->cq_list);
+	init_waitqueue_head(&cct->wait_queue);
+	cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
+	if (IS_ERR(cct->task))	/* callers and destroy_comp_task() test NULL */
+		cct->task = NULL;
+	return cct->task;
+}
+
+/*
+ * Detach @cpu's completion thread from its slot (under task_lock, also
+ * zeroing cq_jobs) and then stop the thread, if there was one.
+ */
+static void destroy_comp_task(struct ehca_comp_pool *pool,
+			      int cpu)
+{
+	struct ehca_cpu_comp_task *slot;
+	struct task_struct *victim;
+	unsigned long irq_flags;
+
+	slot = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	spin_lock_irqsave(&slot->task_lock, irq_flags);
+	victim = slot->task;
+	slot->task = NULL;
+	slot->cq_jobs = 0;
+	spin_unlock_irqrestore(&slot->task_lock, irq_flags);
+
+	/* kthread_stop() is called only after the lock has been dropped */
+	if (victim)
+		kthread_stop(victim);
+}
+
+/*
+ * Hand the CQs still queued on @cpu's comp task to the current CPU's one.
+ */
+static void take_over_work(struct ehca_comp_pool *pool, int cpu)
+{
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	LIST_HEAD(list);
+	struct ehca_cq *cq;
+	unsigned long flags_cct;
+
+	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	list_splice_init(&cct->cq_list, &list);
+	while(!list_empty(&list)) {
+		/* cct->cq_list was emptied by the splice: walk the local list */
+		cq = list_entry(list.next, struct ehca_cq, entry);
+		list_del(&cq->entry);
+		/* NOTE(review): __queue_comp_task() links a CQ only when its
+		 * nr_callbacks is 0 - confirm these CQs really get re-queued */
+		__queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
+						  smp_processor_id()));
+	}
+	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+}
+
+/* CPU notifier: create, bind/start or destroy the comp task of CPU @hcpu. */
+static int comp_pool_callback(struct notifier_block *nfb,
+			      unsigned long action,
+			      void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct ehca_cpu_comp_task *cct;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
+		if(!create_comp_task(pool, cpu)) {
+			ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	case CPU_UP_CANCELED:
+		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		kthread_bind(cct->task, any_online_cpu(cpu_online_map));
+		destroy_comp_task(pool, cpu);
+		break;
+	case CPU_ONLINE:
+		ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		kthread_bind(cct->task, cpu);
+		wake_up_process(cct->task);
+		break;
+	case CPU_DOWN_PREPARE:	/* only logged */
+		ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
+		break;
+	case CPU_DOWN_FAILED:	/* only logged */
+		ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
+		break;
+	case CPU_DEAD:
+		ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
+		destroy_comp_task(pool, cpu);
+		take_over_work(pool, cpu);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+#endif
+
+/* Allocate the comp pool, start a bound comp task on every online CPU and
+ * register the CPU notifier.  Returns 0 or -ENOMEM (always 0 w/o scaling). */
+int ehca_create_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+	int cpu;
+	struct task_struct *task;
+
+	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
+	if (pool == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&pool->last_cpu_lock);
+	pool->last_cpu = any_online_cpu(cpu_online_map);
+	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
+	if (pool->cpu_comp_tasks == NULL) {
+		kfree(pool);
+		return -ENOMEM;	/* an allocation failure, not a bad argument */
+	}
+
+	for_each_online_cpu(cpu) {
+		task = create_comp_task(pool, cpu);
+		if (task) {
+			kthread_bind(task, cpu);
+			wake_up_process(task);
+		}
+	}
+
+	comp_pool_callback_nb.notifier_call = comp_pool_callback;
+	comp_pool_callback_nb.priority = 0;
+	register_cpu_notifier(&comp_pool_callback_nb);
+#endif
+	return 0;
+}
+
+/* Stop the comp tasks of all online CPUs and free the pool (scaling only). */
+void ehca_destroy_comp_pool(void)
+{
+#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+	int i;
+
+	unregister_cpu_notifier(&comp_pool_callback_nb);
+	for_each_online_cpu(i)
+		destroy_comp_task(pool, i);
+	/* release what ehca_create_comp_pool() allocated */
+	free_percpu(pool->cpu_comp_tasks);
+	kfree(pool);
+	pool = NULL;
+#endif
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
new file mode 100644
index 0000000..85bf1fe
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -0,0 +1,77 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions and structs for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IRQ_H
+#define __EHCA_IRQ_H
+
+
+struct ehca_shca;
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <asm/atomic.h>
+
+int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_neq(unsigned long data);
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs);
+void ehca_tasklet_eq(unsigned long data);
+
+struct ehca_cpu_comp_task {	/* one per CPU, see ehca_comp_pool */
+	wait_queue_head_t wait_queue;	/* comp_task() waits here for work */
+	struct list_head cq_list;	/* CQs with pending callbacks */
+	struct task_struct *task;	/* thread made by create_comp_task() */
+	spinlock_t task_lock;		/* held while cq_list/cq_jobs change */
+	int cq_jobs;			/* count of CQs linked on cq_list */
+};
+
+struct ehca_comp_pool {
+	struct ehca_cpu_comp_task *cpu_comp_tasks; /* alloc_percpu() area */
+	int last_cpu;			/* CPU chosen by the last round-robin step */
+	spinlock_t last_cpu_lock;	/* held while last_cpu is advanced */
+};
+
+int ehca_create_comp_pool(void);
+void ehca_destroy_comp_pool(void);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
new file mode 100644
index 0000000..319c39d
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -0,0 +1,182 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions for internal functions
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Dietmar Decker <ddecker@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IVERBS_H__
+#define __EHCA_IVERBS_H__
+
+#include "ehca_classes.h"
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
+
+int ehca_query_port(struct ib_device *ibdev, u8 port,
+		    struct ib_port_attr *props);
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
+		   union ib_gid *gid);
+
+int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
+		     struct ib_port_modify *props);
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata);
+
+int ehca_dealloc_pd(struct ib_pd *pd);
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_destroy_ah(struct ib_ah *ah);
+
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+			       struct ib_phys_buf *phys_buf_array,
+			       int num_phys_buf,
+			       int mr_access_flags, u64 *iova_start);
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
+			       struct ib_umem *region,
+			       int mr_access_flags, struct ib_udata *udata);
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+		       int mr_rereg_mask,
+		       struct ib_pd *pd,
+		       struct ib_phys_buf *phys_buf_array,
+		       int num_phys_buf, int mr_access_flags, u64 *iova_start);
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
+
+int ehca_dereg_mr(struct ib_mr *mr);
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
+
+int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+		 struct ib_mw_bind *mw_bind);
+
+int ehca_dealloc_mw(struct ib_mw *mw);
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+			      int mr_access_flags,
+			      struct ib_fmr_attr *fmr_attr);
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+		      u64 *page_list, int list_len, u64 iova);
+
+int ehca_unmap_fmr(struct list_head *fmr_list);
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr);
+
+enum ehca_eq_type {
+	EHCA_EQ = 0, /* Event Queue              */
+	EHCA_NEQ     /* Notification Event Queue */
+};
+
+int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
+		   enum ehca_eq_type type, const u32 length);
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+			     struct ib_ucontext *context,
+			     struct ib_udata *udata);
+
+int ehca_destroy_cq(struct ib_cq *cq);
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+
+int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *init_attr,
+			     struct ib_udata *udata);
+
+int ehca_destroy_qp(struct ib_qp *qp);
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata);
+
+int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
+		   struct ib_send_wr **bad_send_wr);
+
+int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+		   struct ib_recv_wr **bad_recv_wr);
+
+u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
+		    struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+					struct ib_udata *udata);
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context);
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+void ehca_poll_eqs(unsigned long data);
+
+int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped,
+		     struct vm_area_struct **vma);
+
+int ehca_mmap_register(u64 physical,void **mapped,
+		       struct vm_area_struct **vma);
+
+int ehca_munmap(unsigned long addr, size_t len);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
new file mode 100644
index 0000000..2380994
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -0,0 +1,818 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  module start stop, hca detection
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
+MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
+MODULE_VERSION("SVNEHCA_0016");
+
+int ehca_open_aqp1     = 0;
+int ehca_debug_level   = 0;
+int ehca_hw_level      = 0;
+int ehca_nr_ports      = 2;
+int ehca_use_hp_mr     = 0;
+int ehca_port_act_time = 30;
+int ehca_poll_all_eqs  = 1;
+int ehca_static_rate   = -1;
+
+module_param_named(open_aqp1,     ehca_open_aqp1,     int, 0);
+module_param_named(debug_level,   ehca_debug_level,   int, 0);
+module_param_named(hw_level,      ehca_hw_level,      int, 0);
+module_param_named(nr_ports,      ehca_nr_ports,      int, 0);
+module_param_named(use_hp_mr,     ehca_use_hp_mr,     int, 0);
+module_param_named(port_act_time, ehca_port_act_time, int, 0);
+module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
+module_param_named(static_rate,   ehca_static_rate,   int, 0);
+
+MODULE_PARM_DESC(open_aqp1,
+		 "AQP1 on startup (0: no (default), 1: yes)");
+MODULE_PARM_DESC(debug_level,
+		 "debug level"
+		 " (0: no debug traces (default), 1: with debug traces)");
+MODULE_PARM_DESC(hw_level,
+		 "hardware level"
+		 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+MODULE_PARM_DESC(nr_ports,
+		 "number of connected ports (default: 2)");
+MODULE_PARM_DESC(use_hp_mr,
+		 "high performance MRs (0: no (default), 1: yes)");
+MODULE_PARM_DESC(port_act_time,
+		 "time to wait for port activation (default: 30 sec)");
+MODULE_PARM_DESC(poll_all_eqs,
+		 "polls all event queues periodically"
+		 " (0: no, 1: yes (default))");
+MODULE_PARM_DESC(static_rate,
+		 "set permanent static rate (default: disabled)");
+
+spinlock_t ehca_qp_idr_lock;
+spinlock_t ehca_cq_idr_lock;
+DEFINE_IDR(ehca_qp_idr);
+DEFINE_IDR(ehca_cq_idr);
+
+static struct list_head shca_list; /* list of all registered ehcas */
+static spinlock_t shca_list_lock;
+
+static struct timer_list poll_eqs_timer;
+
+/*
+ * Set up the PD, CQ, QP, AV and MR/MW slab caches, in that order.
+ * Returns 0 when all five exist.  If one init call fails, its result is
+ * returned after the caches created before it have been cleaned up again.
+ */
+static int ehca_create_slab_caches(void)
+{
+	int rc = ehca_init_pd_cache();
+
+	if (rc) {
+		ehca_gen_err("Cannot create PD SLAB cache.");
+		return rc;
+	}
+
+	rc = ehca_init_cq_cache();
+	if (rc) {
+		ehca_gen_err("Cannot create CQ SLAB cache.");
+		goto undo_pd;
+	}
+
+	rc = ehca_init_qp_cache();
+	if (rc) {
+		ehca_gen_err("Cannot create QP SLAB cache.");
+		goto undo_cq;
+	}
+
+	rc = ehca_init_av_cache();
+	if (rc) {
+		ehca_gen_err("Cannot create AV SLAB cache.");
+		goto undo_qp;
+	}
+
+	rc = ehca_init_mrmw_cache();
+	if (rc) {
+		ehca_gen_err("Cannot create MR&MW SLAB cache.");
+		goto undo_av;
+	}
+
+	return 0;
+
+undo_av:
+	ehca_cleanup_av_cache();
+undo_qp:
+	ehca_cleanup_qp_cache();
+undo_cq:
+	ehca_cleanup_cq_cache();
+undo_pd:
+	ehca_cleanup_pd_cache();
+	return rc;
+}
+
+static void ehca_destroy_slab_caches(void)
+{
+	ehca_cleanup_mrmw_cache();	/* reverse of the creation order */
+	ehca_cleanup_av_cache();
+	ehca_cleanup_qp_cache();
+	ehca_cleanup_cq_cache();
+	ehca_cleanup_pd_cache();
+}
+
+#define EHCA_HCAAVER  EHCA_BMASK_IBM(32,39)
+#define EHCA_REVID    EHCA_BMASK_IBM(40,63)
+
+/*
+ * Query the HCA and fill in shca->num_ports, shca->hw_level and the
+ * sport rates.  Returns 0, or -ENOMEM / -EPERM on failure.
+ */
+int ehca_sense_attributes(struct ehca_shca *shca)
+{
+	int ret = 0;
+	u64 h_ret;
+	struct hipz_query_hca *rblock;
+
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!rblock) {
+		ehca_gen_err("Cannot allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
+	if (h_ret != H_SUCCESS) {
+		ehca_gen_err("Cannot query device properties. h_ret=%lx",
+			     h_ret);
+		ret = -EPERM;
+		goto num_ports1;
+	}
+
+	if (ehca_nr_ports == 1)
+		shca->num_ports = 1;
+	else
+		shca->num_ports = (u8)rblock->num_ports;
+	ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
+
+	if (ehca_hw_level == 0) {
+		u32 hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
+		u32 revid   = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
+
+		ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
+		if ((hcaaver == 1) && (revid == 0))
+			shca->hw_level = 0;
+		else if ((hcaaver == 1) && (revid == 1))
+			shca->hw_level = 1;
+		else if ((hcaaver == 1) && (revid == 2))
+			shca->hw_level = 2;
+	} else
+		shca->hw_level = ehca_hw_level;	/* forced by module param */
+	ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
+
+	shca->sport[0].rate = IB_RATE_30_GBPS;
+	shca->sport[1].rate = IB_RATE_30_GBPS;
+
+num_ports1:
+	kfree(rblock);
+	return ret;
+}
+
+/* Fetch the node GUID via hipz_h_query_hca() and store it in ib_device. */
+static int init_node_guid(struct ehca_shca *shca)
+{
+	struct hipz_query_hca *query_blk;
+	int rc = 0;
+
+	query_blk = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!query_blk) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_hca(shca->ipz_hca_handle, query_blk) == H_SUCCESS) {
+		memcpy(&shca->ib_device.node_guid, &query_blk->node_guid,
+		       sizeof(u64));
+	} else {
+		ehca_err(&shca->ib_device, "Can't query device properties");
+		rc = -EINVAL;
+	}
+
+	kfree(query_blk);
+	return rc;
+}
+
+int ehca_register_device(struct ehca_shca *shca)
+{
+	int ret;
+
+	ret = init_node_guid(shca);
+	if (ret)
+		return ret;
+
+	strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
+	shca->ib_device.owner               = THIS_MODULE;
+
+	shca->ib_device.uverbs_abi_ver	    = 5;
+	shca->ib_device.uverbs_cmd_mask	    =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+
+	shca->ib_device.node_type           = RDMA_NODE_IB_CA;
+	shca->ib_device.phys_port_cnt       = shca->num_ports;
+	shca->ib_device.dma_device          = &shca->ibmebus_dev->ofdev.dev;
+	shca->ib_device.query_device        = ehca_query_device;
+	shca->ib_device.query_port          = ehca_query_port;
+	shca->ib_device.query_gid           = ehca_query_gid;
+	shca->ib_device.query_pkey          = ehca_query_pkey;
+	/* shca->ib_device.modify_device    = ehca_modify_device    */
+	shca->ib_device.modify_port         = ehca_modify_port;
+	shca->ib_device.alloc_ucontext      = ehca_alloc_ucontext;
+	shca->ib_device.dealloc_ucontext    = ehca_dealloc_ucontext;
+	shca->ib_device.alloc_pd            = ehca_alloc_pd;
+	shca->ib_device.dealloc_pd          = ehca_dealloc_pd;
+	shca->ib_device.create_ah	    = ehca_create_ah;
+	/* shca->ib_device.modify_ah	    = ehca_modify_ah;	    */
+	shca->ib_device.query_ah	    = ehca_query_ah;
+	shca->ib_device.destroy_ah	    = ehca_destroy_ah;
+	shca->ib_device.create_qp	    = ehca_create_qp;
+	shca->ib_device.modify_qp	    = ehca_modify_qp;
+	shca->ib_device.query_qp	    = ehca_query_qp;
+	shca->ib_device.destroy_qp	    = ehca_destroy_qp;
+	shca->ib_device.post_send	    = ehca_post_send;
+	shca->ib_device.post_recv	    = ehca_post_recv;
+	shca->ib_device.create_cq	    = ehca_create_cq;
+	shca->ib_device.destroy_cq	    = ehca_destroy_cq;
+	shca->ib_device.resize_cq	    = ehca_resize_cq;
+	shca->ib_device.poll_cq		    = ehca_poll_cq;
+	/* shca->ib_device.peek_cq	    = ehca_peek_cq;	    */
+	shca->ib_device.req_notify_cq	    = ehca_req_notify_cq;
+	/* shca->ib_device.req_ncomp_notif  = ehca_req_ncomp_notif; */
+	shca->ib_device.get_dma_mr	    = ehca_get_dma_mr;
+	shca->ib_device.reg_phys_mr	    = ehca_reg_phys_mr;
+	shca->ib_device.reg_user_mr	    = ehca_reg_user_mr;
+	shca->ib_device.query_mr	    = ehca_query_mr;
+	shca->ib_device.dereg_mr	    = ehca_dereg_mr;
+	shca->ib_device.rereg_phys_mr	    = ehca_rereg_phys_mr;
+	shca->ib_device.alloc_mw	    = ehca_alloc_mw;
+	shca->ib_device.bind_mw		    = ehca_bind_mw;
+	shca->ib_device.dealloc_mw	    = ehca_dealloc_mw;
+	shca->ib_device.alloc_fmr	    = ehca_alloc_fmr;
+	shca->ib_device.map_phys_fmr	    = ehca_map_phys_fmr;
+	shca->ib_device.unmap_fmr	    = ehca_unmap_fmr;
+	shca->ib_device.dealloc_fmr	    = ehca_dealloc_fmr;
+	shca->ib_device.attach_mcast	    = ehca_attach_mcast;
+	shca->ib_device.detach_mcast	    = ehca_detach_mcast;
+	/* shca->ib_device.process_mad	    = ehca_process_mad;	    */
+	shca->ib_device.mmap		    = ehca_mmap;
+
+	ret = ib_register_device(&shca->ib_device);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "ib_register_device() failed ret=%x", ret);
+
+	return ret;
+}
+
+/*
+ * Create the CQ and the GSI QP (AQP1) for the given 1-based port and
+ * store both in shca->sport[port - 1].
+ * Returns 0 on success, -EPERM if either object already exists, or the
+ * error from ib_create_cq()/ib_create_qp().  On failure the CQ is
+ * destroyed again and sport->ibcq_aqp1 is reset to NULL.
+ */
+static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+{
+	struct ehca_sport *sport = &shca->sport[port - 1];
+	struct ib_cq *ibcq;
+	struct ib_qp *ibqp;
+	struct ib_qp_init_attr qp_init_attr;
+
+	/* check both before allocating, so these paths have nothing to undo */
+	if (sport->ibcq_aqp1) {
+		ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
+		return -EPERM;
+	}
+	if (sport->ibqp_aqp1) {
+		ehca_err(&shca->ib_device, "AQP1 QP is already created.");
+		return -EPERM;
+	}
+
+	ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10);
+	if (IS_ERR(ibcq)) {
+		ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
+		return PTR_ERR(ibcq);
+	}
+	sport->ibcq_aqp1 = ibcq;
+
+	/* memset() also leaves qp_context, event_handler and srq NULL */
+	memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
+	qp_init_attr.send_cq          = ibcq;
+	qp_init_attr.recv_cq          = ibcq;
+	qp_init_attr.sq_sig_type      = IB_SIGNAL_ALL_WR;
+	qp_init_attr.cap.max_send_wr  = 100;
+	qp_init_attr.cap.max_recv_wr  = 100;
+	qp_init_attr.cap.max_send_sge = 2;
+	qp_init_attr.cap.max_recv_sge = 1;
+	qp_init_attr.qp_type          = IB_QPT_GSI;
+	qp_init_attr.port_num         = port;
+
+	ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
+	if (IS_ERR(ibqp)) {
+		ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
+		ib_destroy_cq(ibcq);
+		sport->ibcq_aqp1 = NULL; /* do not keep a dangling pointer */
+		return PTR_ERR(ibqp);
+	}
+	sport->ibqp_aqp1 = ibqp;
+
+	return 0;
+}
+
+/* Destroy the AQP1 QP, then its CQ; the CQ is left alone if the QP fails. */
+static int ehca_destroy_aqp1(struct ehca_sport *sport)
+{
+	int rc = ib_destroy_qp(sport->ibqp_aqp1);
+
+	if (rc) {
+		ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", rc);
+		return rc;
+	}
+
+	rc = ib_destroy_cq(sport->ibcq_aqp1);
+	if (!rc)
+		return 0;
+	ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", rc);
+	return rc;
+}
+
+/* sysfs show: print the current ehca_debug_level as a decimal line. */
+static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
+}
+
+/* Set ehca_debug_level from the first byte of buf if it is '0'..'9'. */
+static ssize_t ehca_store_debug_level(struct device_driver *ddp,
+				      const char *buf, size_t count)
+{
+	if (*buf >= '0' && *buf <= '9')
+		ehca_debug_level = *buf - '0';
+	return 1;	/* one byte consumed, whatever its value */
+}
+
+DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
+	    ehca_show_debug_level, ehca_store_debug_level);
+
+void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
+{
+	driver_create_file(&drv->driver, &driver_attr_debug_level);
+}
+
+void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
+{
+	driver_remove_file(&drv->driver, &driver_attr_debug_level);
+}
+
+#define EHCA_RESOURCE_ATTR(name)                                           \
+static ssize_t  ehca_show_##name(struct device *dev,                       \
+				 struct device_attribute *attr,            \
+				 char *buf)                                \
+{									   \
+	struct ehca_shca *shca;						   \
+	struct hipz_query_hca *rblock;				           \
+	int data;                                                          \
+									   \
+	shca = dev->driver_data;					   \
+									   \
+	rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);			   \
+	if (!rblock) {						           \
+		dev_err(dev, "Can't allocate rblock memory.");		   \
+		return 0;						   \
+	}								   \
+									   \
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
+		dev_err(dev, "Can't query device properties");	   	   \
+		kfree(rblock);					   	   \
+		return 0;					   	   \
+	}								   \
+									   \
+	data = rblock->name;                                               \
+	kfree(rblock);                                                     \
+									   \
+	if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1))	   \
+		return snprintf(buf, 256, "1\n");			   \
+	else								   \
+		return snprintf(buf, 256, "%d\n", data);		   \
+									   \
+}									   \
+static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
+
+EHCA_RESOURCE_ATTR(num_ports);
+EHCA_RESOURCE_ATTR(hw_ver);
+EHCA_RESOURCE_ATTR(max_eq);
+EHCA_RESOURCE_ATTR(cur_eq);
+EHCA_RESOURCE_ATTR(max_cq);
+EHCA_RESOURCE_ATTR(cur_cq);
+EHCA_RESOURCE_ATTR(max_qp);
+EHCA_RESOURCE_ATTR(cur_qp);
+EHCA_RESOURCE_ATTR(max_mr);
+EHCA_RESOURCE_ATTR(cur_mr);
+EHCA_RESOURCE_ATTR(max_mw);
+EHCA_RESOURCE_ATTR(cur_mw);
+EHCA_RESOURCE_ATTR(max_pd);
+EHCA_RESOURCE_ATTR(max_ah);
+
+/* sysfs show: print shca->ipz_hca_handle.handle of this device in hex. */
+static ssize_t ehca_show_adapter_handle(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct ehca_shca *adapter = dev->driver_data;
+
+	return sprintf(buf, "%lx\n", adapter->ipz_hca_handle.handle);
+}
+static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+
+
+void ehca_create_device_sysfs(struct ibmebus_dev *dev)
+{
+	device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+	device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
+	device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
+	device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
+	device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
+{
+	device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
+	device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
+}
+
+/*
+ * Probe one eHCA adapter: allocate and register the ib_device, create the
+ * EQ and NEQ, the internal PD and max MR and, with open_aqp1=1, the AQP1s,
+ * then add the adapter to shca_list.  Returns 0 on success, otherwise the
+ * error of the failing step after undoing the earlier steps in reverse.
+ */
+static int __devinit ehca_probe(struct ibmebus_dev *dev,
+				const struct of_device_id *id)
+{
+	struct ehca_shca *shca;
+	u64 *handle;
+	struct ib_pd *ibpd;
+	unsigned long flags;
+	int ret, err;	/* err: cleanup status, must not clobber ret */
+
+	handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
+	if (!handle) {
+		ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
+			     dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	if (!(*handle)) {
+		ehca_gen_err("Wrong eHCA handle for adapter: %s.",
+			     dev->ofdev.node->full_name);
+		return -ENODEV;
+	}
+
+	shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
+	if (!shca) {
+		ehca_gen_err("Cannot allocate shca memory.");
+		return -ENOMEM;
+	}
+
+	shca->ibmebus_dev = dev;
+	shca->ipz_hca_handle.handle = *handle;
+	dev->ofdev.dev.driver_data = shca;
+
+	ret = ehca_sense_attributes(shca);
+	if (ret < 0) {
+		ehca_gen_err("Cannot sense eHCA attributes.");
+		goto probe1;
+	}
+
+	ret = ehca_register_device(shca);
+	if (ret) {
+		ehca_gen_err("Cannot register Infiniband device");
+		goto probe1;
+	}
+
+	/* create event queues */
+	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create EQ.");
+		goto probe2;
+	}
+
+	ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create NEQ.");
+		goto probe3;
+	}
+
+	/* create internal protection domain */
+	ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL);
+	if (IS_ERR(ibpd)) {
+		ehca_err(&shca->ib_device, "Cannot create internal PD.");
+		ret = PTR_ERR(ibpd);
+		goto probe4;
+	}
+
+	shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
+	shca->pd->ib_pd.device = &shca->ib_device;
+
+	/* create internal max MR */
+	ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
+	if (ret) {
+		ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x",
+			 ret);
+		goto probe5;
+	}
+
+	/* create AQP1 for port 1 */
+	if (ehca_open_aqp1 == 1) {
+		shca->sport[0].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 1);
+		if (ret) {
+			ehca_err(&shca->ib_device,
+				 "Cannot create AQP1 for port 1.");
+			goto probe6;
+		}
+	}
+
+	/* create AQP1 for port 2 */
+	if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
+		shca->sport[1].port_state = IB_PORT_DOWN;
+		ret = ehca_create_aqp1(shca, 2);
+		if (ret) {
+			ehca_err(&shca->ib_device,
+				 "Cannot create AQP1 for port 2.");
+			goto probe7;
+		}
+	}
+
+	ehca_create_device_sysfs(dev);
+
+	/* ehca_poll_eqs() takes this lock from the timer: keep IRQs off */
+	spin_lock_irqsave(&shca_list_lock, flags);
+	list_add(&shca->shca_list, &shca_list);
+	spin_unlock_irqrestore(&shca_list_lock, flags);
+
+	return 0;
+
+probe7:
+	err = ehca_destroy_aqp1(&shca->sport[0]);
+	if (err)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy AQP1 for port 1. ret=%x", err);
+probe6:
+	err = ehca_dereg_internal_maxmr(shca);
+	if (err)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal MR. ret=%x", err);
+probe5:
+	err = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (err)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal PD. ret=%x", err);
+probe4:
+	err = ehca_destroy_eq(shca, &shca->neq);
+	if (err)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy NEQ. ret=%x", err);
+probe3:
+	err = ehca_destroy_eq(shca, &shca->eq);
+	if (err)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy EQ. ret=%x", err);
+probe2:
+	ib_unregister_device(&shca->ib_device);
+probe1:
+	ib_dealloc_device(&shca->ib_device);
+	return ret;
+}
+
+/*
+ * Undo ehca_probe(); failed steps are logged, teardown goes on regardless.
+ */
+static int __devexit ehca_remove(struct ibmebus_dev *dev)
+{
+	struct ehca_shca *shca = dev->ofdev.dev.driver_data;
+	unsigned long flags;
+	int ret, i;
+
+	ehca_remove_device_sysfs(dev);
+
+	if (ehca_open_aqp1 == 1) {
+		for (i = 0; i < shca->num_ports; i++) {
+			ret = ehca_destroy_aqp1(&shca->sport[i]);
+			if (ret)
+				ehca_err(&shca->ib_device,
+					 "Cannot destroy AQP1 for port %x "
+					 "ret=%x", i + 1, ret);
+		}
+	}
+
+	ib_unregister_device(&shca->ib_device);
+
+	ret = ehca_dereg_internal_maxmr(shca);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal MR. ret=%x", ret);
+	ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+	if (ret)
+		ehca_err(&shca->ib_device,
+			 "Cannot destroy internal PD. ret=%x", ret);
+	ret = ehca_destroy_eq(shca, &shca->eq);
+	if (ret)
+		ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret);
+	ret = ehca_destroy_eq(shca, &shca->neq);
+	if (ret)
+		ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret);
+
+	/* unlink while shca is still valid; ib_dealloc_device() may free it */
+	spin_lock_irqsave(&shca_list_lock, flags);
+	list_del(&shca->shca_list);
+	spin_unlock_irqrestore(&shca_list_lock, flags);
+	ib_dealloc_device(&shca->ib_device);
+
+	return ret;	/* result of the NEQ teardown only */
+}
+
+static struct of_device_id ehca_device_table[] =
+{
+	{
+		.name       = "lhca",
+		.compatible = "IBM,lhca",
+	},
+	{},
+};
+
+static struct ibmebus_driver ehca_driver = {
+	.name     = "ehca",
+	.id_table = ehca_device_table,
+	.probe    = ehca_probe,
+	.remove   = ehca_remove,
+};
+
+/* Timer callback: poll the EQ of every listed adapter, then re-arm. */
+void ehca_poll_eqs(unsigned long data)
+{
+	struct ehca_shca *adapter;
+
+	spin_lock(&shca_list_lock);
+	list_for_each_entry(adapter, &shca_list, shca_list)
+		if (adapter->eq.is_initialized)
+			ehca_tasklet_eq((unsigned long)(void*)adapter);
+	mod_timer(&poll_eqs_timer, jiffies + HZ);	/* again in 1s */
+	spin_unlock(&shca_list_lock);
+}
+
+/* Module init: idrs/locks, comp pool, slab caches, driver, poll timer. */
+int __init ehca_module_init(void)
+{
+	int rc;
+
+	printk(KERN_INFO "eHCA Infiniband Device Driver "
+	                 "(Rel.: SVNEHCA_0016)\n");
+	idr_init(&ehca_qp_idr);
+	idr_init(&ehca_cq_idr);
+	spin_lock_init(&ehca_qp_idr_lock);
+	spin_lock_init(&ehca_cq_idr_lock);
+	INIT_LIST_HEAD(&shca_list);
+	spin_lock_init(&shca_list_lock);
+
+	rc = ehca_create_comp_pool();
+	if (rc) {
+		ehca_gen_err("Cannot create comp pool.");
+		return rc;
+	}
+
+	rc = ehca_create_slab_caches();
+	if (rc) {
+		ehca_gen_err("Cannot create SLAB caches");
+		rc = -ENOMEM;
+		goto undo_comp_pool;
+	}
+
+	rc = ibmebus_register_driver(&ehca_driver);
+	if (rc) {
+		ehca_gen_err("Cannot register eHCA device driver");
+		rc = -EINVAL;
+		goto undo_slab_caches;
+	}
+
+	ehca_create_driver_sysfs(&ehca_driver);
+	if (ehca_poll_all_eqs == 1) {
+		init_timer(&poll_eqs_timer);
+		poll_eqs_timer.function = ehca_poll_eqs;
+		poll_eqs_timer.expires = jiffies + HZ;
+		add_timer(&poll_eqs_timer);
+	} else {
+		ehca_gen_err("WARNING!!!");
+		ehca_gen_err("It is possible to lose interrupts.");
+	}
+	return 0;
+
+undo_slab_caches:
+	ehca_destroy_slab_caches();
+undo_comp_pool:
+	ehca_destroy_comp_pool();
+	return rc;
+}
+
+/* Module exit: stop the poll timer, unregister, free global resources. */
+void __exit ehca_module_exit(void)
+{
+	if (ehca_poll_all_eqs == 1)
+		del_timer_sync(&poll_eqs_timer);
+
+	ehca_remove_driver_sysfs(&ehca_driver);
+	ibmebus_unregister_driver(&ehca_driver);
+
+	ehca_destroy_slab_caches();
+	ehca_destroy_comp_pool();
+
+	idr_destroy(&ehca_cq_idr);
+	idr_destroy(&ehca_qp_idr);
+}
+
+module_init(ehca_module_init);
+module_exit(ehca_module_exit);
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c
new file mode 100644
index 0000000..32a8706
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mcast.c
@@ -0,0 +1,131 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  mcast  functions
+ *
+ *  Authors: Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+#define MAX_MC_LID 0xFFFE
+#define MIN_MC_LID 0xC000	/* Multicast limits */
+#define EHCA_VALID_MULTICAST_GID(gid)  ((gid)[0] == 0xFF)
+#define EHCA_VALID_MULTICAST_LID(lid) \
+	(((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
+
+/*
+ * Attach a UD QP to the multicast group gid/lid via hipz_h_attach_mcqp().
+ */
+int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
+		return -EINVAL;
+	}
+	if (!EHCA_VALID_MULTICAST_GID(gid->raw)) {
+		ehca_err(ibqp->device, "invalid mulitcast gid");
+		return -EINVAL;
+	}
+	if (!EHCA_VALID_MULTICAST_LID(lid)) {
+		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
+			 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+	return ehca2ib_return_code(h_ret);
+}
+
+/*
+ * Detach a UD QP from the multicast group gid/lid via hipz_h_detach_mcqp().
+ */
+int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device); /* as in attach */
+	union ib_gid my_gid;
+	u64 subnet_prefix, interface_id, h_ret;
+
+	if (ibqp->qp_type != IB_QPT_UD) {
+		ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
+		return -EINVAL;
+	}
+	if (!EHCA_VALID_MULTICAST_GID(gid->raw)) {
+		ehca_err(ibqp->device, "invalid mulitcast gid");
+		return -EINVAL;
+	}
+	if (!EHCA_VALID_MULTICAST_LID(lid)) {
+		ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
+		return -EINVAL;
+	}
+
+	memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+	subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+	interface_id = be64_to_cpu(my_gid.global.interface_id);
+	h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
+				   my_qp->galpas.kernel,
+				   lid, subnet_prefix, interface_id);
+	if (h_ret != H_SUCCESS)
+		ehca_err(ibqp->device,
+			 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
+			 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+	return ehca2ib_return_code(h_ret);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
new file mode 100644
index 0000000..5ca6544
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -0,0 +1,2261 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  MR/MW functions
+ *
+ *  Authors: Dietmar Decker <ddecker@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "hcp_if.h"
+#include "hipz_hw.h"
+
+static struct kmem_cache *mr_cache;	/* slab cache for struct ehca_mr */
+static struct kmem_cache *mw_cache;	/* slab cache for struct ehca_mw */
+
+/* Allocate a zeroed MR object from mr_cache; returns NULL on failure. */
+static struct ehca_mr *ehca_mr_new(void)
+{
+	struct ehca_mr *mr = kmem_cache_alloc(mr_cache, SLAB_KERNEL);
+
+	if (!mr) {
+		ehca_gen_err("alloc failed");
+		return NULL;
+	}
+	memset(mr, 0, sizeof(*mr));
+	spin_lock_init(&mr->mrlock);
+	return mr;
+}
+
+static void ehca_mr_delete(struct ehca_mr *me)
+{
+	kmem_cache_free(mr_cache, me);	/* hand the object back to mr_cache */
+}
+
+/* Allocate a zeroed MW object from mw_cache; returns NULL on failure. */
+static struct ehca_mw *ehca_mw_new(void)
+{
+	struct ehca_mw *mw = kmem_cache_alloc(mw_cache, SLAB_KERNEL);
+
+	if (!mw) {
+		ehca_gen_err("alloc failed");
+		return NULL;
+	}
+	memset(mw, 0, sizeof(*mw));
+	spin_lock_init(&mw->mwlock);
+	return mw;
+}
+
+static void ehca_mw_delete(struct ehca_mw *me)
+{
+	kmem_cache_free(mw_cache, me);	/* hand the object back to mw_cache */
+}
+
+/*----------------------------------------------------------------------*/
+
+/* Create a DMA MR for @pd via ehca_reg_maxmr(); ERR_PTR() on failure. */
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
+{
+	struct ib_mr *ib_mr;
+	int ret;
+	struct ehca_mr *e_maxmr;
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+
+	if (!shca->maxmr) {
+		ehca_err(&shca->ib_device, "no internal max-MR exist!");
+		ib_mr = ERR_PTR(-EINVAL);
+		goto get_dma_mr_exit0;
+	}
+	e_maxmr = ehca_mr_new();
+	if (!e_maxmr) {
+		ehca_err(&shca->ib_device, "out of memory");
+		ib_mr = ERR_PTR(-ENOMEM);
+		goto get_dma_mr_exit0;
+	}
+	ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE,
+			     mr_access_flags, e_pd,
+			     &e_maxmr->ib.ib_mr.lkey,
+			     &e_maxmr->ib.ib_mr.rkey);
+	if (ret) {
+		ehca_mr_delete(e_maxmr); /* do not leak the unregistered MR */
+		ib_mr = ERR_PTR(ret);
+		goto get_dma_mr_exit0;
+	}
+	ib_mr = &e_maxmr->ib.ib_mr;
+
+get_dma_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ",
+			 PTR_ERR(ib_mr), pd, mr_access_flags);
+	return ib_mr;
+} /* end ehca_get_dma_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Register @phys_buf_array as an MR on @pd; returns ERR_PTR() on failure. */
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+			       struct ib_phys_buf *phys_buf_array,
+			       int num_phys_buf,
+			       int mr_access_flags,
+			       u64 *iova_start)
+{
+	struct ib_mr *ib_mr;
+	int ret;
+	struct ehca_mr *e_mr;
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	u64 size;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	u32 num_pages_mr;
+	u32 num_pages_4k; /* 4k portion "pages" */
+
+	if ((num_phys_buf <= 0) || !phys_buf_array) {
+		ehca_err(pd->device, "bad input values: num_phys_buf=%x "
+			 "phys_buf_array=%p", num_phys_buf, phys_buf_array);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_phys_mr_exit0;
+	}
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_phys_mr_exit0;
+	}
+
+	/* check physical buffer list and calculate size */
+	ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
+					    iova_start, &size);
+	if (ret) {
+		ib_mr = ERR_PTR(ret);
+		goto reg_phys_mr_exit0;
+	}
+	if ((size == 0) ||
+	    (((u64)iova_start + size) < (u64)iova_start)) { /* wrap-around */
+		ehca_err(pd->device, "bad input values: size=%lx iova_start=%p",
+			 size, iova_start);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_phys_mr_exit0;
+	}
+
+	e_mr = ehca_mr_new();
+	if (!e_mr) {
+		ehca_err(pd->device, "out of memory");
+		ib_mr = ERR_PTR(-ENOMEM);
+		goto reg_phys_mr_exit0;
+	}
+
+	/* page counts must cover iova_start's offset into its first page */
+	num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size +
+			 PAGE_SIZE - 1) / PAGE_SIZE);
+	num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size +
+			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+	/* register MR on HCA, either as max-MR or from the buffer list */
+	if (ehca_mr_is_maxmr(size, iova_start)) {
+		e_mr->flags |= EHCA_MR_FLAG_MAXMR;
+		ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
+				     e_pd, &e_mr->ib.ib_mr.lkey,
+				     &e_mr->ib.ib_mr.rkey);
+		if (ret) {
+			ib_mr = ERR_PTR(ret);
+			goto reg_phys_mr_exit1;
+		}
+	} else {
+		pginfo.type           = EHCA_MR_PGI_PHYS;
+		pginfo.num_pages      = num_pages_mr;
+		pginfo.num_4k         = num_pages_4k;
+		pginfo.num_phys_buf   = num_phys_buf;
+		pginfo.phys_buf_array = phys_buf_array;
+		pginfo.next_4k        = (((u64)iova_start & ~PAGE_MASK) /
+					 EHCA_PAGESIZE);
+
+		ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
+				  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
+				  &e_mr->ib.ib_mr.rkey);
+		if (ret) {
+			ib_mr = ERR_PTR(ret);
+			goto reg_phys_mr_exit1;
+		}
+	}
+
+	/* successful registration of all pages */
+	return &e_mr->ib.ib_mr;
+
+reg_phys_mr_exit1:
+	ehca_mr_delete(e_mr); /* undo ehca_mr_new() */
+reg_phys_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p "
+			 "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
+			 PTR_ERR(ib_mr), pd, phys_buf_array,
+			 num_phys_buf, mr_access_flags, iova_start);
+	return ib_mr;
+} /* end ehca_reg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Register user memory @region as an MR on @pd; ERR_PTR() on failure. */
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
+			       struct ib_umem *region,
+			       int mr_access_flags,
+			       struct ib_udata *udata)
+{
+	struct ib_mr *ib_mr;
+	struct ehca_mr *e_mr;
+	struct ehca_shca *shca;
+	struct ehca_pd *e_pd;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	int ret;
+	u32 num_pages_mr, num_pages_4k; /* num_pages_4k: 4k portion "pages" */
+
+	if (!pd) {
+		ehca_gen_err("bad pd=%p", pd);
+		return ERR_PTR(-EFAULT);
+	}
+	/* pd is validated: only now is it safe to dereference it */
+	shca = container_of(pd->device, struct ehca_shca, ib_device);
+	e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	if (!region) {
+		ehca_err(pd->device, "bad input values: region=%p", region);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+	if (region->page_size != PAGE_SIZE) {
+		ehca_err(pd->device, "page size not supported, "
+			 "region->page_size=%x", region->page_size);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+	if ((region->length == 0) ||
+	    ((region->virt_base + region->length) < region->virt_base)) {
+		ehca_err(pd->device, "bad input values: length=%lx "
+			 "virt_base=%lx", region->length, region->virt_base);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+
+	e_mr = ehca_mr_new();
+	if (!e_mr) {
+		ehca_err(pd->device, "out of memory");
+		ib_mr = ERR_PTR(-ENOMEM);
+		goto reg_user_mr_exit0;
+	}
+
+	/* determine number of MR pages */
+	num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
+			 PAGE_SIZE - 1) / PAGE_SIZE);
+	num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
+			 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+
+	/* register MR on HCA */
+	pginfo.type       = EHCA_MR_PGI_USER;
+	pginfo.num_pages  = num_pages_mr;
+	pginfo.num_4k     = num_pages_4k;
+	pginfo.region     = region;
+	pginfo.next_4k	  = region->offset / EHCA_PAGESIZE;
+	pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
+					       (&region->chunk_list),
+					       list);
+
+	ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
+			  region->length, mr_access_flags, e_pd, &pginfo,
+			  &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+	if (ret) {
+		ib_mr = ERR_PTR(ret);
+		goto reg_user_mr_exit1;
+	}
+	/* successful registration of all pages */
+	return &e_mr->ib.ib_mr;
+
+reg_user_mr_exit1:
+	ehca_mr_delete(e_mr);
+reg_user_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
+			 " udata=%p",
+			 PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+	return ib_mr;
+} /* end ehca_reg_user_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Re-register @mr with the translation/PD/access given by @mr_rereg_mask. */
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+		       int mr_rereg_mask,
+		       struct ib_pd *pd,
+		       struct ib_phys_buf *phys_buf_array,
+		       int num_phys_buf,
+		       int mr_access_flags,
+		       u64 *iova_start)
+{
+	int ret;
+	struct ehca_shca *shca =
+		container_of(mr->device, struct ehca_shca, ib_device);
+	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+	struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+	u64 new_size;
+	u64 *new_start;
+	u32 new_acl;
+	struct ehca_pd *new_pd;
+	u32 tmp_lkey, tmp_rkey;
+	unsigned long sl_flags;
+	u32 num_pages_mr = 0;
+	u32 num_pages_4k = 0; /* 4k portion "pages" */
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	u32 cur_pid = current->tgid;
+
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    (my_pd->ownpid != cur_pid)) {
+		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
+		/* TODO not supported, because PHYP rereg hCall needs pages */
+		ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
+			 "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	if (mr_rereg_mask & IB_MR_REREG_PD) {
+		if (!pd) {
+			ehca_err(mr->device, "rereg with bad pd, pd=%p "
+				 "mr_rereg_mask=%x", pd, mr_rereg_mask);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit0;
+		}
+	}
+
+	if ((mr_rereg_mask &
+	     ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
+	    (mr_rereg_mask == 0)) {
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	/* check other parameters */
+	if (e_mr == shca->maxmr) {
+		/* should be impossible, however reject to be sure */
+		ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
+			 "shca->maxmr=%p mr->lkey=%x",
+			 mr, shca->maxmr, mr->lkey);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
+		if (e_mr->flags & EHCA_MR_FLAG_FMR) {
+			ehca_err(mr->device, "not supported for FMR, mr=%p "
+				 "flags=%x", mr, e_mr->flags);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit0;
+		}
+		if (!phys_buf_array || num_phys_buf <= 0) {
+			ehca_err(mr->device, "bad input values: mr_rereg_mask=%x"
+				 " phys_buf_array=%p num_phys_buf=%x",
+				 mr_rereg_mask, phys_buf_array, num_phys_buf);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit0;
+		}
+	}
+	if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&	/* change ACL */
+	    (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	     ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	      !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
+			 "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
+		ret = -EINVAL;
+		goto rereg_phys_mr_exit0;
+	}
+
+	/* set requested values dependent on rereg request */
+	spin_lock_irqsave(&e_mr->mrlock, sl_flags);
+	new_start = e_mr->start;  /* new == old address */
+	new_size  = e_mr->size;	  /* new == old length */
+	new_acl   = e_mr->acl;	  /* new == old access control */
+	new_pd    = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/
+
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+		new_start = iova_start;	/* change address */
+		/* check physical buffer list and calculate size */
+		ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
+						    num_phys_buf, iova_start,
+						    &new_size);
+		if (ret)
+			goto rereg_phys_mr_exit1;
+		if ((new_size == 0) ||
+		    (((u64)iova_start + new_size) < (u64)iova_start)) {
+			ehca_err(mr->device, "bad input values: new_size=%lx "
+				 "iova_start=%p", new_size, iova_start);
+			ret = -EINVAL;
+			goto rereg_phys_mr_exit1;
+		}
+		num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size +
+				 PAGE_SIZE - 1) / PAGE_SIZE);
+		num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size +
+				 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+		pginfo.type           = EHCA_MR_PGI_PHYS;
+		pginfo.num_pages      = num_pages_mr;
+		pginfo.num_4k         = num_pages_4k;
+		pginfo.num_phys_buf   = num_phys_buf;
+		pginfo.phys_buf_array = phys_buf_array;
+		pginfo.next_4k        = (((u64)iova_start & ~PAGE_MASK) /
+					 EHCA_PAGESIZE);
+	}
+	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+		new_acl = mr_access_flags;
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		new_pd = container_of(pd, struct ehca_pd, ib_pd);
+	/* NOTE(review): mrlock (irqsave) is held across ehca_rereg_mr() */
+	ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
+			    new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+	if (ret)
+		goto rereg_phys_mr_exit1;
+
+	/* successful reregistration: publish new PD and keys on mr */
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		mr->pd = pd;
+	mr->lkey = tmp_lkey;
+	mr->rkey = tmp_rkey;
+
+rereg_phys_mr_exit1:
+	spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
+rereg_phys_mr_exit0:
+	if (ret)
+		ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p "
+			 "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
+			 "iova_start=%p",
+			 ret, mr, mr_rereg_mask, pd, phys_buf_array,
+			 num_phys_buf, mr_access_flags, iova_start);
+	return ret;
+} /* end ehca_rereg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Query the attributes of @mr via hipz_h_query_mr() and fill @mr_attr. */
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
+{
+	struct ehca_mr_hipzout_parms out = {{0},0,0,0,0,0};
+	struct ehca_pd *owner = container_of(mr->pd, struct ehca_pd, ib_pd);
+	struct ehca_mr *emr = container_of(mr, struct ehca_mr, ib.ib_mr);
+	struct ehca_shca *hca =
+		container_of(mr->device, struct ehca_shca, ib_device);
+	u32 caller = current->tgid;
+	unsigned long irq_flags;
+	int ret = -EINVAL;
+	u64 hrc;
+
+	/* a PD with a user context is rejected unless ownpid matches caller */
+	if (owner->ib_pd.uobject && owner->ib_pd.uobject->context &&
+	    (owner->ownpid != caller)) {
+		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+			 caller, owner->ownpid);
+		goto query_mr_exit0;
+	}
+
+	if ((emr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+			 "e_mr->flags=%x", mr, emr, emr->flags);
+		goto query_mr_exit0;
+	}
+
+	ret = 0;
+	memset(mr_attr, 0, sizeof(struct ib_mr_attr));
+	spin_lock_irqsave(&emr->mrlock, irq_flags);
+	hrc = hipz_h_query_mr(hca->ipz_hca_handle, emr, &out);
+	if (hrc == H_SUCCESS) {
+		mr_attr->pd = mr->pd;
+		mr_attr->device_virt_addr = out.vaddr;
+		mr_attr->size = out.len;
+		mr_attr->lkey = out.lkey;
+		mr_attr->rkey = out.rkey;
+		ehca_mrmw_reverse_map_acl(&out.acl,
+					  &mr_attr->mr_access_flags);
+	} else {
+		ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p "
+			 "hca_hndl=%lx mr_hndl=%lx lkey=%x",
+			 hrc, mr, hca->ipz_hca_handle.handle,
+			 emr->ipz_mr_handle.handle, mr->lkey);
+		ret = ehca_mrmw_map_hrc_query_mr(hrc);
+	}
+	spin_unlock_irqrestore(&emr->mrlock, irq_flags);
+
+query_mr_exit0:
+	if (ret)
+		ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p",
+			 ret, mr, mr_attr);
+	return ret;
+} /* end ehca_query_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Deregister @mr in firmware and free its driver object; 0 on success. */
+int ehca_dereg_mr(struct ib_mr *mr)
+{
+	struct ehca_pd *owner = container_of(mr->pd, struct ehca_pd, ib_pd);
+	struct ehca_mr *emr = container_of(mr, struct ehca_mr, ib.ib_mr);
+	struct ehca_shca *hca =
+		container_of(mr->device, struct ehca_shca, ib_device);
+	u32 caller = current->tgid;
+	int ret = -EINVAL;
+	u64 hrc;
+
+	if (owner->ib_pd.uobject && owner->ib_pd.uobject->context &&
+	    (owner->ownpid != caller)) {
+		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
+			 caller, owner->ownpid);
+		goto dereg_mr_exit0;
+	}
+
+	if ((emr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+			 "e_mr->flags=%x", mr, emr, emr->flags);
+		goto dereg_mr_exit0;
+	}
+	if (emr == hca->maxmr) {
+		/* should be impossible, however reject to be sure */
+		ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
+			 "shca->maxmr=%p mr->lkey=%x",
+			 mr, hca->maxmr, mr->lkey);
+		goto dereg_mr_exit0;
+	}
+
+	/* TODO: BUSY: MR still has bound window(s) */
+	hrc = hipz_h_free_resource_mr(hca->ipz_hca_handle, emr);
+	if (hrc != H_SUCCESS) {
+		ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p "
+			 "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
+			 hrc, hca, emr, hca->ipz_hca_handle.handle,
+			 emr->ipz_mr_handle.handle, mr->lkey);
+		ret = ehca_mrmw_map_hrc_free_mr(hrc);
+		goto dereg_mr_exit0;
+	}
+
+	/* the hcall succeeded, so the driver object can be freed as well */
+	ehca_mr_delete(emr);
+	ret = 0;
+
+dereg_mr_exit0:
+	if (ret)
+		ehca_err(mr->device, "ret=%x mr=%p", ret, mr);
+	return ret;
+} /* end ehca_dereg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Allocate a memory window on @pd; returns the ib_mw or an ERR_PTR(). */
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
+{
+	struct ehca_mw_hipzout_parms out = {{0},0};
+	struct ehca_shca *hca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_pd *epd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_mw *emw = ehca_mw_new();
+	struct ib_mw *err;
+	u64 hrc;
+
+	if (!emw) {
+		err = ERR_PTR(-ENOMEM);
+		goto alloc_mw_exit0;
+	}
+
+	hrc = hipz_h_alloc_resource_mw(hca->ipz_hca_handle, emw,
+				       epd->fw_pd, &out);
+	if (hrc == H_SUCCESS) {
+		/* successful MW allocation */
+		emw->ipz_mw_handle = out.handle;
+		emw->ib_mw.rkey = out.rkey;
+		return &emw->ib_mw;
+	}
+
+	/* the hcall failed: report it and drop the driver object again */
+	ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
+		 "shca=%p hca_hndl=%lx mw=%p",
+		 hrc, hca, hca->ipz_hca_handle.handle, emw);
+	err = ERR_PTR(ehca_mrmw_map_hrc_alloc(hrc));
+	ehca_mw_delete(emw);
+
+alloc_mw_exit0:
+	if (IS_ERR(err))
+		ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(err), pd);
+	return err;
+} /* end ehca_alloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+/* Binding memory windows is not implemented: always fails with -EPERM. */
+int ehca_bind_mw(struct ib_qp *qp,
+		 struct ib_mw *mw,
+		 struct ib_mw_bind *mw_bind)
+{
+	/* TODO: not supported up to now */
+	ehca_gen_err("bind MW currently not supported by HCAD");
+	return -EPERM;
+} /* end ehca_bind_mw() */
+
+/*----------------------------------------------------------------------*/
+
+/* Free @mw in firmware, then release its driver object; 0 on success. */
+int ehca_dealloc_mw(struct ib_mw *mw)
+{
+	struct ehca_mw *emw = container_of(mw, struct ehca_mw, ib_mw);
+	struct ehca_shca *hca =
+		container_of(mw->device, struct ehca_shca, ib_device);
+	u64 hrc = hipz_h_free_resource_mw(hca->ipz_hca_handle, emw);
+
+	if (hrc == H_SUCCESS) {
+		ehca_mw_delete(emw);
+		return 0;
+	}
+
+	ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p "
+		 "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
+		 hrc, hca, mw, mw->rkey, hca->ipz_hca_handle.handle,
+		 emw->ipz_mw_handle.handle);
+	return ehca_mrmw_map_hrc_free_mw(hrc);
+} /* end ehca_dealloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+/* Allocate an FMR on @pd sized by @fmr_attr; ERR_PTR() on failure. */
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+			      int mr_access_flags,
+			      struct ib_fmr_attr *fmr_attr)
+{
+	struct ib_fmr *ib_fmr;
+	struct ehca_shca *shca =
+		container_of(pd->device, struct ehca_shca, ib_device);
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_mr *e_fmr;
+	int ret;
+	u32 tmp_lkey, tmp_rkey;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+
+	/* check other parameters */
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+	if (mr_access_flags & IB_ACCESS_MW_BIND) {
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+	if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
+		ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
+			 "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
+			 fmr_attr->max_pages, fmr_attr->max_maps,
+			 fmr_attr->page_shift);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+	if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) &&
+	    ((1 << fmr_attr->page_shift) != PAGE_SIZE)) {
+		ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
+			 fmr_attr->page_shift);
+		ib_fmr = ERR_PTR(-EINVAL);
+		goto alloc_fmr_exit0;
+	}
+
+	e_fmr = ehca_mr_new();
+	if (!e_fmr) {
+		ib_fmr = ERR_PTR(-ENOMEM);
+		goto alloc_fmr_exit0;
+	}
+	e_fmr->flags |= EHCA_MR_FLAG_FMR;
+	/* register MR on HCA; compute the size in u64 so it cannot overflow */
+	ret = ehca_reg_mr(shca, e_fmr, NULL,
+			  (u64)fmr_attr->max_pages << fmr_attr->page_shift,
+			  mr_access_flags, e_pd, &pginfo,
+			  &tmp_lkey, &tmp_rkey);
+	if (ret) {
+		ib_fmr = ERR_PTR(ret);
+		goto alloc_fmr_exit1;
+	}
+
+	/* successful */
+	e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
+	e_fmr->fmr_max_pages = fmr_attr->max_pages;
+	e_fmr->fmr_max_maps = fmr_attr->max_maps;
+	e_fmr->fmr_map_cnt = 0;
+	return &e_fmr->ib.ib_fmr;
+
+alloc_fmr_exit1:
+	ehca_mr_delete(e_fmr);
+alloc_fmr_exit0:
+	if (IS_ERR(ib_fmr))
+		ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x "
+			 "fmr_attr=%p", PTR_ERR(ib_fmr), pd,
+			 mr_access_flags, fmr_attr);
+	return ib_fmr;
+} /* end ehca_alloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Map the @list_len pages of @page_list into @fmr at address @iova. */
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+		      u64 *page_list,
+		      int list_len,
+		      u64 iova)
+{
+	int ret;
+	struct ehca_shca *shca =
+		container_of(fmr->device, struct ehca_shca, ib_device);
+	struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+	struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	u32 tmp_lkey, tmp_rkey;
+
+	if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+			 e_fmr, e_fmr->flags);
+		ret = -EINVAL;
+		goto map_phys_fmr_exit0;
+	}
+	ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
+	if (ret)
+		goto map_phys_fmr_exit0;
+	if (iova % e_fmr->fmr_page_size) {
+		/* only whole-numbered pages */
+		ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x",
+			 iova, e_fmr->fmr_page_size);
+		ret = -EINVAL;
+		goto map_phys_fmr_exit0;
+	}
+	if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
+		/* HCAD does not limit the maps, however trace this anyway */
+		ehca_info(fmr->device, "map limit exceeded, fmr=%p "
+			  "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
+			  fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
+	}
+
+	pginfo.type      = EHCA_MR_PGI_FMR;
+	pginfo.num_pages = list_len;
+	pginfo.num_4k    = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE);
+	pginfo.page_list = page_list;
+	pginfo.next_4k   = ((iova & (e_fmr->fmr_page_size-1)) /
+			    EHCA_PAGESIZE);
+
+	/* widen before multiplying so the byte size cannot wrap */
+	ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova,
+			    (u64)list_len * e_fmr->fmr_page_size,
+			    e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+	if (ret)
+		goto map_phys_fmr_exit0;
+	/* successful reregistration */
+	e_fmr->fmr_map_cnt++;
+	e_fmr->ib.ib_fmr.lkey = tmp_lkey;
+	e_fmr->ib.ib_fmr.rkey = tmp_rkey;
+	return 0;
+
+map_phys_fmr_exit0:
+	if (ret)
+		ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
+			 "iova=%lx",
+			 ret, fmr, page_list, list_len, iova);
+	return ret;
+} /* end ehca_map_phys_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Unmap every FMR on @fmr_list; all must belong to the same SHCA. */
+int ehca_unmap_fmr(struct list_head *fmr_list)
+{
+	int ret = 0;
+	struct ib_fmr *ib_fmr;
+	struct ehca_shca *shca = NULL;
+	struct ehca_shca *prev_shca;
+	struct ehca_mr *e_fmr;
+	u32 num_fmr = 0;
+	u32 unmap_fmr_cnt = 0;
+
+	/* check all FMR belong to same SHCA, and check internal flag */
+	list_for_each_entry(ib_fmr, fmr_list, list) {
+		prev_shca = shca;
+		if (!ib_fmr) { /* NOTE(review): cursor presumably never NULL */
+			ehca_gen_err("bad fmr=%p in list", ib_fmr);
+			ret = -EINVAL;
+			goto unmap_fmr_exit0;
+		}
+		shca = container_of(ib_fmr->device, struct ehca_shca,
+				    ib_device);
+		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+		if ((shca != prev_shca) && prev_shca) {
+			ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
+				 "prev_shca=%p e_fmr=%p",
+				 shca, prev_shca, e_fmr);
+			ret = -EINVAL;
+			goto unmap_fmr_exit0;
+		}
+		if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+			ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
+				 "e_fmr->flags=%x", e_fmr, e_fmr->flags);
+			ret = -EINVAL;
+			goto unmap_fmr_exit0;
+		}
+		num_fmr++;
+	}
+
+	/* loop over all FMRs to unmap */
+	list_for_each_entry(ib_fmr, fmr_list, list) {
+		unmap_fmr_cnt++;
+		e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+		shca = container_of(ib_fmr->device, struct ehca_shca,
+				    ib_device);
+		ret = ehca_unmap_one_fmr(shca, e_fmr);
+		if (ret) {
+			/* unmap failed, stop unmapping of rest of FMRs */
+			ehca_err(&shca->ib_device, "unmap of one FMR failed, "
+				 "stop rest, e_fmr=%p num_fmr=%x "
+				 "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
+				 unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
+			goto unmap_fmr_exit0;
+		}
+	}
+unmap_fmr_exit0:
+	if (ret)
+		ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
+			     ret, fmr_list, num_fmr, unmap_fmr_cnt);
+	return ret;
+} /* end ehca_unmap_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Free FMR @fmr in firmware and release its driver object; 0 on success. */
+int ehca_dealloc_fmr(struct ib_fmr *fmr)
+{
+	struct ehca_mr *efmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+	struct ehca_shca *hca =
+		container_of(fmr->device, struct ehca_shca, ib_device);
+	int ret = -EINVAL;
+	u64 hrc;
+
+	if (!(efmr->flags & EHCA_MR_FLAG_FMR)) {
+		ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+			 efmr, efmr->flags);
+		goto free_fmr_exit0;
+	}
+
+	hrc = hipz_h_free_resource_mr(hca->ipz_hca_handle, efmr);
+	if (hrc == H_SUCCESS) {
+		/* successful deregistration */
+		ehca_mr_delete(efmr);
+		return 0;
+	}
+
+	ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p "
+		 "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
+		 hrc, efmr, hca->ipz_hca_handle.handle,
+		 efmr->ipz_mr_handle.handle, fmr->lkey);
+	ret = ehca_mrmw_map_hrc_free_mr(hrc);
+
+free_fmr_exit0:
+	if (ret)
+		ehca_err(&hca->ib_device, "ret=%x fmr=%p", ret, fmr);
+	return ret;
+} /* end ehca_dealloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+/* Allocate MR resource on the HCA and register the pages from @pginfo. */
+int ehca_reg_mr(struct ehca_shca *shca,
+		struct ehca_mr *e_mr,
+		u64 *iova_start,
+		u64 size,
+		int acl,
+		struct ehca_pd *e_pd,
+		struct ehca_mr_pginfo *pginfo,
+		u32 *lkey, /*OUT*/
+		u32 *rkey) /*OUT*/
+{
+	int ret;
+	u64 h_ret;
+	u32 hipz_acl;
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+	if (ehca_use_hp_mr == 1)
+	        hipz_acl |= 0x00000001; /* NOTE(review): bit meaning not shown here */
+
+	h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
+					 (u64)iova_start, size, hipz_acl,
+					 e_pd->fw_pd, &hipzout);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
+			 "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
+		ret = ehca_mrmw_map_hrc_alloc(h_ret);
+		goto ehca_reg_mr_exit0;
+	}
+	e_mr->ipz_mr_handle = hipzout.handle;
+
+	ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
+	if (ret)
+		goto ehca_reg_mr_exit1;
+
+	/* successful registration */
+	e_mr->num_pages = pginfo->num_pages;
+	e_mr->num_4k    = pginfo->num_4k;
+	e_mr->start     = iova_start;
+	e_mr->size      = size;
+	e_mr->acl       = acl;
+	*lkey = hipzout.lkey;
+	*rkey = hipzout.rkey;
+	return 0;
+
+ehca_reg_mr_exit1: /* page registration failed: free the MR resource again */
+	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
+			 "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
+			 "pginfo=%p num_pages=%lx num_4k=%lx ret=%x",
+			 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
+			 hipzout.lkey, pginfo, pginfo->num_pages,
+			 pginfo->num_4k, ret);
+		ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
+			 "not recoverable");
+	}
+ehca_reg_mr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+			 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+			 "num_pages=%lx num_4k=%lx",
+			 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
+			 pginfo->num_pages, pginfo->num_4k);
+	return ret;
+} /* end ehca_reg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+		       struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo)
+{
+	int ret = 0;
+	u64 h_ret;
+	u64 rpage;
+	u64 *kpage;
+	u64 shots;	/* hCalls needed, each carries at most 512 pages */
+	u32 shot;
+	u32 rnum;	/* pages handed over by the current hCall */
+	int is_last;
+
+	kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!kpage) {
+		ehca_err(&shca->ib_device, "kpage alloc failed");
+		ret = -ENOMEM;
+		goto ehca_reg_mr_rpages_exit0;
+	}
+
+	shots = (pginfo->num_4k + 512 - 1) / 512;
+	for (shot = 0; shot < shots; shot++) {
+		is_last = (shot == shots - 1);
+		/* every shot is full, except possibly the last one */
+		rnum = 512;
+		if (is_last && (pginfo->num_4k % 512))
+			rnum = pginfo->num_4k % 512;
+
+		if (rnum <= 1) {
+			/* single page: its address is passed directly */
+			ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage);
+			if (ret) {
+				ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 "
+					 "bad rc, ret=%x i=%x", ret, shot);
+				ret = -EFAULT;
+				goto ehca_reg_mr_rpages_exit1;
+			}
+		} else {
+			/* several pages: pass the address of the page list */
+			ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage);
+			if (ret) {
+				ehca_err(&shca->ib_device, "ehca_set_pagebuf "
+					 "bad rc, ret=%x rnum=%x kpage=%p",
+					 ret, rnum, kpage);
+				ret = -EFAULT;
+				goto ehca_reg_mr_rpages_exit1;
+			}
+			rpage = virt_to_abs(kpage);
+			if (!rpage) {
+				ehca_err(&shca->ib_device, "kpage=%p i=%x",
+					 kpage, shot);
+				ret = -EFAULT;
+				goto ehca_reg_mr_rpages_exit1;
+			}
+		}
+
+		h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr,
+						 0, /* pagesize 4k */
+						 0, rpage, rnum);
+
+		/*
+		 * the last shot has to report 'registration complete'
+		 * (H_SUCCESS), all others 'page registered'
+		 * (H_PAGE_REGISTERED)
+		 */
+		if (is_last && (h_ret != H_SUCCESS)) {
+			ehca_err(&shca->ib_device, "last "
+				 "hipz_reg_rpage_mr failed, h_ret=%lx "
+				 "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
+				 " lkey=%x", h_ret, e_mr, shot,
+				 shca->ipz_hca_handle.handle,
+				 e_mr->ipz_mr_handle.handle,
+				 e_mr->ib.ib_mr.lkey);
+			ret = ehca_mrmw_map_hrc_rrpg_last(h_ret);
+			break;
+		}
+		if (!is_last && (h_ret != H_PAGE_REGISTERED)) {
+			ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
+				 "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx "
+				 "mr_hndl=%lx", h_ret, e_mr, shot,
+				 e_mr->ib.ib_mr.lkey,
+				 shca->ipz_hca_handle.handle,
+				 e_mr->ipz_mr_handle.handle);
+			ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret);
+			break;
+		}
+		ret = 0;
+	} /* end for(shot) */
+
+ehca_reg_mr_rpages_exit1:
+	kfree(kpage);
+ehca_reg_mr_rpages_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
+			 "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo,
+			 pginfo->num_pages, pginfo->num_4k);
+	return ret;
+} /* end ehca_reg_mr_rpages() */
+
+/*----------------------------------------------------------------------*/
+
+inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
+				struct ehca_mr *e_mr,
+				u64 *iova_start,
+				u64 size,
+				u32 acl,
+				struct ehca_pd *e_pd,
+				struct ehca_mr_pginfo *pginfo,
+				u32 *lkey, /*OUT*/
+				u32 *rkey) /*OUT*/
+{
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+	struct ehca_mr_pginfo saved_pginfo;
+	u64 *pagelist;
+	u64 pagelist_abs;
+	u32 hipz_acl;
+	u64 h_ret;
+	int ret;
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+	pagelist = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (!pagelist) {
+		ehca_err(&shca->ib_device, "kpage alloc failed");
+		ret = -ENOMEM;
+		goto ehca_rereg_mr_rereg1_exit0;
+	}
+
+	/* remember the page cursor so that a fallback can start over */
+	saved_pginfo = *pginfo;
+	ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, pagelist);
+	if (ret) {
+		ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
+			 "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p",
+			 e_mr, pginfo, pginfo->type, pginfo->num_pages,
+			 pginfo->num_4k, pagelist);
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+	pagelist_abs = virt_to_abs(pagelist);
+	if (!pagelist_abs) {
+		ehca_err(&shca->ib_device, "kpage=%p", pagelist);
+		ret = -EFAULT;
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+	h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
+				      (u64)iova_start, size, hipz_acl,
+				      e_pd->fw_pd, pagelist_abs, &hipzout);
+	if (h_ret != H_SUCCESS) {
+		/*
+		 * e.g. H_MR_CONDITION (MW bound or MR is shared): restore the
+		 * page cursor and let the caller retry with the 3 hCalls
+		 */
+		ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
+			  "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
+		*pginfo = saved_pginfo;
+		ret = -EAGAIN;
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+	if ((u64*)hipzout.vaddr != iova_start) {
+		ehca_err(&shca->ib_device, "PHYP changed iova_start in "
+			 "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
+			 "mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
+			 hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
+			 e_mr->ib.ib_mr.lkey, hipzout.lkey);
+		ret = -EFAULT;
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+
+	/* success, start and start_out are identical for eServer HCAs */
+	e_mr->num_pages = pginfo->num_pages;
+	e_mr->num_4k    = pginfo->num_4k;
+	e_mr->start     = iova_start;
+	e_mr->size      = size;
+	e_mr->acl       = acl;
+	*lkey = hipzout.lkey;
+	*rkey = hipzout.rkey;
+
+ehca_rereg_mr_rereg1_exit1:
+	kfree(pagelist);
+ehca_rereg_mr_rereg1_exit0:
+	if ( ret && (ret != -EAGAIN) )
+		ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
+			 "pginfo=%p num_pages=%lx num_4k=%lx",
+			 ret, *lkey, *rkey, pginfo, pginfo->num_pages,
+			 pginfo->num_4k);
+	return ret;
+} /* end ehca_rereg_mr_rereg1() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+		  struct ehca_mr *e_mr,
+		  u64 *iova_start,
+		  u64 size,
+		  int acl,
+		  struct ehca_pd *e_pd,
+		  struct ehca_mr_pginfo *pginfo,
+		  u32 *lkey,
+		  u32 *rkey)
+{
+	int ret = 0;
+	u64 h_ret;
+	int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
+	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
+	struct ehca_mr old_mr; /* snapshot of *e_mr taken before cleaning it */
+	u32 tail_offs;
+
+	/* one hCall is only tried for small MRs which do not grow */
+	if (!((pginfo->num_4k <= 512) && (e_mr->num_4k <= 512) &&
+	      (pginfo->num_4k <= e_mr->num_4k))) {
+		ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx "
+			 "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k);
+		rereg_1_hcall = 0;
+		rereg_3_hcall = 1;
+	}
+
+	/* a max-MR loses its max-MR flag and always takes the 3 hCalls */
+	if (e_mr->flags & EHCA_MR_FLAG_MAXMR) {
+		rereg_1_hcall = 0;
+		rereg_3_hcall = 1;
+		e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
+		ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
+			 e_mr);
+	}
+
+	if (rereg_1_hcall) {
+		ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
+					   acl, e_pd, pginfo, lkey, rkey);
+		if (ret == -EAGAIN)
+			rereg_3_hcall = 1; /* fall back to the 3 hCalls */
+		else if (ret)
+			goto ehca_rereg_mr_exit0;
+	}
+
+	if (!rereg_3_hcall)
+		goto ehca_rereg_mr_exit0;
+
+	/* the 3-hCall sequence starts by deregistering the old MR */
+	h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+			 "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx "
+			 "mr->lkey=%x",
+			 h_ret, e_mr, shca->ipz_hca_handle.handle,
+			 e_mr->ipz_mr_handle.handle,
+			 e_mr->ib.ib_mr.lkey);
+		ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+		goto ehca_rereg_mr_exit0;
+	}
+	/* clean ehca_mr_t, without changing struct ib_mr and lock */
+	old_mr = *e_mr;
+	ehca_mr_deletenew(e_mr);
+
+	/* these MR values are carried over into the cleaned MR */
+	e_mr->flags         = old_mr.flags;
+	e_mr->fmr_page_size = old_mr.fmr_page_size;
+	e_mr->fmr_max_pages = old_mr.fmr_max_pages;
+	e_mr->fmr_max_maps  = old_mr.fmr_max_maps;
+	e_mr->fmr_map_cnt   = old_mr.fmr_map_cnt;
+	/* then register the MR anew with the requested attributes */
+	ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
+			  e_pd, pginfo, lkey, rkey);
+	if (ret) {
+		/* put back everything from 'flags' up to the struct's end */
+		tail_offs = (u64)(&e_mr->flags) - (u64)e_mr;
+		memcpy(&e_mr->flags, &(old_mr.flags),
+		       sizeof(struct ehca_mr) - tail_offs);
+	}
+
+ehca_rereg_mr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+			 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+			 "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
+			 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
+			 acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey,
+			 rereg_1_hcall, rereg_3_hcall);
+	return ret;
+} /* end ehca_rereg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+		       struct ehca_mr *e_fmr)
+{
+	int ret = 0;
+	u64 h_ret;
+	int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */
+	int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */
+	struct ehca_pd *e_pd =
+		container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
+	struct ehca_mr save_fmr;
+	u32 tmp_lkey, tmp_rkey;
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+
+	/* first check if reregistration hCall can be used for unmap */
+	if (e_fmr->fmr_max_pages > 512) {
+		rereg_1_hcall = 0;
+		rereg_3_hcall = 1;
+	}
+
+	if (rereg_1_hcall) {
+		/*
+		 * note: after using rereg hcall with len=0,
+		 * rereg hcall must be used again for registering pages
+		 */
+		h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
+					      0, 0, e_pd->fw_pd, 0, &hipzout);
+		if (h_ret != H_SUCCESS) {
+			/*
+			 * should not happen, because length checked above,
+			 * FMRs are not shared and no MW bound to FMRs
+			 */
+			ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
+				 "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
+				 "mr_hndl=%lx lkey=%x lkey_out=%x",
+				 h_ret, e_fmr, shca->ipz_hca_handle.handle,
+				 e_fmr->ipz_mr_handle.handle,
+				 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
+			rereg_3_hcall = 1;
+		} else {
+			/* successful reregistration */
+			e_fmr->start = NULL;
+			e_fmr->size = 0;
+			tmp_lkey = hipzout.lkey;
+			tmp_rkey = hipzout.rkey;
+		}
+	}
+
+	if (rereg_3_hcall) {
+		struct ehca_mr save_mr;
+
+		/* first free old FMR */
+		h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+				 "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
+				 "lkey=%x",
+				 h_ret, e_fmr, shca->ipz_hca_handle.handle,
+				 e_fmr->ipz_mr_handle.handle,
+				 e_fmr->ib.ib_fmr.lkey);
+			ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+			goto ehca_unmap_one_fmr_exit0;
+		}
+		/* clean ehca_mr_t, without changing lock */
+		save_fmr = *e_fmr;
+		ehca_mr_deletenew(e_fmr);
+
+		/* set some MR values */
+		e_fmr->flags = save_fmr.flags;
+		e_fmr->fmr_page_size = save_fmr.fmr_page_size;
+		e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
+		e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
+		e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
+		e_fmr->acl = save_fmr.acl;
+
+		pginfo.type      = EHCA_MR_PGI_FMR;
+		pginfo.num_pages = 0;
+		pginfo.num_4k    = 0;
+		ret = ehca_reg_mr(shca, e_fmr, NULL,
+				  (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
+				  e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
+				  &tmp_rkey);
+		if (ret) {
+			u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
+			memcpy(&e_fmr->flags, &(save_mr.flags),
+			       sizeof(struct ehca_mr) - offset);
+			goto ehca_unmap_one_fmr_exit0;
+		}
+	}
+
+ehca_unmap_one_fmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
+			 "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x",
+			 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages,
+			 rereg_1_hcall, rereg_3_hcall);
+	return ret;
+} /* end ehca_unmap_one_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_smr(struct ehca_shca *shca,
+		 struct ehca_mr *e_origmr,
+		 struct ehca_mr *e_newmr,
+		 u64 *iova_start,
+		 int acl,
+		 struct ehca_pd *e_pd,
+		 u32 *lkey, /*OUT*/
+		 u32 *rkey) /*OUT*/
+{
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+	u32 hipz_acl;
+	u64 h_ret;
+	int ret;
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
+				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
+				    &hipzout);
+	if (h_ret == H_SUCCESS) {
+		/* new MR takes page counts and size from the original MR */
+		e_newmr->num_pages     = e_origmr->num_pages;
+		e_newmr->num_4k        = e_origmr->num_4k;
+		e_newmr->start         = iova_start;
+		e_newmr->size          = e_origmr->size;
+		e_newmr->acl           = acl;
+		e_newmr->ipz_mr_handle = hipzout.handle;
+		*lkey = hipzout.lkey;
+		*rkey = hipzout.rkey;
+		return 0;
+	}
+
+	ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+		 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
+		 "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+		 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
+		 shca->ipz_hca_handle.handle,
+		 e_origmr->ipz_mr_handle.handle,
+		 e_origmr->ib.ib_mr.lkey);
+
+	/* translate the hCall result and log it once more if it is bad */
+	ret = ehca_mrmw_map_hrc_reg_smr(h_ret);
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p "
+			 "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
+			 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
+	return ret;
+} /* end ehca_reg_smr() */
+
+/*----------------------------------------------------------------------*/
+
+/* register internal max-MR to internal SHCA */
+int ehca_reg_internal_maxmr(
+	struct ehca_shca *shca,
+	struct ehca_pd *e_pd,
+	struct ehca_mr **e_maxmr)  /*OUT*/
+{
+	struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+	struct ib_phys_buf whole_mem; /* the one buffer backing the max-MR */
+	struct ehca_mr *maxmr;
+	u64 *vstart;
+	u64 len;
+	u32 kernel_pages; /* number of PAGE_SIZE pages */
+	u32 hca_pages;    /* number of EHCA_PAGESIZE (4k) portions */
+	int ret;
+
+	maxmr = ehca_mr_new();
+	if (!maxmr) {
+		ehca_err(&shca->ib_device, "out of memory");
+		ret = -ENOMEM;
+		goto ehca_reg_internal_maxmr_exit0;
+	}
+	maxmr->flags |= EHCA_MR_FLAG_MAXMR;
+
+	/* max-MR: starts at KERNELBASE, high_memory - PAGE_OFFSET bytes */
+	vstart = (u64*)KERNELBASE;
+	len = (u64)high_memory - PAGE_OFFSET;
+	whole_mem.addr = 0;
+	whole_mem.size = len;
+	/* round up to whole pages, once per page size unit */
+	kernel_pages = (((u64)vstart % PAGE_SIZE) + len + PAGE_SIZE - 1) /
+		PAGE_SIZE;
+	hca_pages = (((u64)vstart % EHCA_PAGESIZE) + len + EHCA_PAGESIZE - 1) /
+		EHCA_PAGESIZE;
+
+	pginfo.type           = EHCA_MR_PGI_PHYS;
+	pginfo.num_pages      = kernel_pages;
+	pginfo.num_4k         = hca_pages;
+	pginfo.num_phys_buf   = 1;
+	pginfo.phys_buf_array = &whole_mem;
+
+	ret = ehca_reg_mr(shca, maxmr, vstart, len, 0, e_pd, &pginfo,
+			  &maxmr->ib.ib_mr.lkey, &maxmr->ib.ib_mr.rkey);
+	if (ret) {
+		ehca_err(&shca->ib_device, "reg of internal max MR failed, "
+			 "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x "
+			 "num_pages_4k=%x", maxmr, vstart, len,
+			 kernel_pages, hca_pages);
+		goto ehca_reg_internal_maxmr_exit1;
+	}
+
+	/* registered: attach the MR to its PD and hand it to the caller */
+	maxmr->ib.ib_mr.device = e_pd->ib_pd.device;
+	maxmr->ib.ib_mr.pd = &e_pd->ib_pd;
+	maxmr->ib.ib_mr.uobject = NULL;
+	atomic_inc(&(e_pd->ib_pd.usecnt));
+	atomic_set(&(maxmr->ib.ib_mr.usecnt), 0);
+	*e_maxmr = maxmr;
+	return 0;
+
+ehca_reg_internal_maxmr_exit1:
+	ehca_mr_delete(maxmr);
+ehca_reg_internal_maxmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p",
+			 ret, shca, e_pd, e_maxmr);
+	return ret;
+} /* end ehca_reg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+		   struct ehca_mr *e_newmr,
+		   u64 *iova_start,
+		   int acl,
+		   struct ehca_pd *e_pd,
+		   u32 *lkey,
+		   u32 *rkey)
+{
+	struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+	struct ehca_mr *maxmr = shca->maxmr; /* MR the new one is shared with */
+	u32 hipz_acl;
+	u64 h_ret;
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+
+	h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, maxmr,
+				    (u64)iova_start, hipz_acl, e_pd->fw_pd,
+				    &hipzout);
+	if (h_ret == H_SUCCESS) {
+		/* new MR takes page counts and size from the SHCA's max-MR */
+		e_newmr->num_pages     = maxmr->num_pages;
+		e_newmr->num_4k        = maxmr->num_4k;
+		e_newmr->start         = iova_start;
+		e_newmr->size          = maxmr->size;
+		e_newmr->acl           = acl;
+		e_newmr->ipz_mr_handle = hipzout.handle;
+		*lkey = hipzout.lkey;
+		*rkey = hipzout.rkey;
+		return 0;
+	}
+
+	ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+		 "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+		 h_ret, maxmr, shca->ipz_hca_handle.handle,
+		 maxmr->ipz_mr_handle.handle, maxmr->ib.ib_mr.lkey);
+	return ehca_mrmw_map_hrc_reg_smr(h_ret);
+} /* end ehca_reg_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
+{
+	struct ehca_mr *maxmr = shca->maxmr;
+	struct ib_pd *pd;
+	int ret;
+
+	if (!maxmr) {
+		ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
+		ret = -EINVAL;
+		goto ehca_dereg_internal_maxmr_exit0;
+	}
+
+	/* read the PD now; the MR may be gone after dereg (TODO confirm) */
+	pd = maxmr->ib.ib_mr.pd;
+	shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
+
+	ret = ehca_dereg_mr(&maxmr->ib.ib_mr);
+	if (!ret) {
+		/* drop the PD use count taken when the max-MR was registered */
+		atomic_dec(&pd->usecnt);
+	} else {
+		ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
+			 "ret=%x e_maxmr=%p shca=%p lkey=%x",
+			 ret, maxmr, shca, maxmr->ib.ib_mr.lkey);
+		shca->maxmr = maxmr; /* dereg failed, put indication back */
+	}
+
+ehca_dereg_internal_maxmr_exit0:
+	if (ret)
+		ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p",
+			 ret, shca, shca->maxmr);
+	return ret;
+} /* end ehca_dereg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * check physical buffer array of MR verbs for validity and calculate the MR
+ * size; num_phys_buf < 1 is rejected (it would wrap in the u32 loop compare)
+ */
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+				  int num_phys_buf,
+				  u64 *iova_start,
+				  u64 *size)
+{
+	struct ib_phys_buf *pbuf = phys_buf_array;
+	u64 size_count = 0;
+	u32 i;
+
+	if (num_phys_buf <= 0) {
+		ehca_gen_err("bad phys buf array len, num_phys_buf=%x",
+			     num_phys_buf);
+		return -EINVAL;
+	}
+	/* check first buffer: same offset within the page as iova_start */
+	if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
+		ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
+			     "pbuf->addr=%lx pbuf->size=%lx",
+			     iova_start, pbuf->addr, pbuf->size);
+		return -EINVAL;
+	}
+	if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
+	    (num_phys_buf > 1)) {
+		ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx "
+			     "pbuf->size=%lx", pbuf->addr, pbuf->size);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < num_phys_buf; i++) {
+		if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
+			ehca_gen_err("bad address, i=%x pbuf->addr=%lx "
+				     "pbuf->size=%lx",
+				     i, pbuf->addr, pbuf->size);
+			return -EINVAL;
+		}
+		if (((i > 0) &&	/* not 1st */
+		     (i < (num_phys_buf - 1)) &&	/* not last */
+		     (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
+			ehca_gen_err("bad size, i=%x pbuf->size=%lx",
+				     i, pbuf->size);
+			return -EINVAL;
+		}
+		size_count += pbuf->size;
+		pbuf++;
+	}
+	*size = size_count;
+	return 0;
+} /* end ehca_mr_chk_buf_and_calc_size() */
+
+/*----------------------------------------------------------------------*/
+
+/* check page list of map FMR verb for validity */
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+			     u64 *page_list,
+			     int list_len)
+{
+	u32 idx;
+
+	/*
+	 * the list must be non-empty and must not exceed the FMR's page limit
+	 */
+	if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
+		ehca_gen_err("bad list_len, list_len=%x "
+			     "e_fmr->fmr_max_pages=%x fmr=%p",
+			     list_len, e_fmr->fmr_max_pages, e_fmr);
+		return -EINVAL;
+	}
+
+	/* every entry must be a multiple of the FMR page size */
+	for (idx = 0; idx < list_len; idx++) {
+		if ((page_list[idx] % e_fmr->fmr_page_size) == 0)
+			continue;
+		ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p "
+			     "fmr_page_size=%x", idx, page_list[idx],
+			     &page_list[idx], e_fmr, e_fmr->fmr_page_size);
+		return -EINVAL;
+	}
+
+	return 0;
+} /* end ehca_fmr_check_page_list() */
+
+/*----------------------------------------------------------------------*/
+
+/* setup page buffer from page info: store up to 'number' pages in kpage[] */
+int ehca_set_pagebuf(struct ehca_mr *e_mr,
+		     struct ehca_mr_pginfo *pginfo,
+		     u32 number,
+		     u64 *kpage)
+{
+	int ret = 0;
+	struct ib_umem_chunk *prev_chunk;
+	struct ib_umem_chunk *chunk;
+	struct ib_phys_buf *pbuf;
+	u64 *fmrlist;
+	u64 num4k, pgaddr, offs4k;
+	u32 i = 0; /* PHYS, FMR: entries stored; USER: index in chunk */
+	u32 j = 0; /* USER: entries stored */
+
+	if (pginfo->type == EHCA_MR_PGI_PHYS) {
+		/* loop over desired phys_buf_array entries */
+		while (i < number) {
+			pbuf   = pginfo->phys_buf_array + pginfo->next_buf;
+			num4k  = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size +
+				  EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
+			offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
+			while (pginfo->next_4k < offs4k + num4k) {
+				/* sanity check against announced counts */
+				if ((pginfo->page_cnt >= pginfo->num_pages) ||
+				    (pginfo->page_4k_cnt >= pginfo->num_4k)) {
+					ehca_gen_err("page_cnt >= num_pages, "
+						     "page_cnt=%lx "
+						     "num_pages=%lx "
+						     "page_4k_cnt=%lx "
+						     "num_4k=%lx i=%x",
+						     pginfo->page_cnt,
+						     pginfo->num_pages,
+						     pginfo->page_4k_cnt,
+						     pginfo->num_4k, i);
+					ret = -EFAULT;
+					goto ehca_set_pagebuf_exit0;
+				}
+				*kpage = phys_to_abs(
+					(pbuf->addr & EHCA_PAGEMASK)
+					+ (pginfo->next_4k * EHCA_PAGESIZE));
+				if ( !(*kpage) && pbuf->addr ) {
+					ehca_gen_err("pbuf->addr=%lx "
+						     "pbuf->size=%lx "
+						     "next_4k=%lx", pbuf->addr,
+						     pbuf->size,
+						     pginfo->next_4k);
+					ret = -EFAULT;
+					goto ehca_set_pagebuf_exit0;
+				}
+				(pginfo->page_4k_cnt)++;
+				(pginfo->next_4k)++;
+				if (pginfo->next_4k %
+				    (PAGE_SIZE / EHCA_PAGESIZE) == 0)
+					(pginfo->page_cnt)++;
+				kpage++;
+				i++;
+				if (i >= number) break;
+			}
+			if (pginfo->next_4k >= offs4k + num4k) {
+				(pginfo->next_buf)++;
+				pginfo->next_4k = 0; /* start of next buffer */
+			}
+		}
+	} else if (pginfo->type == EHCA_MR_PGI_USER) {
+		/* loop over chunk entries following pginfo->next_chunk */
+		chunk      = pginfo->next_chunk;
+		prev_chunk = pginfo->next_chunk;
+		list_for_each_entry_continue(chunk,
+					     (&(pginfo->region->chunk_list)),
+					     list) {
+			for (i = pginfo->next_nmap; i < chunk->nmap; ) {
+				pgaddr = ( page_to_pfn(chunk->page_list[i].page)
+					   << PAGE_SHIFT );
+				*kpage = phys_to_abs(pgaddr +
+						     (pginfo->next_4k *
+						      EHCA_PAGESIZE));
+				if ( !(*kpage) ) {
+					ehca_gen_err("pgaddr=%lx "
+						     "chunk->page_list[i]=%lx "
+						     "i=%x next_4k=%lx mr=%p",
+						     pgaddr,
+						     (u64)sg_dma_address(
+							     &chunk->
+							     page_list[i]),
+						     i, pginfo->next_4k, e_mr);
+					ret = -EFAULT;
+					goto ehca_set_pagebuf_exit0;
+				}
+				(pginfo->page_4k_cnt)++;
+				(pginfo->next_4k)++;
+				kpage++;
+				if (pginfo->next_4k %
+				    (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
+					(pginfo->page_cnt)++;
+					(pginfo->next_nmap)++;
+					pginfo->next_4k = 0; /* page is done */
+					i++;
+				}
+				j++; /* one more entry stored */
+				if (j >= number) break;
+			}
+			if ((pginfo->next_nmap >= chunk->nmap) &&
+			    (j >= number)) {
+				pginfo->next_nmap = 0;
+				prev_chunk = chunk;
+				break;
+			} else if (pginfo->next_nmap >= chunk->nmap) {
+				pginfo->next_nmap = 0;
+				prev_chunk = chunk;
+			} else if (j >= number)
+				break;
+			else
+				prev_chunk = chunk;
+		}
+		pginfo->next_chunk =
+			list_prepare_entry(prev_chunk,
+					   (&(pginfo->region->chunk_list)),
+					   list);
+	} else if (pginfo->type == EHCA_MR_PGI_FMR) {
+		/* loop over desired page_list entries */
+		fmrlist = pginfo->page_list + pginfo->next_listelem;
+		for (i = 0; i < number; i++) {
+			*kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
+					     pginfo->next_4k * EHCA_PAGESIZE);
+			if ( !(*kpage) ) {
+				ehca_gen_err("*fmrlist=%lx fmrlist=%p "
+					     "next_listelem=%lx next_4k=%lx",
+					     *fmrlist, fmrlist,
+					     pginfo->next_listelem,
+					     pginfo->next_4k);
+				ret = -EFAULT;
+				goto ehca_set_pagebuf_exit0;
+			}
+			(pginfo->page_4k_cnt)++;
+			(pginfo->next_4k)++;
+			kpage++;
+			if (pginfo->next_4k %
+			    (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
+				(pginfo->page_cnt)++;
+				(pginfo->next_listelem)++;
+				fmrlist++;
+				pginfo->next_4k = 0; /* list element is done */
+			}
+		}
+	} else {
+		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
+		ret = -EFAULT;
+		goto ehca_set_pagebuf_exit0;
+	}
+
+ehca_set_pagebuf_exit0:
+	if (ret)
+		ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
+			     "num_4k=%lx next_buf=%lx next_4k=%lx number=%x "
+			     "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x "
+			     "next_listelem=%lx region=%p next_chunk=%p "
+			     "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type,
+			     pginfo->num_pages, pginfo->num_4k,
+			     pginfo->next_buf, pginfo->next_4k, number, kpage,
+			     pginfo->page_cnt, pginfo->page_4k_cnt, i,
+			     pginfo->next_listelem, pginfo->region,
+			     pginfo->next_chunk, pginfo->next_nmap);
+	return ret;
+} /* end ehca_set_pagebuf() */
+
+/*----------------------------------------------------------------------*/
+
+/* setup 1 page from page info: store the next 4k page address in *rpage */
+int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo,
+		       u64 *rpage)
+{
+	int ret = 0;
+	struct ib_phys_buf *tmp_pbuf;
+	u64 *fmrlist;
+	struct ib_umem_chunk *chunk;
+	struct ib_umem_chunk *prev_chunk;
+	u64 pgaddr, num4k, offs4k;
+
+	if (pginfo->type == EHCA_MR_PGI_PHYS) {
+		/* sanity check against the page counts announced in pginfo */
+		if ((pginfo->page_cnt >= pginfo->num_pages) ||
+		    (pginfo->page_4k_cnt >= pginfo->num_4k)) {
+			ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx "
+				     "num_pages=%lx page_4k_cnt=%lx num_4k=%lx",
+				     pginfo->page_cnt, pginfo->num_pages,
+				     pginfo->page_4k_cnt, pginfo->num_4k);
+			ret = -EFAULT;
+			goto ehca_set_pagebuf_1_exit0;
+		}
+		tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf;
+		num4k  = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size +
+			  EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
+		offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
+		*rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) +
+				     (pginfo->next_4k * EHCA_PAGESIZE));
+		if ( !(*rpage) && tmp_pbuf->addr ) {
+			ehca_gen_err("tmp_pbuf->addr=%lx"
+				     " tmp_pbuf->size=%lx next_4k=%lx",
+				     tmp_pbuf->addr, tmp_pbuf->size,
+				     pginfo->next_4k);
+			ret = -EFAULT;
+			goto ehca_set_pagebuf_1_exit0;
+		}
+		(pginfo->page_4k_cnt)++;
+		(pginfo->next_4k)++;
+		if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0)
+			(pginfo->page_cnt)++;
+		if (pginfo->next_4k >= offs4k + num4k) {
+			(pginfo->next_buf)++;
+			pginfo->next_4k = 0; /* start of next buffer */
+		}
+	} else if (pginfo->type == EHCA_MR_PGI_USER) {
+		chunk      = pginfo->next_chunk;
+		prev_chunk = pginfo->next_chunk;
+		list_for_each_entry_continue(chunk,
+					     (&(pginfo->region->chunk_list)),
+					     list) {
+			pgaddr = ( page_to_pfn(chunk->page_list[
+						       pginfo->next_nmap].page)
+				   << PAGE_SHIFT);
+			*rpage = phys_to_abs(pgaddr +
+					     (pginfo->next_4k * EHCA_PAGESIZE));
+			if ( !(*rpage) ) {
+				ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx"
+					     " next_nmap=%lx next_4k=%lx mr=%p",
+					     pgaddr, (u64)sg_dma_address(
+						     &chunk->page_list[
+							     pginfo->
+							     next_nmap]),
+					     pginfo->next_nmap, pginfo->next_4k,
+					     e_mr);
+				ret = -EFAULT;
+				goto ehca_set_pagebuf_1_exit0;
+			}
+			(pginfo->page_4k_cnt)++;
+			(pginfo->next_4k)++;
+			if (pginfo->next_4k %
+			    (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
+				(pginfo->page_cnt)++;
+				(pginfo->next_nmap)++;
+				pginfo->next_4k = 0; /* page is done */
+			}
+			if (pginfo->next_nmap >= chunk->nmap) {
+				pginfo->next_nmap = 0;
+				prev_chunk = chunk;
+			}
+			break; /* only one page per call */
+		}
+		pginfo->next_chunk =
+			list_prepare_entry(prev_chunk,
+					   (&(pginfo->region->chunk_list)),
+					   list);
+	} else if (pginfo->type == EHCA_MR_PGI_FMR) {
+		fmrlist = pginfo->page_list + pginfo->next_listelem;
+		*rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
+				     pginfo->next_4k * EHCA_PAGESIZE);
+		if ( !(*rpage) ) {
+			ehca_gen_err("*fmrlist=%lx fmrlist=%p "
+				     "next_listelem=%lx next_4k=%lx",
+				     *fmrlist, fmrlist, pginfo->next_listelem,
+				     pginfo->next_4k);
+			ret = -EFAULT;
+			goto ehca_set_pagebuf_1_exit0;
+		}
+		(pginfo->page_4k_cnt)++;
+		(pginfo->next_4k)++;
+		if (pginfo->next_4k %
+		    (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
+			(pginfo->page_cnt)++;
+			(pginfo->next_listelem)++;
+			pginfo->next_4k = 0; /* list element is done */
+		}
+	} else {
+		ehca_gen_err("bad pginfo->type=%x", pginfo->type);
+		ret = -EFAULT;
+		goto ehca_set_pagebuf_1_exit0;
+	}
+
+ehca_set_pagebuf_1_exit0:
+	if (ret)
+		ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
+			     "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p "
+			     "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx "
+			     "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr,
+			     pginfo, pginfo->type, pginfo->num_pages,
+			     pginfo->num_4k, pginfo->next_buf, pginfo->next_4k,
+			     rpage, pginfo->page_cnt, pginfo->page_4k_cnt,
+			     pginfo->next_listelem, pginfo->region,
+			     pginfo->next_chunk, pginfo->next_nmap);
+	return ret;
+} /* end ehca_set_pagebuf_1() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * check MR if it is a max-MR, i.e. uses whole memory
+ * in case it's a max-MR 1 is returned, else 0
+ */
+int ehca_mr_is_maxmr(u64 size,
+		     u64 *iova_start)
+{
+	/* max-MR: high_memory - PAGE_OFFSET bytes, starting at KERNELBASE */
+	if (size != ((u64)high_memory - PAGE_OFFSET))
+		return 0;
+	if (iova_start != (void*)KERNELBASE)
+		return 0;
+	ehca_gen_dbg("this is a max-MR");
+	return 1;
+} /* end ehca_mr_is_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/* map access control for MR/MW. This routine is used for MR and MW. */
+void ehca_mrmw_map_acl(int ib_acl, u32 *hipz_acl)
+{
+	/* start with no rights, then add one HIPZ bit per IB access flag */
+	*hipz_acl = 0;
+	if (ib_acl & IB_ACCESS_LOCAL_WRITE)
+		*hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
+	if (ib_acl & IB_ACCESS_REMOTE_WRITE)
+		*hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
+	if (ib_acl & IB_ACCESS_REMOTE_READ)
+		*hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
+	if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
+		*hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
+	if (ib_acl & IB_ACCESS_MW_BIND)
+		*hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
+} /* end ehca_mrmw_map_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/* sets page size in hipz access control for MR/MW. */
+void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/
+{
+	/* HCA supports only 4k, so *hipz_acl is left as it is */
+} /* end ehca_mrmw_set_pgsize_hipz_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * reverse map access control for MR/MW.
+ * This routine is used for MR and MW.
+ */
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, int *ib_acl) /*OUT*/
+{
+	/* start with no rights, then add one IB flag per HIPZ access bit */
+	*ib_acl = 0;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
+		*ib_acl |= IB_ACCESS_LOCAL_WRITE;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
+		*ib_acl |= IB_ACCESS_REMOTE_WRITE;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
+		*ib_acl |= IB_ACCESS_REMOTE_READ;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
+		*ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
+	if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
+		*ib_acl |= IB_ACCESS_MW_BIND;
+} /* end ehca_mrmw_reverse_map_acl() */
+
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * map HIPZ rc to IB retcodes for MR/MW allocations
+ * Used for hipz_mr_reg_alloc and hipz_mw_alloc.
+ */
+int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
+{
+	if (hipz_rc == H_SUCCESS)	/* successful completion */
+		return 0;
+	if (hipz_rc == H_BUSY)		/* long busy */
+		return -EBUSY;
+
+	/*
+	 * any other return code, listed here or not, becomes -EINVAL:
+	 *   H_ADAPTER_PARM          invalid adapter handle
+	 *   H_RT_PARM               invalid resource type
+	 *   H_NOT_ENOUGH_RESOURCES  insufficient resources
+	 *   H_MLENGTH_PARM          invalid memory length
+	 *   H_MEM_ACCESS_PARM       invalid access controls
+	 *   H_CONSTRAINED           resource constraint
+	 */
+	return -EINVAL;
+} /* end ehca_mrmw_map_hrc_alloc() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Translate the HIPZ return code of hipz_h_register_rpage_mr, issued for
+ * the LAST page of a registration, into an errno-style IB retcode.
+ *
+ * Only H_SUCCESS (registration complete) yields 0; H_BUSY (long busy)
+ * yields -EBUSY.  Everything else yields -EINVAL, including:
+ *   H_PAGE_REGISTERED  page registered
+ *   H_ADAPTER_PARM     invalid adapter handle
+ *   H_RH_PARM          invalid resource handle
+ *   H_PARAMETER        invalid logical address, or count zero or
+ *                      greater 512
+ *   H_TABLE_FULL       page table full
+ *   H_HARDWARE         HCA not operational
+ *
+ * H_QT_PARM (invalid queue type) has no dedicated mapping; like any
+ * other unlisted code it is reported as -EINVAL.
+ */
+int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc)
+{
+	if (hipz_rc == H_SUCCESS)
+		return 0;
+	if (hipz_rc == H_BUSY)
+		return -EBUSY;
+
+	return -EINVAL;
+} /* end ehca_mrmw_map_hrc_rrpg_last() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Translate the HIPZ return code of hipz_h_register_rpage_mr, issued for
+ * a page that is NOT the last one, into an errno-style IB retcode.
+ *
+ * Only H_PAGE_REGISTERED (page registered) yields 0; H_BUSY (long busy)
+ * yields -EBUSY.  Everything else yields -EINVAL, including:
+ *   H_SUCCESS       registration complete
+ *   H_ADAPTER_PARM  invalid adapter handle
+ *   H_RH_PARM       invalid resource handle
+ *   H_PARAMETER     invalid logical address, or count zero or
+ *                   greater 512
+ *   H_TABLE_FULL    page table full
+ *   H_HARDWARE      HCA not operational
+ *
+ * H_QT_PARM (invalid queue type) has no dedicated mapping; like any
+ * other unlisted code it is reported as -EINVAL.
+ */
+int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc)
+{
+	if (hipz_rc == H_PAGE_REGISTERED)
+		return 0;
+	if (hipz_rc == H_BUSY)
+		return -EBUSY;
+
+	return -EINVAL;
+} /* end ehca_mrmw_map_hrc_rrpg_notlast() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Translate the HIPZ return code of hipz_mr_query into an IB retcode:
+ * H_SUCCESS -> 0, H_BUSY (long busy) -> -EBUSY, all else -> -EINVAL
+ * (e.g. H_ADAPTER_PARM invalid adapter handle, H_RH_PARM invalid
+ * resource handle).
+ */
+int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc)
+{
+	if (hipz_rc == H_SUCCESS)
+		return 0;
+	if (hipz_rc == H_BUSY)
+		return -EBUSY;
+
+	return -EINVAL;
+} /* end ehca_mrmw_map_hrc_query_mr() */
+
+/*----------------------------------------------------------------------*/
+/*----------------------------------------------------------------------*/
+
+/*
+ * Translate the HIPZ return code of hipz_h_free_resource_mr into an
+ * errno-style IB retcode.
+ *
+ * H_SUCCESS (resource freed) yields 0; H_RESOURCE (resource in use) and
+ * H_BUSY (long busy) yield -EBUSY.  Everything else yields -EINVAL,
+ * including:
+ *   H_ADAPTER_PARM  invalid adapter handle
+ *   H_RH_PARM       invalid resource handle
+ *   H_R_STATE       invalid resource state
+ *   H_HARDWARE      HCA not operational
+ */
+int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc)
+{
+	if (hipz_rc == H_SUCCESS)
+		return 0;
+	if (hipz_rc == H_RESOURCE || hipz_rc == H_BUSY)
+		return -EBUSY;
+
+	return -EINVAL;
+} /* end ehca_mrmw_map_hrc_free_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Translate the HIPZ return code of hipz_h_free_resource_mw into an
+ * errno-style IB retcode.
+ *
+ * H_SUCCESS (resource freed) yields 0; H_RESOURCE (resource in use) and
+ * H_BUSY (long busy) yield -EBUSY.  Everything else yields -EINVAL,
+ * including:
+ *   H_ADAPTER_PARM  invalid adapter handle
+ *   H_RH_PARM       invalid resource handle
+ *   H_R_STATE       invalid resource state
+ *   H_HARDWARE      HCA not operational
+ */
+int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc)
+{
+	if (hipz_rc == H_SUCCESS)
+		return 0;
+	if (hipz_rc == H_RESOURCE || hipz_rc == H_BUSY)
+		return -EBUSY;
+
+	return -EINVAL;
+} /* end ehca_mrmw_map_hrc_free_mw() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Translate the HIPZ return code of hipz_h_register_smr (SMR
+ * registrations) into an errno-style IB retcode.
+ *
+ * H_SUCCESS yields 0 and H_BUSY (long busy) yields -EBUSY.  Everything
+ * else yields -EINVAL, including:
+ *   H_ADAPTER_PARM          invalid adapter handle
+ *   H_RH_PARM               invalid resource handle
+ *   H_MEM_PARM              invalid MR virtual address
+ *   H_MEM_ACCESS_PARM       invalid access controls
+ *   H_NOT_ENOUGH_RESOURCES  insufficient resources
+ */
+int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc)
+{
+	if (hipz_rc == H_SUCCESS)
+		return 0;
+	if (hipz_rc == H_BUSY)
+		return -EBUSY;
+
+	return -EINVAL;
+} /* end ehca_mrmw_map_hrc_reg_smr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * MR destructor and constructor for the Reregister MR verb: resets the
+ * ehca_mr fields assigned below; struct ib_mr and the spinlock are kept.
+ */
+void ehca_mr_deletenew(struct ehca_mr *mr)
+{
+	/* handle and galpas are cleared bytewise, the rest field by field */
+	memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
+	memset(&mr->galpas, 0, sizeof(mr->galpas));
+	mr->start         = NULL;
+	mr->pagearray     = NULL;
+	mr->nr_of_pages   = 0;
+	mr->num_pages     = 0;
+	mr->num_4k        = 0;
+	mr->flags         = 0;
+	mr->acl           = 0;
+	mr->fmr_page_size = 0;
+	mr->fmr_max_pages = 0;
+	mr->fmr_max_maps  = 0;
+	mr->fmr_map_cnt   = 0;
+} /* end ehca_mr_deletenew() */
+
+/* create the MR slab cache first, then the MW slab cache */
+int ehca_init_mrmw_cache(void)
+{
+	mr_cache = kmem_cache_create("ehca_cache_mr", sizeof(struct ehca_mr),
+				     0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!mr_cache)
+		return -ENOMEM;
+
+	mw_cache = kmem_cache_create("ehca_cache_mw", sizeof(struct ehca_mw),
+				     0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (mw_cache)
+		return 0;
+
+	/* no MW cache: give the MR cache back so nothing stays allocated */
+	kmem_cache_destroy(mr_cache);
+	mr_cache = NULL;
+
+	return -ENOMEM;
+}
+
+void ehca_cleanup_mrmw_cache(void) /* destroys the MR and MW slab caches */
+{
+	if (mr_cache)	/* skip a cache that does not exist */
+		kmem_cache_destroy(mr_cache);
+	if (mw_cache)	/* same guard for the MW cache */
+		kmem_cache_destroy(mw_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
new file mode 100644
index 0000000..d936e40
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@@ -0,0 +1,140 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  MR/MW declarations and inline functions
+ *
+ *  Authors: Dietmar Decker <ddecker@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _EHCA_MRMW_H_
+#define _EHCA_MRMW_H_
+
+int ehca_reg_mr(struct ehca_shca *shca,
+		struct ehca_mr *e_mr,
+		u64 *iova_start,
+		u64 size,
+		int acl,
+		struct ehca_pd *e_pd,
+		struct ehca_mr_pginfo *pginfo,
+		u32 *lkey,
+		u32 *rkey);
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+		       struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo);
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+		  struct ehca_mr *e_mr,
+		  u64 *iova_start,
+		  u64 size,
+		  int mr_access_flags,
+		  struct ehca_pd *e_pd,
+		  struct ehca_mr_pginfo *pginfo,
+		  u32 *lkey,
+		  u32 *rkey);
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+		       struct ehca_mr *e_fmr);
+
+int ehca_reg_smr(struct ehca_shca *shca,
+		 struct ehca_mr *e_origmr,
+		 struct ehca_mr *e_newmr,
+		 u64 *iova_start,
+		 int acl,
+		 struct ehca_pd *e_pd,
+		 u32 *lkey,
+		 u32 *rkey);
+
+int ehca_reg_internal_maxmr(struct ehca_shca *shca,
+			    struct ehca_pd *e_pd,
+			    struct ehca_mr **maxmr);
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+		   struct ehca_mr *e_newmr,
+		   u64 *iova_start,
+		   int acl,
+		   struct ehca_pd *e_pd,
+		   u32 *lkey,
+		   u32 *rkey);
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
+
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+				  int num_phys_buf,
+				  u64 *iova_start,
+				  u64 *size);
+
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+			     u64 *page_list,
+			     int list_len);
+
+int ehca_set_pagebuf(struct ehca_mr *e_mr,
+		     struct ehca_mr_pginfo *pginfo,
+		     u32 number,
+		     u64 *kpage);
+
+int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
+		       struct ehca_mr_pginfo *pginfo,
+		       u64 *rpage);
+
+int ehca_mr_is_maxmr(u64 size,
+		     u64 *iova_start);
+
+void ehca_mrmw_map_acl(int ib_acl,
+		       u32 *hipz_acl);
+
+void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
+
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+			       int *ib_acl);
+
+int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc);
+
+int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc);
+
+void ehca_mr_deletenew(struct ehca_mr *mr);
+
+#endif  /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
new file mode 100644
index 0000000..2c3cdc6
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -0,0 +1,114 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  PD functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+
+static struct kmem_cache *pd_cache;
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+			    struct ib_ucontext *context, struct ib_udata *udata)
+{
+	struct ehca_shca *shca;
+	struct ehca_pd *pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL);
+
+	if (!pd) {
+		ehca_err(device, "device=%p context=%p out of memory",
+			 device, context);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* start from an all-zero PD and record the allocating process */
+	memset(pd, 0, sizeof(*pd));
+	pd->ownpid = current->tgid;
+
+	if (context) {
+		/* user PD: its own address doubles as the fw_pd value */
+		pd->fw_pd.value = (u64)pd;
+		return &pd->ib_pd;
+	}
+
+	/*
+	 * Kernel PD (no ucontext): always reuse the fw_pd value of the
+	 * PD hanging off the shca, i.e. the one the driver creates in
+	 * ehca_shca_reopen()
+	 */
+	shca = container_of(device, struct ehca_shca, ib_device);
+	pd->fw_pd.value = shca->pd->fw_pd.value;
+
+	return &pd->ib_pd;
+}
+
+int ehca_dealloc_pd(struct ib_pd *pd)
+{
+	struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+	u32 caller = current->tgid;
+	int user_owned = pd->uobject && pd->uobject->context;
+
+	/* a PD with a user context may only be freed by the tgid in ownpid */
+	if (user_owned && e_pd->ownpid != caller) {
+		ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x",
+			 caller, e_pd->ownpid);
+		return -EINVAL;
+	}
+
+	kmem_cache_free(pd_cache, e_pd);
+
+	return 0;
+}
+
+/*
+ * create the slab cache for struct ehca_pd; 0 on success, else -ENOMEM
+ */
+int ehca_init_pd_cache(void)
+{
+	pd_cache = kmem_cache_create("ehca_cache_pd", sizeof(struct ehca_pd),
+				     0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+	return pd_cache ? 0 : -ENOMEM;
+}
+
+void ehca_cleanup_pd_cache(void) /* destroys the PD slab cache, if any */
+{
+	if (pd_cache)	/* NULL when ehca_init_pd_cache() failed */
+		kmem_cache_destroy(pd_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
new file mode 100644
index 0000000..8707d29
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -0,0 +1,259 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Hardware request structures
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _EHCA_QES_H_
+#define _EHCA_QES_H_
+
+#include "ehca_tools.h"
+
+/* virtual scatter gather entry to specify remote addresses with length */
+struct ehca_vsgentry {
+	u64 vaddr;
+	u32 lkey;
+	u32 length;
+}; /* 16 bytes: one u64 plus two u32 */
+
+#define GRH_FLAG_MASK        EHCA_BMASK_IBM(7,7)
+#define GRH_IPVERSION_MASK   EHCA_BMASK_IBM(0,3)
+#define GRH_TCLASS_MASK      EHCA_BMASK_IBM(4,12)
+#define GRH_FLOWLABEL_MASK   EHCA_BMASK_IBM(13,31)
+#define GRH_PAYLEN_MASK      EHCA_BMASK_IBM(32,47)
+#define GRH_NEXTHEADER_MASK  EHCA_BMASK_IBM(48,55)
+#define GRH_HOPLIMIT_MASK    EHCA_BMASK_IBM(56,63)
+
+/*
+ * Unreliable Datagram Address Vector Format
+ * see IBTA Vol1 chapter 8.3 Global Routing Header
+ */
+struct ehca_ud_av {
+	u8 sl;
+	u8 lnh;
+	u16 dlid;
+	u8 reserved1;
+	u8 reserved2;
+	u8 reserved3;
+	u8 slid_path_bits;
+	u8 reserved4;
+	u8 ipd;
+	u8 reserved5;
+	u8 pmtu;
+	u32 reserved6;
+	u64 reserved7;
+	union {	/* two overlaid views of the same 40 bytes */
+		struct {	/* view as five 64-bit words */
+			u64 word_0; /* always set to 6  */
+			/* should be 0x1B for IB transport */
+			u64 word_1;
+			u64 word_2;
+			u64 word_3;
+			u64 word_4;
+		} grh;
+		struct {	/* view as ten 32-bit words */
+			u32 wd_0;
+			u32 wd_1;
+			/* DWord_1 --> SGID */
+
+			u32 sgid_wd3;
+			u32 sgid_wd2;
+
+			u32 sgid_wd1;
+			u32 sgid_wd0;
+			/* DWord_3 --> DGID */
+
+			u32 dgid_wd3;
+			u32 dgid_wd2;
+
+			u32 dgid_wd1;
+			u32 dgid_wd0;
+		} grh_l;
+	};
+};
+
+/* maximum number of sg entries allowed in a WQE */
+#define MAX_WQE_SG_ENTRIES 252
+
+#define WQE_OPTYPE_SEND             0x80
+#define WQE_OPTYPE_RDMAREAD         0x40
+#define WQE_OPTYPE_RDMAWRITE        0x20
+#define WQE_OPTYPE_CMPSWAP          0x10
+#define WQE_OPTYPE_FETCHADD         0x08
+#define WQE_OPTYPE_BIND             0x04
+
+#define WQE_WRFLAG_REQ_SIGNAL_COM   0x80
+#define WQE_WRFLAG_FENCE            0x40
+#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
+#define WQE_WRFLAG_SOLIC_EVENT      0x10
+
+#define WQEF_CACHE_HINT             0x80
+#define WQEF_CACHE_HINT_RD_WR       0x40
+#define WQEF_TIMED_WQE              0x20
+#define WQEF_PURGE                  0x08
+#define WQEF_HIGH_NIBBLE            0xF0
+
+#define MW_BIND_ACCESSCTRL_R_WRITE   0x40
+#define MW_BIND_ACCESSCTRL_R_READ    0x20
+#define MW_BIND_ACCESSCTRL_R_ATOMIC  0x10
+
+struct ehca_wqe { /* WQE: common header, then a per-request-type union */
+	u64 work_request_id;
+	u8 optype;	/* presumably WQE_OPTYPE_* */
+	u8 wr_flag;	/* presumably WQE_WRFLAG_* */
+	u16 pkeyi;
+	u8 wqef;	/* presumably WQEF_* */
+	u8 nr_of_data_seg;
+	u16 wqe_provided_slid;
+	u32 destination_qp_number;
+	u32 resync_psn_sqp;
+	u32 local_ee_context_qkey;
+	u32 immediate_data;
+	union {
+		struct {
+			u64 remote_virtual_adress;
+			u32 rkey;
+			u32 reserved;
+			u64 atomic_1st_op_dma_len;
+			u64 atomic_2nd_op;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+
+		} nud;
+		struct {
+			u64 ehca_ud_av_ptr;
+			u64 reserved1;
+			u64 reserved2;
+			u64 reserved3;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+		} ud_avp;
+		struct {
+			struct ehca_ud_av ud_av;
+			/* ud_av is 32 bytes (2 sg entries) above 4 * u64 */
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
+						     2];
+		} ud_av;
+		struct {
+			u64 reserved0;
+			u64 reserved1;
+			u64 reserved2;
+			u64 reserved3;
+			struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+		} all_rcv;
+
+		struct {
+			u64 reserved;
+			u32 rkey;
+			u32 old_rkey;
+			u64 reserved1;
+			u64 reserved2;
+			u64 virtual_address;
+			u32 reserved3;
+			u32 length;
+			u32 reserved4;
+			u16 reserved5;
+			u8 reserved6;
+			u8 lr_ctl;	/* presumably MW_BIND_ACCESSCTRL_* */
+			u32 lkey;
+			u32 reserved7;
+			u64 reserved8;
+			u64 reserved9;
+			u64 reserved10;
+			u64 reserved11;
+		} bind;
+		struct {
+			u64 reserved12;
+			u64 reserved13;
+			u32 size;
+			u32 start;
+		} inline_data;
+	} u;
+};
+
+#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0)
+#define WC_IMM_DATA     EHCA_BMASK_IBM(1,1)
+#define WC_GRH_PRESENT  EHCA_BMASK_IBM(2,2)
+#define WC_SE_BIT       EHCA_BMASK_IBM(3,3)
+#define WC_STATUS_ERROR_BIT 0x80000000
+#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
+#define WC_STATUS_PURGE_BIT 0x10
+
+struct ehca_cqe { /* CQE layout: 64 bytes of fixed-width fields */
+	u64 work_request_id;
+	u8 optype;
+	u8 w_completion_flags;	/* presumably WC_* flag bits */
+	u16 reserved1;
+	u32 nr_bytes_transferred;
+	u32 immediate_data;
+	u32 local_qp_number;
+	u8 freed_resource_count;
+	u8 service_level;
+	u16 wqe_count;
+	u32 qp_token;
+	u32 qkey_ee_token;
+	u32 remote_qp_number;
+	u16 dlid;
+	u16 rlid;
+	u16 reserved2;
+	u16 pkey_index;
+	u32 cqe_timestamp;
+	u32 wqe_timestamp;
+	u8 wqe_timestamp_valid;
+	u8 reserved3;
+	u8 reserved4;
+	u8 cqe_flags;
+	u32 status;	/* presumably tested with WC_STATUS_* */
+};
+
+struct ehca_eqe { /* presumably one event queue entry - TODO confirm */
+	u64 entry;	/* the whole entry as one raw 64-bit word */
+};
+
+struct ehca_mrte {
+	u64 starting_va;
+	u64 length; /* length of memory region in bytes */
+	u32 pd;
+	u8 key_instance;
+	u8 pagesize;
+	u8 mr_control;
+	u8 local_remote_access_ctrl;
+	u8 reserved[0x20 - 0x18]; /* pads 0x18..0x1f, at_pointer is at 0x20 */
+	u64 at_pointer[4];
+};
+#endif /*_EHCA_QES_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
new file mode 100644
index 0000000..4394123
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -0,0 +1,1507 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  QP functions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+static struct kmem_cache *qp_cache;
+
+/*
+ * attributes not supported by query qp
+ */
+#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
+				     IB_QP_MAX_QP_RD_ATOMIC   | \
+				     IB_QP_ACCESS_FLAGS       | \
+				     IB_QP_EN_SQD_ASYNC_NOTIFY)
+
+/*
+ * ehca (internal) qp state values; explicit, non-contiguous numbering
+ */
+enum ehca_qp_state {
+	EHCA_QPS_RESET = 1,
+	EHCA_QPS_INIT = 2,
+	EHCA_QPS_RTR = 3,
+	EHCA_QPS_RTS = 5,
+	EHCA_QPS_SQD = 6,
+	EHCA_QPS_SQE = 8,
+	EHCA_QPS_ERR = 128
+};
+
+/*
+ * qp state transitions as defined by IB Arch Rel 1.1 page 431
+ */
+enum ib_qp_statetrans {
+	IB_QPST_ANY2RESET,
+	IB_QPST_ANY2ERR,
+	IB_QPST_RESET2INIT,
+	IB_QPST_INIT2RTR,
+	IB_QPST_INIT2INIT,
+	IB_QPST_RTR2RTS,
+	IB_QPST_RTS2SQD,
+	IB_QPST_RTS2RTS,
+	IB_QPST_SQD2RTS,
+	IB_QPST_SQE2RTS,
+	IB_QPST_SQD2SQD, /* NOTE(review): unused by get_modqp_statetrans() */
+	IB_QPST_MAX	/* nr of transitions, this must be last!!! */
+};
+
+/*
+ * ib2ehca_qp_state maps IB to ehca qp_state
+ * returns the ehca qp state corresponding to the given ib qp state;
+ * an unknown ib state is logged and answered with -EINVAL, converted
+ * to the enum return type
+ */
+static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
+{
+	if (ib_qp_state == IB_QPS_RESET)
+		return EHCA_QPS_RESET;
+	if (ib_qp_state == IB_QPS_INIT)
+		return EHCA_QPS_INIT;
+	if (ib_qp_state == IB_QPS_RTR)
+		return EHCA_QPS_RTR;
+	if (ib_qp_state == IB_QPS_RTS)
+		return EHCA_QPS_RTS;
+	if (ib_qp_state == IB_QPS_SQD)
+		return EHCA_QPS_SQD;
+	if (ib_qp_state == IB_QPS_SQE)
+		return EHCA_QPS_SQE;
+	if (ib_qp_state == IB_QPS_ERR)
+		return EHCA_QPS_ERR;
+
+	ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
+	return -EINVAL;
+}
+
+/*
+ * ehca2ib_qp_state maps ehca to IB qp_state
+ * returns the ib qp state corresponding to the given ehca qp state;
+ * an unknown ehca state is logged and answered with -EINVAL, converted
+ * to the enum return type
+ */
+static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
+						ehca_qp_state)
+{
+	if (ehca_qp_state == EHCA_QPS_RESET)
+		return IB_QPS_RESET;
+	if (ehca_qp_state == EHCA_QPS_INIT)
+		return IB_QPS_INIT;
+	if (ehca_qp_state == EHCA_QPS_RTR)
+		return IB_QPS_RTR;
+	if (ehca_qp_state == EHCA_QPS_RTS)
+		return IB_QPS_RTS;
+	if (ehca_qp_state == EHCA_QPS_SQD)
+		return IB_QPS_SQD;
+	if (ehca_qp_state == EHCA_QPS_SQE)
+		return IB_QPS_SQE;
+	if (ehca_qp_state == EHCA_QPS_ERR)
+		return IB_QPS_ERR;
+
+	ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
+	return -EINVAL;
+}
+
+/*
+ * ehca_qp_type used as index for req_attr and opt_attr of
+ * struct ehca_modqp_statetrans
+ */
+enum ehca_qp_type {
+	QPT_RC = 0,
+	QPT_UC = 1,
+	QPT_UD = 2,
+	QPT_SQP = 3,	/* what ib2ehcaqptype() returns for SMI and GSI */
+	QPT_MAX		/* equals the number of types listed above */
+};
+
+/*
+ * ib2ehcaqptype maps Ib to ehca qp_type
+ * returns the ehca qp type corresponding to the ib qp type:
+ * SMI and GSI both map to QPT_SQP, RC/UC/UD map one to one;
+ * any other ib qp type is logged and answered with -EINVAL,
+ * converted to the enum return type
+ */
+static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
+{
+	if (ibqptype == IB_QPT_SMI || ibqptype == IB_QPT_GSI)
+		return QPT_SQP;
+	if (ibqptype == IB_QPT_RC)
+		return QPT_RC;
+	if (ibqptype == IB_QPT_UC)
+		return QPT_UC;
+	if (ibqptype == IB_QPT_UD)
+		return QPT_UD;
+
+	ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+	return -EINVAL;
+}
+
+/*
+ * get_modqp_statetrans maps a (from, to) pair of IB qp states onto the
+ * matching enum ib_qp_statetrans index.
+ * Returns -EINVAL (converted to the enum return type) when the pair is
+ * not one of the transitions handled here: any -> RESET, any -> ERR,
+ * RESET/INIT -> INIT, INIT -> RTR, RTR/RTS/SQD/SQE -> RTS, RTS -> SQD.
+ * NOTE(review): IB_QPST_SQD2SQD is never returned, and callers must
+ * not rely on a "< 0" test if the enum type is unsigned - confirm.
+ */
+static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
+							 int ib_tostate)
+{
+	/* RESET and ERR are reachable from every state */
+	if (ib_tostate == IB_QPS_RESET)
+		return IB_QPST_ANY2RESET;
+	if (ib_tostate == IB_QPS_ERR)
+		return IB_QPST_ANY2ERR;
+
+	switch (ib_tostate) {
+	case IB_QPS_INIT:
+		if (ib_fromstate == IB_QPS_RESET)
+			return IB_QPST_RESET2INIT;
+		if (ib_fromstate == IB_QPS_INIT)
+			return IB_QPST_INIT2INIT;
+		break;
+	case IB_QPS_RTR:
+		if (ib_fromstate == IB_QPS_INIT)
+			return IB_QPST_INIT2RTR;
+		break;
+	case IB_QPS_RTS:
+		switch (ib_fromstate) {
+		case IB_QPS_RTR:
+			return IB_QPST_RTR2RTS;
+		case IB_QPS_RTS:
+			return IB_QPST_RTS2RTS;
+		case IB_QPS_SQD:
+			return IB_QPST_SQD2RTS;
+		case IB_QPS_SQE:
+			return IB_QPST_SQE2RTS;
+		}
+		break;
+	case IB_QPS_SQD:
+		if (ib_fromstate == IB_QPS_RTS)
+			return IB_QPST_RTS2SQD;
+		break;
+	default:
+		/* IB_QPS_SQE and unknown target states have no entry */
+		break;
+	}
+
+	return -EINVAL;
+}
+
+enum ehca_service_type { /* hcp service types, see ibqptype2servicetype() */
+	ST_RC = 0,
+	ST_UC = 1,
+	ST_RD = 2,	/* never returned by ibqptype2servicetype() */
+	ST_UD = 3
+};
+
+/*
+ * ibqptype2servicetype returns hcp service type corresponding to given
+ * ib qp type used by create_qp(); SMI, GSI and UD all use ST_UD,
+ * everything without a service type yields -EINVAL
+ */
+static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
+{
+	switch (ibqptype) {
+	case IB_QPT_RC:
+		return ST_RC;
+	case IB_QPT_UC:
+		return ST_UC;
+	case IB_QPT_UD:
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		return ST_UD;
+	case IB_QPT_RAW_IPV6:
+	case IB_QPT_RAW_ETY:
+		/* known but unsupported: rejected without a log message */
+		return -EINVAL;
+	default:
+		ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+		return -EINVAL;
+	}
+}
+
+/*
+ * init_qp_queues constructs the send and receive queue of my_qp and
+ * registers every queue page through hipz_h_register_rpage_qp().
+ * Returns 0 on success, otherwise a negative errno; queues constructed
+ * up to the point of failure are destructed again before returning.
+ */
+static inline int init_qp_queues(struct ehca_shca *shca,
+				 struct ehca_qp *my_qp,
+				 int nr_sq_pages, int nr_rq_pages,
+				 int swqe_size, int rwqe_size,
+				 int nr_send_sges, int nr_receive_sges)
+{
+	int ret, cnt, ipz_rc;
+	void *vpage;
+	u64 rpage, h_ret;
+	struct ib_device *ib_dev = &shca->ib_device;
+	struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
+
+	ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue, nr_sq_pages,
+				EHCA_PAGESIZE, swqe_size, nr_send_sges);
+	if (!ipz_rc) {
+		ehca_err(ib_dev, "Cannot allocate page for squeue. ipz_rc=%x",
+			 ipz_rc);
+		return -EBUSY;
+	}
+
+	ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue, nr_rq_pages,
+				EHCA_PAGESIZE, rwqe_size, nr_receive_sges);
+	if (!ipz_rc) {
+		ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
+			 ipz_rc);
+		ret = -EBUSY;
+		goto init_qp_queues0;
+	}
+	/* register SQ pages */
+	for (cnt = 0; cnt < nr_sq_pages; cnt++) {
+		vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
+		if (!vpage) {
+			ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() "
+				 "failed p_vpage= %p", vpage);
+			ret = -EINVAL;
+			goto init_qp_queues1;
+		}
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+						 my_qp->ipz_qp_handle,
+						 &my_qp->pf, 0, 0,
+						 rpage, 1,
+						 my_qp->galpas.kernel);
+		/* compare signed: as u64, h_ret is never below H_SUCCESS */
+		if ((s64)h_ret < H_SUCCESS) {
+			ehca_err(ib_dev, "SQ hipz_qp_register_rpage()"
+				 " failed rc=%lx", h_ret);
+			ret = ehca2ib_return_code(h_ret);
+			goto init_qp_queues1;
+		}
+	}
+
+	ipz_qeit_reset(&my_qp->ipz_squeue);
+
+	/* register RQ pages */
+	for (cnt = 0; cnt < nr_rq_pages; cnt++) {
+		vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
+		if (!vpage) {
+			ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
+				 "failed p_vpage = %p", vpage);
+			ret = -EINVAL;
+			goto init_qp_queues1;
+		}
+
+		rpage = virt_to_abs(vpage);
+
+		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+						 my_qp->ipz_qp_handle,
+						 &my_qp->pf, 0, 1,
+						 rpage, 1,my_qp->galpas.kernel);
+		if ((s64)h_ret < H_SUCCESS) {	/* signed, see SQ loop */
+			ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
+				 "rc=%lx", h_ret);
+			ret = ehca2ib_return_code(h_ret);
+			goto init_qp_queues1;
+		}
+		if (cnt == (nr_rq_pages - 1)) {	/* last page! */
+			if (h_ret != H_SUCCESS) {
+				ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+					 "h_ret= %lx ", h_ret);
+				ret = ehca2ib_return_code(h_ret);
+				goto init_qp_queues1;
+			}
+			vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
+			if (vpage) {
+				ehca_err(ib_dev, "ipz_qpageit_get_inc() "
+					 "should not succeed vpage=%p", vpage);
+				ret = -EINVAL;
+				goto init_qp_queues1;
+			}
+		} else {
+			if (h_ret != H_PAGE_REGISTERED) {
+				ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+					 "h_ret= %lx ", h_ret);
+				ret = ehca2ib_return_code(h_ret);
+				goto init_qp_queues1;
+			}
+		}
+	}
+
+	ipz_qeit_reset(&my_qp->ipz_rqueue);
+
+	return 0;
+
+init_qp_queues1:
+	ipz_queue_dtor(&my_qp->ipz_rqueue);
+init_qp_queues0:
+	ipz_queue_dtor(&my_qp->ipz_squeue);
+	return ret;
+}
+
+/*
+ * ehca_create_qp - validate init_attr, allocate the QP with
+ * hipz_h_alloc_resource_qp(), set up both queues and, for userspace callers,
+ * mmap queues and fw_handle. Returns the ib_qp, or an ERR_PTR() on failure.
+ */
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *init_attr,
+			     struct ib_udata *udata)
+{
+	static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
+	static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
+	struct ehca_qp *my_qp;
+	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+	struct ib_ucontext *context = NULL;
+	u64 h_ret;
+	int max_send_sge, max_recv_sge, ret;
+
+	/* h_call's in/out parameters */
+	struct ehca_alloc_qp_parms parms;
+	u32 swqe_size = 0, rwqe_size = 0;
+	u8 daqp_completion, isdaqp;
+	unsigned long flags;
+
+	if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
+		init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
+		ehca_err(pd->device, "init_attr->sq_sig_type=%x not allowed",
+			 init_attr->sq_sig_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* save daqp completion bits */
+	daqp_completion = init_attr->qp_type & 0x60;
+	/* save daqp bit */
+	isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0;
+	init_attr->qp_type = init_attr->qp_type & 0x1F; /* plain IB QP type */
+
+	if (init_attr->qp_type != IB_QPT_UD &&
+	    init_attr->qp_type != IB_QPT_SMI &&
+	    init_attr->qp_type != IB_QPT_GSI &&
+	    init_attr->qp_type != IB_QPT_UC &&
+	    init_attr->qp_type != IB_QPT_RC) {
+		ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	}
+	/* daqp is accepted for RC and UD only, with at most 255 work requests */
+	if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD)
+	    && isdaqp) {
+		ehca_err(pd->device, "unsupported LL QP Type=%x",
+			 init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	} else if (init_attr->qp_type == IB_QPT_RC && isdaqp &&
+		   (init_attr->cap.max_send_wr > 255 ||
+		    init_attr->cap.max_recv_wr > 255 )) {
+		ehca_err(pd->device, "Invalid Number of max_sq_wr =%x "
+			 "or max_rq_wr=%x for QP Type=%x",
+			 init_attr->cap.max_send_wr,
+			 init_attr->cap.max_recv_wr,init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	} else if (init_attr->qp_type == IB_QPT_UD && isdaqp &&
+		  init_attr->cap.max_send_wr > 255) {
+		ehca_err(pd->device,
+			 "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x",
+			 init_attr->cap.max_send_wr, init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (pd->uobject && udata)
+		context = pd->uobject->context;
+
+	my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL);
+	if (!my_qp) {
+		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(my_qp, 0, sizeof(struct ehca_qp));
+	memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
+	spin_lock_init(&my_qp->spinlock_s);
+	spin_lock_init(&my_qp->spinlock_r);
+
+	my_qp->recv_cq =
+		container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+	my_qp->send_cq =
+		container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
+
+	my_qp->init_attr = *init_attr;
+
+	/* reserve a token for the QP; retry while the idr asks for more memory */
+	do {
+		if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
+			ret = -ENOMEM;
+			ehca_err(pd->device, "Can't reserve idr resources.");
+			goto create_qp_exit0;
+		}
+
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+	} while (ret == -EAGAIN);
+
+	if (ret) {
+		ret = -ENOMEM;
+		ehca_err(pd->device, "Can't allocate new idr entry.");
+		goto create_qp_exit0;
+	}
+
+	parms.servicetype = ibqptype2servicetype(init_attr->qp_type);
+	if (parms.servicetype < 0) {
+		ret = -EINVAL;
+		ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type);
+		goto create_qp_exit1; /* token is in the idr, remove it */
+	}
+
+	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+		parms.sigtype = HCALL_SIGT_EVERY;
+	else
+		parms.sigtype = HCALL_SIGT_BY_WQE;
+
+	/* UD_AV CIRCUMVENTION: UD-type QPs request two extra SGEs */
+	max_send_sge = init_attr->cap.max_send_sge;
+	max_recv_sge = init_attr->cap.max_recv_sge;
+	if (IB_QPT_UD == init_attr->qp_type ||
+	    IB_QPT_GSI == init_attr->qp_type ||
+	    IB_QPT_SMI == init_attr->qp_type) {
+		max_send_sge += 2;
+		max_recv_sge += 2;
+	}
+
+	parms.ipz_eq_handle = shca->eq.ipz_eq_handle;
+	parms.daqp_ctrl = isdaqp | daqp_completion;
+	parms.pd = my_pd->fw_pd;
+	parms.max_recv_sge = max_recv_sge;
+	parms.max_send_sge = max_send_sge;
+
+	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
+			 h_ret);
+		ret = ehca2ib_return_code(h_ret);
+		goto create_qp_exit1;
+	}
+
+	/* NOTE(review): daqp size tables are indexed by max_*_sge unchecked */
+	switch (init_attr->qp_type) {
+	case IB_QPT_RC:
+		if (isdaqp == 0) {
+			swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
+					     (parms.act_nr_send_sges)]);
+			rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
+					     (parms.act_nr_recv_sges)]);
+		} else { /* for daqp we need to use msg size, not wqe size */
+			swqe_size = da_rc_msg_size[max_send_sge];
+			rwqe_size = da_rc_msg_size[max_recv_sge];
+			parms.act_nr_send_sges = 1;
+			parms.act_nr_recv_sges = 1;
+		}
+		break;
+	case IB_QPT_UC:
+		swqe_size = offsetof(struct ehca_wqe,
+				     u.nud.sg_list[parms.act_nr_send_sges]);
+		rwqe_size = offsetof(struct ehca_wqe,
+				     u.nud.sg_list[parms.act_nr_recv_sges]);
+		break;
+	case IB_QPT_UD:
+	case IB_QPT_GSI:
+	case IB_QPT_SMI:
+		/* UD circumvention: hide the two SGEs added before the h_call */
+		parms.act_nr_recv_sges -= 2;
+		parms.act_nr_send_sges -= 2;
+		if (isdaqp) {
+			swqe_size = da_ud_sq_msg_size[max_send_sge];
+			rwqe_size = da_rc_msg_size[max_recv_sge];
+			parms.act_nr_send_sges = 1;
+			parms.act_nr_recv_sges = 1;
+		} else {
+			swqe_size = offsetof(struct ehca_wqe,
+					     u.ud_av.sg_list[parms.act_nr_send_sges]);
+			rwqe_size = offsetof(struct ehca_wqe,
+					     u.ud_av.sg_list[parms.act_nr_recv_sges]);
+		}
+		/* special QPs report the requested caps and a fixed QP number */
+		if (IB_QPT_GSI == init_attr->qp_type ||
+		    IB_QPT_SMI == init_attr->qp_type) {
+			parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
+			parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
+			parms.act_nr_send_sges = init_attr->cap.max_send_sge;
+			parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
+			my_qp->real_qp_num =
+				(init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	/* initializes r/squeue and registers queue pages */
+	ret = init_qp_queues(shca, my_qp,
+			     parms.nr_sq_pages, parms.nr_rq_pages,
+			     swqe_size, rwqe_size,
+			     parms.act_nr_send_sges, parms.act_nr_recv_sges);
+	if (ret) {
+		ehca_err(pd->device,
+			 "Couldn't initialize r/squeue and pages ret=%x", ret);
+		goto create_qp_exit2;
+	}
+
+	my_qp->ib_qp.pd = &my_pd->ib_pd;
+	my_qp->ib_qp.device = my_pd->ib_pd.device;
+
+	my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+	my_qp->ib_qp.send_cq = init_attr->send_cq;
+	my_qp->ib_qp.qp_num = my_qp->real_qp_num;
+	my_qp->ib_qp.qp_type = init_attr->qp_type;
+	my_qp->qp_type = init_attr->qp_type;
+	my_qp->ib_qp.srq = init_attr->srq;
+
+	my_qp->ib_qp.qp_context = init_attr->qp_context;
+	my_qp->ib_qp.event_handler = init_attr->event_handler;
+
+	init_attr->cap.max_inline_data = 0; /* not supported yet */
+	init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
+	init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
+	init_attr->cap.max_send_sge = parms.act_nr_send_sges;
+	init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
+
+	/* NOTE: define_apq0() not supported yet */
+	if (init_attr->qp_type == IB_QPT_GSI) {
+		h_ret = ehca_define_sqp(shca, my_qp, init_attr);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
+				 h_ret);
+			ret = ehca2ib_return_code(h_ret);
+			goto create_qp_exit3;
+		}
+	}
+	if (init_attr->send_cq) {
+		struct ehca_cq *cq = container_of(init_attr->send_cq,
+						  struct ehca_cq, ib_cq);
+		ret = ehca_cq_assign_qp(cq, my_qp);
+		if (ret) {
+			ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
+				 ret);
+			goto create_qp_exit3;
+		}
+		my_qp->send_cq = cq;
+	}
+	/* copy queues, galpa data to user space */
+	if (context && udata) {
+		struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
+		struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
+		struct ehca_create_qp_resp resp;
+		struct vm_area_struct *vma;
+		memset(&resp, 0, sizeof(resp));
+
+		resp.qp_num = my_qp->real_qp_num;
+		resp.token = my_qp->token;
+		resp.qp_type = my_qp->qp_type;
+		resp.qkey = my_qp->qkey;
+		resp.real_qp_num = my_qp->real_qp_num;
+		/* rqueue properties */
+		resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size;
+		resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg;
+		resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
+		resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
+		resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000,
+				       ipz_rqueue->queue_length,
+				       (void**)&resp.ipz_rqueue.queue,
+				       &vma);
+		if (ret) {
+			ehca_err(pd->device, "Could not mmap rqueue pages");
+			goto create_qp_exit3;
+		}
+		my_qp->uspace_rqueue = resp.ipz_rqueue.queue;
+		/* squeue properties */
+		resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
+		resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
+		resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
+		resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
+		resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
+		ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000,
+				       ipz_squeue->queue_length,
+				       (void**)&resp.ipz_squeue.queue,
+				       &vma);
+		if (ret) {
+			ehca_err(pd->device, "Could not mmap squeue pages");
+			goto create_qp_exit4;
+		}
+		my_qp->uspace_squeue = resp.ipz_squeue.queue;
+		/* fw_handle */
+		resp.galpas = my_qp->galpas;
+		ret = ehca_mmap_register(my_qp->galpas.user.fw_handle,
+					 (void**)&resp.galpas.kernel.fw_handle,
+					 &vma);
+		if (ret) {
+			ehca_err(pd->device, "Could not mmap fw_handle");
+			goto create_qp_exit5;
+		}
+		my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
+
+		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+			ehca_err(pd->device, "Copy to udata failed");
+			ret = -EINVAL;
+			goto create_qp_exit6;
+		}
+	}
+
+	return &my_qp->ib_qp;
+
+create_qp_exit6:
+	ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
+
+create_qp_exit5:
+	ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length);
+
+create_qp_exit4:
+	ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length);
+
+create_qp_exit3:
+	/* NOTE(review): a send_cq assignment is not undone here - confirm */
+	ipz_queue_dtor(&my_qp->ipz_rqueue);
+	ipz_queue_dtor(&my_qp->ipz_squeue);
+
+create_qp_exit2:
+	hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+
+create_qp_exit1:
+	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	idr_remove(&ehca_qp_idr, my_qp->token);
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+create_qp_exit0:
+	kmem_cache_free(qp_cache, my_qp);
+	return ERR_PTR(ret);
+}
+
+/*
+ * prepare_sqe_rts - called by internal_modify_qp() on sqe -> rts: sets the
+ * purge flag on the bad send wqe and every wqe after it, up to the 0xff
+ * marker, to avoid re-entering sqe. Stores the wqe count in *bad_wqe_cnt.
+ */
+static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
+			   int *bad_wqe_cnt)
+{
+	u64 h_ret;
+	struct ipz_queue *squeue;
+	void *bad_send_wqe_p, *bad_send_wqe_v;
+	void *squeue_start_p, *squeue_end_p;
+	void *squeue_start_v, *squeue_end_v;
+	struct ehca_wqe *wqe;
+	int qp_num = my_qp->ib_qp.qp_num;
+
+	/* get send wqe pointer */
+	h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+					   my_qp->ipz_qp_handle, &my_qp->pf,
+					   &bad_send_wqe_p, NULL, 2);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
+			 " ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, qp_num, h_ret);
+		return ehca2ib_return_code(h_ret);
+	}
+	/* clear bit 63; unsigned constant, 1L << 63 would overflow a long */
+	bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1UL << 63)));
+	ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
+		 qp_num, bad_send_wqe_p);
+	/* convert wqe pointer to vadr */
+	bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
+	if (ehca_debug_level)
+		ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
+	squeue = &my_qp->ipz_squeue;
+	squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L));
+	squeue_end_p = squeue_start_p+squeue->queue_length;
+	squeue_start_v = abs_to_virt((u64)squeue_start_p);
+	squeue_end_v = abs_to_virt((u64)squeue_end_p);
+	ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p",
+		 qp_num, squeue_start_v, squeue_end_v);
+	/* NOTE(review): the walk treats the queue as one contiguous range */
+	/* loop sets wqe's purge bit */
+	wqe = (struct ehca_wqe*)bad_send_wqe_v;
+	*bad_wqe_cnt = 0;
+	while (wqe->optype != 0xff && wqe->wqef != 0xff) {
+		if (ehca_debug_level)
+			ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
+		wqe->nr_of_data_seg = 0; /* suppress data access */
+		wqe->wqef = WQEF_PURGE; /* WQE to be purged */
+		wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size);
+		*bad_wqe_cnt = (*bad_wqe_cnt)+1;
+		if ((void*)wqe >= squeue_end_v) { /* wrap to the queue start */
+			wqe = squeue_start_v;
+		}
+	}
+	/*
+	 * bad wqe will be reprocessed and ignored when pol_cq() is called,
+	 *  i.e. nr of wqes with flush error status is one less
+	 */
+	ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
+		 qp_num, (*bad_wqe_cnt)-1);
+	wqe->wqef = 0; /* reset the flag of the wqe that ended the loop */
+
+	return 0;
+}
+
+/*
+ * internal_modify_qp - query the QP, validate the requested transition, fill
+ * the modify control block from attr/attr_mask and issue hipz_h_modify_qp().
+ * smi_reset2init is 1 only for the nested RESET->INIT call made for aqp0
+ * (SMI); ehca_modify_qp() must always pass 0. Returns 0 or an error code.
+ */
+static int internal_modify_qp(struct ib_qp *ibqp,
+			      struct ib_qp_attr *attr,
+			      int attr_mask, int smi_reset2init)
+{
+	enum ib_qp_state qp_cur_state, qp_new_state;
+	int cnt, qp_attr_idx, ret = 0;
+	enum ib_qp_statetrans statetrans;
+	struct hcp_modify_qp_control_block *mqpcb;
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca =
+		container_of(ibqp->pd->device, struct ehca_shca, ib_device);
+	u64 update_mask;
+	u64 h_ret;
+	int bad_wqe_cnt = 0;
+	int squeue_locked = 0;
+	unsigned long spl_flags = 0;
+
+	/* do query_qp to obtain current attr values */
+	mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (mqpcb == NULL) {
+		ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
+			 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				mqpcb, my_qp->galpas.kernel);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(ibqp->device, "hipz_h_query_qp() failed "
+			 "ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, ibqp->qp_num, h_ret);
+		ret = ehca2ib_return_code(h_ret);
+		goto modify_qp_exit1;
+	}
+
+	qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
+
+	if (qp_cur_state == -EINVAL) {	/* invalid qp state */
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 mqpcb->qp_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+	/*
+	 * circumvention to set aqp0 initial state to init
+	 * as expected by IB spec
+	 */
+	if (smi_reset2init == 0 &&
+	    ibqp->qp_type == IB_QPT_SMI &&
+	    qp_cur_state == IB_QPS_RESET &&
+	    (attr_mask & IB_QP_STATE) &&
+	    attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
+		struct ib_qp_attr smiqp_attr = {
+			.qp_state = IB_QPS_INIT,
+			.port_num = my_qp->init_attr.port_num,
+			.pkey_index = 0,
+			.qkey = 0
+		};
+		int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
+			IB_QP_PKEY_INDEX | IB_QP_QKEY;
+		int smirc = internal_modify_qp(
+			ibqp, &smiqp_attr, smiqp_attr_mask, 1);
+		if (smirc) {
+			ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
+				 "ehca_modify_qp() rc=%x", smirc);
+			ret = smirc; /* pass on the error, not an hcall status */
+			goto modify_qp_exit1;
+		}
+		qp_cur_state = IB_QPS_INIT;
+		ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
+	}
+	/* is transmitted current state  equal to "real" current state */
+	if ((attr_mask & IB_QP_CUR_STATE) &&
+	    qp_cur_state != attr->cur_qp_state) {
+		ret = -EINVAL;
+		ehca_err(ibqp->device,
+			 "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
+			 " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
+			 attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x "
+		 "new qp_state=%x attribute_mask=%x",
+		 my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
+
+	qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
+	if (!smi_reset2init &&
+	    !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
+				attr_mask)) {
+		ret = -EINVAL;
+		ehca_err(ibqp->device,
+			 "Invalid qp transition new_state=%x cur_state=%x "
+			 "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
+			 qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
+		goto modify_qp_exit1;
+	}
+
+	if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state)))
+		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+	else {
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "Invalid new qp state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 qp_new_state, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	/* retrieve state transition struct to get req and opt attrs */
+	statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
+	if (statetrans < 0) {
+		ret = -EINVAL;
+		ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
+			 "new_qp_state=%x State_xsition=%x ehca_qp=%p "
+			 "qp_num=%x", qp_cur_state, qp_new_state,
+			 statetrans, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
+
+	if (qp_attr_idx < 0) {
+		ret = qp_attr_idx;
+		ehca_err(ibqp->device,
+			 "Invalid QP type=%x ehca_qp=%p qp_num=%x",
+			 ibqp->qp_type, my_qp, ibqp->qp_num);
+		goto modify_qp_exit1;
+	}
+
+	ehca_dbg(ibqp->device,
+		 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
+		 my_qp, ibqp->qp_num, statetrans);
+
+	/* sqe -> rts: set purge bit of bad wqe before actual trans */
+	if ((my_qp->qp_type == IB_QPT_UD ||
+	     my_qp->qp_type == IB_QPT_GSI ||
+	     my_qp->qp_type == IB_QPT_SMI) &&
+	    statetrans == IB_QPST_SQE2RTS) {
+		/* mark next free wqe if kernel */
+		if (my_qp->uspace_squeue == 0) {
+			struct ehca_wqe *wqe;
+			/* lock send queue; released at modify_qp_exit2 */
+			spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+			squeue_locked = 1;
+			/* mark next free wqe: 0xff ends prepare_sqe_rts() */
+			wqe = (struct ehca_wqe*)
+				ipz_qeit_get(&my_qp->ipz_squeue);
+			wqe->optype = wqe->wqef = 0xff;
+			ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
+				 ibqp->qp_num, wqe);
+		}
+		ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
+		if (ret) {
+			ehca_err(ibqp->device, "prepare_sqe_rts() failed "
+				 "ehca_qp=%p qp_num=%x ret=%x",
+				 my_qp, ibqp->qp_num, ret);
+			goto modify_qp_exit2;
+		}
+	}
+
+	/*
+	 * enable RDMA_Atomic_Control if reset->init und reliable con
+	 * this is necessary since gen2 does not provide that flag,
+	 * but pHyp requires it
+	 */
+	if (statetrans == IB_QPST_RESET2INIT &&
+	    (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
+		mqpcb->rdma_atomic_ctrl = 3;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
+	}
+	/* circ. pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
+	if (statetrans == IB_QPST_INIT2RTR &&
+	    (ibqp->qp_type == IB_QPT_UC) &&
+	    !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
+		mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+	}
+
+	if (attr_mask & IB_QP_PKEY_INDEX) {
+		mqpcb->prim_p_key_idx = attr->pkey_index;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
+	}
+	if (attr_mask & IB_QP_PORT) {
+		if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
+			ret = -EINVAL;
+			ehca_err(ibqp->device, "Invalid port=%x. "
+				 "ehca_qp=%p qp_num=%x num_ports=%x",
+				 attr->port_num, my_qp, ibqp->qp_num,
+				 shca->num_ports);
+			goto modify_qp_exit2;
+		}
+		mqpcb->prim_phys_port = attr->port_num;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
+	}
+	if (attr_mask & IB_QP_QKEY) {
+		mqpcb->qkey = attr->qkey;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
+	}
+	if (attr_mask & IB_QP_AV) {
+		int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate);
+		int ehca_mult = ib_rate_to_mult(shca->sport[my_qp->
+						init_attr.port_num].rate);
+
+		mqpcb->dlid = attr->ah_attr.dlid;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
+		mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
+		mqpcb->service_level = attr->ah_attr.sl;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
+
+		if (ah_mult < ehca_mult)
+			mqpcb->max_static_rate = (ah_mult > 0) ?
+			((ehca_mult - 1) / ah_mult) : 0;
+		else
+			mqpcb->max_static_rate = 0;
+
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
+
+		/*
+		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
+		 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
+		 */
+		if (attr->ah_attr.ah_flags == IB_AH_GRH) {
+			mqpcb->send_grh_flag = 1 << 31;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+			mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
+
+			for (cnt = 0; cnt < 16; cnt++)
+				mqpcb->dest_gid.byte[cnt] =
+					attr->ah_attr.grh.dgid.raw[cnt];
+
+			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
+			mqpcb->flow_label = attr->ah_attr.grh.flow_label;
+			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
+			mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
+			update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
+			mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
+		}
+	}
+
+	if (attr_mask & IB_QP_PATH_MTU) {
+		mqpcb->path_mtu = attr->path_mtu;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
+	}
+	if (attr_mask & IB_QP_TIMEOUT) {
+		mqpcb->timeout = attr->timeout;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
+	}
+	if (attr_mask & IB_QP_RETRY_CNT) {
+		mqpcb->retry_count = attr->retry_cnt;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
+	}
+	if (attr_mask & IB_QP_RNR_RETRY) {
+		mqpcb->rnr_retry_count = attr->rnr_retry;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
+	}
+	if (attr_mask & IB_QP_RQ_PSN) {
+		mqpcb->receive_psn = attr->rq_psn;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
+	}
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { /* value is capped at 2 */
+		mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
+			attr->max_dest_rd_atomic : 2;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+	}
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { /* value is capped at 2 */
+		mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
+			attr->max_rd_atomic : 2;
+		update_mask |=
+			EHCA_BMASK_SET
+			(MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
+	}
+	if (attr_mask & IB_QP_ALT_PATH) {
+		int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate);
+		int ehca_mult = ib_rate_to_mult(
+			shca->sport[my_qp->init_attr.port_num].rate);
+
+		mqpcb->dlid_al = attr->alt_ah_attr.dlid;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1);
+		mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1);
+		mqpcb->service_level_al = attr->alt_ah_attr.sl;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1);
+
+		if (ah_mult < ehca_mult) /* alternate path: use the _al field */
+			mqpcb->max_static_rate_al = (ah_mult > 0) ?
+			((ehca_mult - 1) / ah_mult) : 0;
+		else
+			mqpcb->max_static_rate_al = 0;
+
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1);
+
+		/*
+		 * only if GRH is TRUE we might consider SOURCE_GID_IDX
+		 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
+		 */
+		if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
+			mqpcb->send_grh_flag_al = 1 << 31;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
+			mqpcb->source_gid_idx_al =
+				attr->alt_ah_attr.grh.sgid_index;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1);
+
+			for (cnt = 0; cnt < 16; cnt++)
+				mqpcb->dest_gid_al.byte[cnt] =
+					attr->alt_ah_attr.grh.dgid.raw[cnt];
+
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1);
+			mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1);
+			mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1);
+			mqpcb->traffic_class_al =
+				attr->alt_ah_attr.grh.traffic_class;
+			update_mask |=
+				EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
+		}
+	}
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+		mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
+	}
+
+	if (attr_mask & IB_QP_SQ_PSN) {
+		mqpcb->send_psn = attr->sq_psn;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
+	}
+
+	if (attr_mask & IB_QP_DEST_QPN) {
+		mqpcb->dest_qp_nr = attr->dest_qp_num;
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
+	}
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE) {
+		mqpcb->path_migration_state = attr->path_mig_state;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
+	}
+
+	if (attr_mask & IB_QP_CAP) { /* limits are stored incremented by one */
+		mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
+		mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
+		/* no support for max_send/recv_sge yet */
+	}
+
+	if (ehca_debug_level)
+		ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
+
+	h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+				 my_qp->ipz_qp_handle,
+				 &my_qp->pf,
+				 update_mask,
+				 mqpcb, my_qp->galpas.kernel);
+
+	if (h_ret != H_SUCCESS) {
+		ret = ehca2ib_return_code(h_ret);
+		ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
+			 "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num);
+		goto modify_qp_exit2;
+	}
+
+	if ((my_qp->qp_type == IB_QPT_UD ||
+	     my_qp->qp_type == IB_QPT_GSI ||
+	     my_qp->qp_type == IB_QPT_SMI) &&
+	    statetrans == IB_QPST_SQE2RTS) {
+		/* doorbell to reprocessing wqes */
+		iosync(); /* serialize GAL register access */
+		hipz_update_sqa(my_qp, bad_wqe_cnt-1);
+		ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
+	}
+
+	if (statetrans == IB_QPST_RESET2INIT ||
+	    statetrans == IB_QPST_INIT2INIT) {
+		mqpcb->qp_enable = 1;
+		mqpcb->qp_state = EHCA_QPS_INIT;
+		/* this second h_call only sets the enable bit */
+		update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+
+		h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+					 my_qp->ipz_qp_handle,
+					 &my_qp->pf,
+					 update_mask,
+					 mqpcb,
+					 my_qp->galpas.kernel);
+
+		if (h_ret != H_SUCCESS) {
+			ret = ehca2ib_return_code(h_ret);
+			ehca_err(ibqp->device, "ENABLE in context of "
+				 "RESET_2_INIT failed! Maybe you didn't get "
+				 "a LID h_ret=%lx ehca_qp=%p qp_num=%x",
+				 h_ret, my_qp, ibqp->qp_num);
+			goto modify_qp_exit2;
+		}
+	}
+
+	if (statetrans == IB_QPST_ANY2RESET) {
+		ipz_qeit_reset(&my_qp->ipz_rqueue);
+		ipz_qeit_reset(&my_qp->ipz_squeue);
+	}
+
+	if (attr_mask & IB_QP_QKEY)
+		my_qp->qkey = attr->qkey;
+
+modify_qp_exit2:
+	if (squeue_locked) { /* this means: sqe -> rts */
+		spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+		my_qp->sqerr_purgeflag = 1;
+	}
+
+modify_qp_exit1:
+	kfree(mqpcb);
+
+	return ret;
+}
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		   struct ib_udata *udata)
+{
+	struct ehca_pd *pd = container_of(ibqp->pd, struct ehca_pd, ib_pd);
+	u32 caller_pid = current->tgid;
+
+	/* a PD tied to a user context may only be used by its owning pid */
+	if (pd->ib_pd.uobject && pd->ib_pd.uobject->context &&
+	    pd->ownpid != caller_pid) {
+		ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x",
+			 caller_pid, pd->ownpid);
+		return -EINVAL;
+	}
+
+	/* smi_reset2init = 0: this is not the nested SMI RESET->INIT call */
+	return internal_modify_qp(ibqp, attr, attr_mask, 0);
+}
+
+/*
+ * ehca_query_qp - report the current attributes of a QP.
+ *
+ * Reads the QP control block with hipz_h_query_qp() and copies its fields
+ * into *qp_attr; if qp_init_attr is non-NULL it receives the init
+ * attributes stored in the QP. Returns 0 on success, -EINVAL for a foreign
+ * caller pid, an unsupported mask bit or an unknown QP state, -ENOMEM if the
+ * control block cannot be allocated, or the translated h_call error.
+ */
+int ehca_query_qp(struct ib_qp *qp,
+		  struct ib_qp_attr *qp_attr,
+		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ehca_pd *pd = container_of(qp->pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
+					      ib_device);
+	struct hcp_modify_qp_control_block *cb;
+	u32 caller_pid = current->tgid;
+	int ud_sge_adj, i, ret = 0;
+	u64 h_ret;
+
+	/* a PD tied to a user context may only be used by its owning pid */
+	if (pd->ib_pd.uobject && pd->ib_pd.uobject->context &&
+	    pd->ownpid != caller_pid) {
+		ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x",
+			 caller_pid, pd->ownpid);
+		return -EINVAL;
+	}
+
+	/* refuse masks that contain any bit this query cannot serve */
+	if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
+		ehca_err(qp->device,"Invalid attribute mask "
+			 "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
+			 my_qp, qp->qp_num, qp_attr_mask);
+		return -EINVAL;
+	}
+
+	/* zeroed buffer that hipz_h_query_qp() fills in */
+	cb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
+	if (cb == NULL) {
+		ehca_err(qp->device,"Out of memory for qpcb "
+			 "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
+		return -ENOMEM;
+	}
+
+	/* fetch the current control block contents through the h_call */
+	h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				cb, my_qp->galpas.kernel);
+	if (h_ret != H_SUCCESS) {
+		ret = ehca2ib_return_code(h_ret);
+		ehca_err(qp->device,"hipz_h_query_qp() failed "
+			 "ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, qp->qp_num, h_ret);
+		goto query_qp_exit1;
+	}
+
+	/* both state fields are filled from the one state in the block */
+	qp_attr->cur_qp_state = ehca2ib_qp_state(cb->qp_state);
+	qp_attr->qp_state = qp_attr->cur_qp_state;
+	if (qp_attr->cur_qp_state == -EINVAL) {
+		ret = -EINVAL;
+		ehca_err(qp->device,"Got invalid ehca_qp_state=%x "
+			 "ehca_qp=%p qp_num=%x",
+			 cb->qp_state, my_qp, qp->qp_num);
+		goto query_qp_exit1;
+	}
+
+	/* sq_draining is only ever set here, never cleared */
+	if (qp_attr->qp_state == IB_QPS_SQD)
+		qp_attr->sq_draining = 1;
+
+	/* keys, MTU, migration state and packet sequence numbers */
+	qp_attr->qkey = cb->qkey;
+	qp_attr->path_mtu = cb->path_mtu;
+	qp_attr->path_mig_state = cb->path_migration_state;
+	qp_attr->rq_psn = cb->receive_psn;
+	qp_attr->sq_psn = cb->send_psn;
+	qp_attr->min_rnr_timer = cb->min_rnr_nak_timer_field;
+
+	/* the reported work request limits are the block values minus one */
+	qp_attr->cap.max_send_wr = cb->max_nr_outst_send_wr-1;
+	qp_attr->cap.max_recv_wr = cb->max_nr_outst_recv_wr-1;
+
+	/* UD_AV CIRCUMVENTION: UD QPs report two SGEs fewer per wqe */
+	ud_sge_adj = (my_qp->qp_type == IB_QPT_UD) ? 2 : 0;
+	qp_attr->cap.max_send_sge = cb->actual_nr_sges_in_sq_wqe - ud_sge_adj;
+	qp_attr->cap.max_recv_sge = cb->actual_nr_sges_in_rq_wqe - ud_sge_adj;
+
+	qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
+	qp_attr->dest_qp_num = cb->dest_qp_nr;
+
+	/* primary path: pkey index and port are extracted with bit masks */
+	qp_attr->pkey_index =
+		EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, cb->prim_p_key_idx);
+	qp_attr->port_num =
+		EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, cb->prim_phys_port);
+
+	qp_attr->timeout = cb->timeout;
+	qp_attr->retry_cnt = cb->retry_count;
+	qp_attr->rnr_retry = cb->rnr_retry_count;
+
+	/* alternate path; the pkey index reuses the primary bit mask */
+	qp_attr->alt_pkey_index =
+		EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, cb->alt_p_key_idx);
+	qp_attr->alt_port_num = cb->alt_phys_port;
+	qp_attr->alt_timeout = cb->timeout_al;
+
+	/* primary av */
+	qp_attr->ah_attr.sl = cb->service_level;
+	if (cb->send_grh_flag)
+		qp_attr->ah_attr.ah_flags = IB_AH_GRH;
+	qp_attr->ah_attr.static_rate = cb->max_static_rate;
+	qp_attr->ah_attr.dlid = cb->dlid;
+	qp_attr->ah_attr.src_path_bits = cb->source_path_bits;
+	qp_attr->ah_attr.port_num = qp_attr->port_num;
+
+	/* primary GRH, copied regardless of the GRH flag */
+	qp_attr->ah_attr.grh.traffic_class = cb->traffic_class;
+	qp_attr->ah_attr.grh.hop_limit = cb->hop_limit;
+	qp_attr->ah_attr.grh.sgid_index = cb->source_gid_idx;
+	qp_attr->ah_attr.grh.flow_label = cb->flow_label;
+	for (i = 0; i < 16; i++)
+		qp_attr->ah_attr.grh.dgid.raw[i] = cb->dest_gid.byte[i];
+
+	/* alternate AV */
+	qp_attr->alt_ah_attr.sl = cb->service_level_al;
+	if (cb->send_grh_flag_al)
+		qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
+	qp_attr->alt_ah_attr.static_rate = cb->max_static_rate_al;
+	qp_attr->alt_ah_attr.dlid = cb->dlid_al;
+	qp_attr->alt_ah_attr.src_path_bits = cb->source_path_bits_al;
+
+	/* alternate GRH */
+	qp_attr->alt_ah_attr.grh.traffic_class = cb->traffic_class_al;
+	qp_attr->alt_ah_attr.grh.hop_limit = cb->hop_limit_al;
+	qp_attr->alt_ah_attr.grh.sgid_index = cb->source_gid_idx_al;
+	qp_attr->alt_ah_attr.grh.flow_label = cb->flow_label_al;
+	for (i = 0; i < 16; i++)
+		qp_attr->alt_ah_attr.grh.dgid.raw[i] = cb->dest_gid_al.byte[i];
+
+	/* hand back the init attributes saved in the QP */
+	if (qp_init_attr != NULL)
+		*qp_init_attr = my_qp->init_attr;
+
+	/* optional dump of the control block when debugging is enabled */
+	if (ehca_debug_level)
+		ehca_dmp(cb, 4*70, "qp_num=%x", qp->qp_num);
+
+	/* shared exit: the control block is freed on success and failure */
+query_qp_exit1:
+	kfree(cb);
+
+	return ret;
+}
+
+/*
+ * ehca_destroy_qp - detach the QP from its send CQ, drop its idr token,
+ * unmap userspace mappings, destroy it with hipz_h_destroy_qp() and free
+ * its queues and the ehca_qp object. Returns 0 or an error code.
+ */
+int ehca_destroy_qp(struct ib_qp *ibqp)
+{
+	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+					      ib_device);
+	struct ehca_pd *pd = container_of(ibqp->pd, struct ehca_pd, ib_pd);
+	u32 caller_pid = current->tgid;
+	u32 qp_num = ibqp->qp_num;
+	unsigned long irq_flags;
+	u64 h_ret;
+	int ret;
+
+	/* a PD tied to a user context may only be used by its owning pid */
+	if (pd->ib_pd.uobject && pd->ib_pd.uobject->context &&
+	    pd->ownpid != caller_pid) {
+		ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
+			 caller_pid, pd->ownpid);
+		return -EINVAL;
+	}
+
+	if (my_qp->send_cq) {
+		ret = ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
+		if (ret) {
+			ehca_err(ibqp->device, "Couldn't unassign qp from "
+				 "send_cq ret=%x qp_num=%x cq_num=%x", ret,
+				 my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number);
+			return ret;
+		}
+	}
+
+	spin_lock_irqsave(&ehca_qp_idr_lock, irq_flags);
+	idr_remove(&ehca_qp_idr, my_qp->token);
+	spin_unlock_irqrestore(&ehca_qp_idr_lock, irq_flags);
+
+	/* un-mmap if vma alloc; a failed munmap is logged, not fatal */
+	if (my_qp->uspace_rqueue) {
+		if (ehca_munmap(my_qp->uspace_rqueue,
+				my_qp->ipz_rqueue.queue_length))
+			ehca_err(ibqp->device, "Could not munmap rqueue "
+				 "qp_num=%x", qp_num);
+		if (ehca_munmap(my_qp->uspace_squeue,
+				my_qp->ipz_squeue.queue_length))
+			ehca_err(ibqp->device, "Could not munmap squeue "
+				 "qp_num=%x", qp_num);
+		if (ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE))
+			ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x",
+				 qp_num);
+	}
+
+	h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
+			 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
+		return ehca2ib_return_code(h_ret);
+	}
+
+	/* no support for IB_QPT_SMI yet */
+	if (my_qp->init_attr.qp_type == IB_QPT_GSI) {
+		u8 port = my_qp->init_attr.port_num;
+		struct ib_event event;
+
+		ehca_info(ibqp->device, "device %s: port %x is inactive.",
+			  shca->ib_device.name, port);
+		event.device = &shca->ib_device;
+		event.event = IB_EVENT_PORT_ERR;
+		event.element.port_num = port;
+		shca->sport[port - 1].port_state = IB_PORT_DOWN;
+		ib_dispatch_event(&event);
+	}
+
+	/* the QP is destroyed: release both queues and the QP object */
+	ipz_queue_dtor(&my_qp->ipz_rqueue);
+	ipz_queue_dtor(&my_qp->ipz_squeue);
+	kmem_cache_free(qp_cache, my_qp);
+
+	return 0;
+}
+
+int ehca_init_qp_cache(void)
+{
+	/*
+	 * slab cache from which every struct ehca_qp is allocated
+	 */
+	qp_cache = kmem_cache_create("ehca_cache_qp", sizeof(struct ehca_qp),
+				     0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+	return qp_cache ? 0 : -ENOMEM;
+}
+
+void ehca_cleanup_qp_cache(void)
+{
+	if (qp_cache != NULL)	/* nothing to destroy without a cache */
+		kmem_cache_destroy(qp_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
new file mode 100644
index 0000000..b46bda1
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -0,0 +1,653 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  post_send/recv, poll_cq, req_notify
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm-powerpc/system.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+/* Fill receive WQE wqe_p from recv_wr; -EINVAL on a bad SGE count, else 0. */
+static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+				  struct ehca_wqe *wqe_p,
+				  struct ib_recv_wr *recv_wr)
+{
+	u8 i = 0;
+
+	/* the SGE count must be non-negative and fit the queue's limit */
+	if (unlikely(recv_wr->num_sge < 0 ||
+		     recv_wr->num_sge > ipz_rqueue->act_nr_of_sg)) {
+		ehca_gen_err("Invalid number of WQE SGE. "
+			 "num_sqe=%x max_nr_of_sg=%x",
+			 recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
+		return -EINVAL;
+	}
+
+	/* zero the WQE up to the offset of the UD scatter/gather list */
+	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+	wqe_p->work_request_id = recv_wr->wr_id;
+	wqe_p->nr_of_data_seg = recv_wr->num_sge;
+
+	/* copy every caller SGE into the receive scatter/gather list */
+	while (i < recv_wr->num_sge) {
+		wqe_p->u.all_rcv.sg_list[i].vaddr = recv_wr->sg_list[i].addr;
+		wqe_p->u.all_rcv.sg_list[i].lkey = recv_wr->sg_list[i].lkey;
+		wqe_p->u.all_rcv.sg_list[i].length = recv_wr->sg_list[i].length;
+		i++;
+	}
+
+	if (ehca_debug_level) {
+		ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue);
+		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
+	}
+	return 0;
+}
+
+#if defined(DEBUG_GSI_SEND_WR)
+
+/* need ib_mad struct */
+#include <rdma/ib_mad.h>
+
+/* Debug aid: log every WR of a UD send chain, its MAD header and its SGEs. */
+static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+{
+	int idx = 0;	/* position of the current WR in the chain; starts at 0 */
+	int j;
+	while (send_wr) {
+		struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
+		struct ib_sge *sge = send_wr->sg_list;
+		ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
+			     "send_flags=%x opcode=%x",idx, send_wr->wr_id,
+			     send_wr->num_sge, send_wr->send_flags,
+			     send_wr->opcode);
+		if (mad_hdr) {
+			ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
+				     "mgmt_class=%x class_version=%x method=%x "
+				     "status=%x class_specific=%x tid=%lx "
+				     "attr_id=%x resv=%x attr_mod=%x",
+				     idx, mad_hdr->base_version,
+				     mad_hdr->mgmt_class,
+				     mad_hdr->class_version, mad_hdr->method,
+				     mad_hdr->status, mad_hdr->class_specific,
+				     mad_hdr->tid, mad_hdr->attr_id,
+				     mad_hdr->resv, mad_hdr->attr_mod);
+		}
+		for (j = 0; j < send_wr->num_sge; j++) {
+			u8 *data = (u8 *) abs_to_virt(sge->addr);
+			ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
+				     "lkey=%x",
+				     idx, j, data, sge->length, sge->lkey);
+			/* assume length is n*16 */
+			ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
+				 idx, j);
+			sge++;
+		} /* eof for j */
+		idx++;
+		send_wr = send_wr->next;
+	} /* eof while send_wr */
+}
+
+#endif /* DEBUG_GSI_SEND_WR */
+
+static inline int ehca_write_swqe(struct ehca_qp *qp,
+				  struct ehca_wqe *wqe_p,
+				  const struct ib_send_wr *send_wr)
+{
+	u32 idx;
+	u64 dma_length;
+	struct ehca_av *my_av;
+	u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+	/* fill send WQE wqe_p from send_wr; every failure path returns -EINVAL */
+	if (unlikely((send_wr->num_sge < 0) ||
+		     (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
+		ehca_gen_err("Invalid number of WQE SGE. "
+			 "num_sqe=%x max_nr_of_sg=%x",
+			 send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
+		return -EINVAL; /* invalid SG list length */
+	}
+
+	/* clear wqe header until sglist */
+	memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+	wqe_p->work_request_id = send_wr->wr_id;
+	/* translate the ib opcode into the WQE operation type */
+	switch (send_wr->opcode) {
+	case IB_WR_SEND:
+	case IB_WR_SEND_WITH_IMM:
+		wqe_p->optype = WQE_OPTYPE_SEND;
+		break;
+	case IB_WR_RDMA_WRITE:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
+		break;
+	case IB_WR_RDMA_READ:
+		wqe_p->optype = WQE_OPTYPE_RDMAREAD;
+		break;
+	default:
+		ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
+		return -EINVAL; /* invalid opcode */
+	}
+
+	wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
+
+	wqe_p->wr_flag = 0;
+
+	if (send_wr->send_flags & IB_SEND_SIGNALED)
+		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+
+	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
+	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+		/* this might not work as long as HW does not support it */
+		wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+		wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
+	}
+
+	wqe_p->nr_of_data_seg = send_wr->num_sge;
+	/* fill in the fields that depend on the QP type */
+	switch (qp->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		/* no break is intentional here */
+	case IB_QPT_UD:
+		/* IB 1.2 spec C10-15 compliance */
+		if (send_wr->wr.ud.remote_qkey & 0x80000000)
+			remote_qkey = qp->qkey;
+
+		wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+		wqe_p->local_ee_context_qkey = remote_qkey;
+		if (!send_wr->wr.ud.ah) {
+			ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+			return -EINVAL;
+		}
+		my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
+		wqe_p->u.ud_av.ud_av = my_av->av;
+
+		/*
+		 * omitted check of IB_SEND_INLINE
+		 * since HW does not support it
+		 */
+		for (idx = 0; idx < send_wr->num_sge; idx++) {
+			wqe_p->u.ud_av.sg_list[idx].vaddr =
+				send_wr->sg_list[idx].addr;
+			wqe_p->u.ud_av.sg_list[idx].lkey =
+				send_wr->sg_list[idx].lkey;
+			wqe_p->u.ud_av.sg_list[idx].length =
+				send_wr->sg_list[idx].length;
+		} /* eof for idx */
+		if (qp->qp_type == IB_QPT_SMI ||
+		    qp->qp_type == IB_QPT_GSI)
+			wqe_p->u.ud_av.ud_av.pmtu = 1;
+		if (qp->qp_type == IB_QPT_GSI) {
+			wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
+#ifdef DEBUG_GSI_SEND_WR
+			trace_send_wr_ud(send_wr);
+#endif /* DEBUG_GSI_SEND_WR */
+		}
+		break;
+
+	case IB_QPT_UC:
+		if (send_wr->send_flags & IB_SEND_FENCE)
+			wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
+		/* no break is intentional here */
+	case IB_QPT_RC:
+		/* TODO: atomic not implemented */
+		wqe_p->u.nud.remote_virtual_adress =
+			send_wr->wr.rdma.remote_addr;
+		wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
+
+		/*
+		 * omitted checking of IB_SEND_INLINE
+		 * since HW does not support it
+		 */
+		dma_length = 0;
+		for (idx = 0; idx < send_wr->num_sge; idx++) {
+			wqe_p->u.nud.sg_list[idx].vaddr =
+				send_wr->sg_list[idx].addr;
+			wqe_p->u.nud.sg_list[idx].lkey =
+				send_wr->sg_list[idx].lkey;
+			wqe_p->u.nud.sg_list[idx].length =
+				send_wr->sg_list[idx].length;
+			dma_length += send_wr->sg_list[idx].length;
+		} /* eof idx */
+		wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
+
+		break;
+
+	default:
+		ehca_gen_err("Invalid qptype=%x", qp->qp_type);
+		return -EINVAL;
+	}
+
+	if (ehca_debug_level) {
+		ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
+		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
+	}
+	return 0;
+}
+
+/* convert raw cqe_status to an ib_wc_status; *wc_status is always written */
+static inline void map_ib_wc_status(u32 cqe_status,
+				    enum ib_wc_status *wc_status)
+{
+	if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
+		switch (cqe_status & 0x3F) {
+		case 0x01:
+		case 0x21:
+			*wc_status = IB_WC_LOC_LEN_ERR;
+			break;
+		case 0x02:
+		case 0x22:
+			*wc_status = IB_WC_LOC_QP_OP_ERR;
+			break;
+		case 0x03:
+		case 0x23:
+			*wc_status = IB_WC_LOC_EEC_OP_ERR;
+			break;
+		case 0x04:
+		case 0x24:
+			*wc_status = IB_WC_LOC_PROT_ERR;
+			break;
+		case 0x05:
+		case 0x25:
+			*wc_status = IB_WC_WR_FLUSH_ERR;
+			break;
+		case 0x06:
+			*wc_status = IB_WC_MW_BIND_ERR;
+			break;
+		case 0x07: /* remote error - look into bits 20:24 */
+			switch ((cqe_status
+				 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
+			case 0x0:
+				/* PSN sequence error: no matching ib status */
+				*wc_status = IB_WC_GENERAL_ERR;
+				break;
+			case 0x1:
+				*wc_status = IB_WC_REM_INV_REQ_ERR;
+				break;
+			case 0x2:
+				*wc_status = IB_WC_REM_ACCESS_ERR;
+				break;
+			case 0x3:
+				*wc_status = IB_WC_REM_OP_ERR;
+				break;
+			case 0x4:
+				*wc_status = IB_WC_REM_INV_RD_REQ_ERR;
+				break;
+			default:
+				/* unknown remote code: same as outer default */
+				*wc_status = IB_WC_FATAL_ERR;
+				break;
+			}
+			break;
+		case 0x08:
+			*wc_status = IB_WC_RETRY_EXC_ERR;
+			break;
+		case 0x09:
+			*wc_status = IB_WC_RNR_RETRY_EXC_ERR;
+			break;
+		case 0x0A:
+		case 0x2D:
+			*wc_status = IB_WC_REM_ABORT_ERR;
+			break;
+		case 0x0B:
+		case 0x2E:
+			*wc_status = IB_WC_INV_EECN_ERR;
+			break;
+		case 0x0C:
+		case 0x2F:
+			*wc_status = IB_WC_INV_EEC_STATE_ERR;
+			break;
+		case 0x0D:
+			*wc_status = IB_WC_BAD_RESP_ERR;
+			break;
+		case 0x10: /* WQE purged */
+			*wc_status = IB_WC_WR_FLUSH_ERR;
+			break;
+		default:
+			*wc_status = IB_WC_FATAL_ERR;
+			break;
+		}
+	} else
+		*wc_status = IB_WC_SUCCESS;
+}
+
+/* Post a chain of send WRs; the first WR not queued goes to *bad_send_wr. */
+int ehca_post_send(struct ib_qp *qp,
+		   struct ib_send_wr *send_wr,
+		   struct ib_send_wr **bad_send_wr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ib_send_wr *cur_send_wr;
+	struct ehca_wqe *wqe_p;
+	int wqe_cnt = 0;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	/* LOCK the QUEUE */
+	spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+
+	/* loop processes list of send reqs */
+	for (cur_send_wr = send_wr; cur_send_wr != NULL;
+	     cur_send_wr = cur_send_wr->next) {
+		/* remember the offset so that a failed write can be undone */
+		u64 start_offset = my_qp->ipz_squeue.current_q_offset;
+		/* get pointer next to free WQE */
+		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
+		if (unlikely(!wqe_p)) {
+			/* too many posted work requests: queue overflow */
+			if (bad_send_wr)
+				*bad_send_wr = cur_send_wr;
+			if (wqe_cnt == 0) {
+				ret = -ENOMEM;
+				ehca_err(qp->device, "Too many posted WQEs "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_send_exit0;
+		}
+		/* write a SEND WQE into the QUEUE */
+		ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
+		/* on failure rewind the free entry pointer to its start value */
+		if (unlikely(ret)) {
+			my_qp->ipz_squeue.current_q_offset = start_offset;
+			if (bad_send_wr)
+				*bad_send_wr = cur_send_wr;
+			if (wqe_cnt == 0) {
+				ret = -EINVAL;
+				ehca_err(qp->device, "Could not write WQE "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_send_exit0;
+		}
+		wqe_cnt++;
+		ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
+			 my_qp, qp->qp_num, wqe_cnt);
+	} /* eof for cur_send_wr */
+
+post_send_exit0:
+	/* UNLOCK the QUEUE */
+	spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+	iosync(); /* serialize GAL register access */
+	hipz_update_sqa(my_qp, wqe_cnt);
+	return ret;
+}
+
+/* Post a chain of recv WRs; the first WR not queued goes to *bad_recv_wr. */
+int ehca_post_recv(struct ib_qp *qp,
+		   struct ib_recv_wr *recv_wr,
+		   struct ib_recv_wr **bad_recv_wr)
+{
+	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+	struct ib_recv_wr *cur_recv_wr;
+	struct ehca_wqe *wqe_p;
+	int wqe_cnt = 0;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	/* LOCK the QUEUE */
+	spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
+
+	/* loop processes list of recv reqs */
+	for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
+	     cur_recv_wr = cur_recv_wr->next) {
+		/* remember the offset so that a failed write can be undone */
+		u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
+		/* get pointer next to free WQE */
+		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
+		if (unlikely(!wqe_p)) {
+			/* too many posted work requests: queue overflow */
+			if (bad_recv_wr)
+				*bad_recv_wr = cur_recv_wr;
+			if (wqe_cnt == 0) {
+				ret = -ENOMEM;
+				ehca_err(qp->device, "Too many posted WQEs "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_recv_exit0;
+		}
+		/* write a RECV WQE into the QUEUE */
+		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+		/* on failure rewind the free entry pointer to its start value */
+		if (unlikely(ret)) {
+			my_qp->ipz_rqueue.current_q_offset = start_offset;
+			if (bad_recv_wr)
+				*bad_recv_wr = cur_recv_wr;
+			if (wqe_cnt == 0) {
+				ret = -EINVAL;
+				ehca_err(qp->device, "Could not write WQE "
+					 "qp_num=%x", qp->qp_num);
+			}
+			goto post_recv_exit0;
+		}
+		wqe_cnt++;
+		ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
+		     my_qp, qp->qp_num, wqe_cnt);
+	} /* eof for cur_recv_wr */
+
+post_recv_exit0:
+	spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
+	iosync(); /* serialize GAL register access */
+	hipz_update_rqa(my_qp, wqe_cnt);
+	return ret;
+}
+
+/*
+ * ib_wc_opcode table converts ehca wc opcode to ib.
+ * Zero marks an invalid opcode, so each entry stores the ib opcode plus
+ * one and the lookup subtracts it again; sized for any 8-bit index.
+ */
+static const u8 ib_wc_opcode[256] = {
+	[0x01] = IB_WC_RECV+1,
+	[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
+	[0x04] = IB_WC_BIND_MW+1,
+	[0x08] = IB_WC_FETCH_ADD+1,
+	[0x10] = IB_WC_COMP_SWAP+1,
+	[0x20] = IB_WC_RDMA_WRITE+1,
+	[0x40] = IB_WC_RDMA_READ+1,
+	[0x80] = IB_WC_SEND+1
+};
+
+/* poll one entry of cq into *wc; -EAGAIN if no valid CQE is left, else 0 */
+static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
+{
+	int ret = 0;
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	struct ehca_cqe *cqe;
+	int cqe_count = 0;
+
+poll_cq_one_read_cqe:
+	cqe = (struct ehca_cqe *)
+		ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
+	if (!cqe) {
+		ret = -EAGAIN;
+		ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
+			 "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret);
+		goto  poll_cq_one_exit0;
+	}
+
+	/* prevents loads being reordered across this point */
+	rmb();
+	/* count every consumed CQE, including the ones skipped below */
+	cqe_count++;
+	if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
+		struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
+		int purgeflag;
+		unsigned long spl_flags;
+		if (!qp) {
+			ehca_err(cq->device, "cq_num=%x qp_num=%x "
+				 "could not find qp -> ignore cqe",
+				 my_cq->cq_number, cqe->local_qp_number);
+			ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
+				 my_cq->cq_number, cqe->local_qp_number);
+			/* ignore this purged cqe */
+			goto poll_cq_one_read_cqe;
+		}
+		spin_lock_irqsave(&qp->spinlock_s, spl_flags);
+		purgeflag = qp->sqerr_purgeflag;
+		spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
+
+		if (purgeflag) {
+			ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
+				 "src_qp=%x",
+				 cqe->local_qp_number, cqe->remote_qp_number);
+			if (ehca_debug_level)
+				ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
+					 cqe->local_qp_number,
+					 cqe->remote_qp_number);
+			/*
+			 * ignore this to avoid double cqes of bad wqe that
+			 * caused sqe; flag is cleared without spinlock_s held
+			 */
+			qp->sqerr_purgeflag = 0;
+			goto poll_cq_one_read_cqe;
+		}
+	}
+
+	/* tracing cqe */
+	if (ehca_debug_level) {
+		ehca_dbg(cq->device,
+			 "Received COMPLETION ehca_cq=%p cq_num=%x -----",
+			 my_cq, my_cq->cq_number);
+		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+			 my_cq, my_cq->cq_number);
+		ehca_dbg(cq->device,
+			 "ehca_cq=%p cq_num=%x -------------------------",
+			 my_cq, my_cq->cq_number);
+	}
+
+	/* we got a completion! */
+	wc->wr_id = cqe->work_request_id;
+
+	/* eval ib_wc_opcode */
+	wc->opcode = ib_wc_opcode[cqe->optype]-1;
+	if (unlikely(wc->opcode == -1)) {
+		ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
+			 "ehca_cq=%p cq_num=%x",
+			 cqe->optype, cqe->status, my_cq, my_cq->cq_number);
+		/* dump cqe for other infos */
+		ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+			 my_cq, my_cq->cq_number);
+		/* update also queue adder to throw away this entry (ret is 0) */
+		goto poll_cq_one_exit0;
+	}
+	/* eval ib_wc_status */
+	if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) {
+		/* complete with errors */
+		map_ib_wc_status(cqe->status, &wc->status);
+		wc->vendor_err = wc->status;
+	} else
+		wc->status = IB_WC_SUCCESS;
+	/* copy the remaining completion fields out of the CQE */
+	wc->qp_num = cqe->local_qp_number;
+	wc->byte_len = cqe->nr_bytes_transferred;
+	wc->pkey_index = cqe->pkey_index;
+	wc->slid = cqe->rlid;
+	wc->dlid_path_bits = cqe->dlid;
+	wc->src_qp = cqe->remote_qp_number;
+	wc->wc_flags = cqe->w_completion_flags;
+	wc->imm_data = cpu_to_be32(cqe->immediate_data);
+	wc->sl = cqe->service_level;
+
+	if (wc->status != IB_WC_SUCCESS)
+		ehca_dbg(cq->device,
+			 "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
+			 "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
+			 "cqe=%p", my_cq, my_cq->cq_number, cqe->optype,
+			 cqe->status, cqe->local_qp_number,
+			 cqe->remote_qp_number, cqe->work_request_id, cqe);
+
+poll_cq_one_exit0:
+	if (cqe_count > 0)
+		hipz_update_feca(my_cq, cqe_count);
+
+	return ret;
+}
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+	int nr;
+	struct ib_wc *current_wc = wc;
+	int ret = 0;
+	unsigned long spl_flags;
+
+	if (num_entries < 1) {
+		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
+			 "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
+		ret = -EINVAL;
+		goto poll_cq_exit0;
+	}
+
+	spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+	for (nr = 0; nr < num_entries; nr++) {
+		ret = ehca_poll_cq_one(cq, current_wc);
+		if (ret)
+			break;
+		current_wc++;
+	} /* eof for nr */
+	spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+	if (ret == -EAGAIN  || !ret)
+		ret = nr;
+
+poll_cq_exit0:
+	return ret;
+}
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
+{
+	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+
+	/* each supported notify type maps onto one hipz_set_cqx_n* call */
+	if (cq_notify == IB_CQ_SOLICITED) {
+		hipz_set_cqx_n0(my_cq, 1);
+		return 0;
+	}
+	if (cq_notify == IB_CQ_NEXT_COMP) {
+		hipz_set_cqx_n1(my_cq, 1);
+		return 0;
+	}
+
+	/* every other notify type is rejected */
+	return -EINVAL;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
new file mode 100644
index 0000000..9f16e9c
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -0,0 +1,111 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  SQP functions
+ *
+ *  Authors: Khadija Souissi <souissi@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+
+/**
+ * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
+ * pair is created successfully, the corresponding port gets active.
+ *
+ * Define Special Queue pair 0 (SMI QP) is still not supported.
+ *
+ * @shca: HCA whose per-port state in shca->sport[] is updated and polled
+ * @ehca_qp: queue pair handed to hipz_h_define_aqp1() for IB_QPT_GSI
+ * @qp_init_attr: Queue pair init attributes with port and queue pair type
+ */
+u64 ehca_define_sqp(struct ehca_shca *shca,
+		    struct ehca_qp *ehca_qp,
+		    struct ib_qp_init_attr *qp_init_attr)
+{
+	u32 pma_qp_nr, bma_qp_nr;
+	u64 ret;
+	u8 port = qp_init_attr->port_num;
+	int counter;
+
+	shca->sport[port - 1].port_state = IB_PORT_DOWN;
+
+	switch (qp_init_attr->qp_type) {
+	case IB_QPT_SMI:
+		/* function not supported yet */
+		break;
+	case IB_QPT_GSI:
+		ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
+					 ehca_qp->ipz_qp_handle,
+					 ehca_qp->galpas.kernel,
+					 (u32) qp_init_attr->port_num,
+					 &pma_qp_nr, &bma_qp_nr);
+		if (ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device,
+				 "Can't define AQP1 for port %x. rc=%lx",
+				 port, ret);
+			return ret;
+		}
+		break;
+	default:
+		ehca_err(&shca->ib_device, "invalid qp_type=%x",
+			 qp_init_attr->qp_type);
+		return H_PARAMETER;
+	}
+
+	for (counter = 0;
+	     shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
+		     counter < ehca_port_act_time;
+	     counter++) {
+		ehca_dbg(&shca->ib_device, "... wait until port %x is active",
+			 port);
+		msleep_interruptible(1000);
+	}
+
+	/* test the state itself: it may turn active during the last sleep */
+	if (shca->sport[port - 1].port_state != IB_PORT_ACTIVE) {
+		ehca_err(&shca->ib_device, "Port %x is not active.", port);
+		return H_HARDWARE;
+	}
+	return H_SUCCESS;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
new file mode 100644
index 0000000..9f56bb8
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -0,0 +1,172 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  auxiliary functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Khadija Souissi <souissik@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef EHCA_TOOLS_H
+#define EHCA_TOOLS_H
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+
+#include <asm/abs_addr.h>
+#include <asm/ibmebus.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+extern int ehca_debug_level;
+
+#define ehca_dbg(ib_dev, format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
+				   "PU%04x EHCA_DBG:%s " format "\n", \
+				   get_paca()->paca_index, __FUNCTION__, \
+				   ## arg); \
+	} while (0)
+/* unlike ehca_dbg, info/warn/err print regardless of ehca_debug_level */
+#define ehca_info(ib_dev, format, arg...) \
+	dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
+		 get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_warn(ib_dev, format, arg...) \
+	dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
+		 get_paca()->paca_index, __FUNCTION__, ## arg)
+
+#define ehca_err(ib_dev, format, arg...) \
+	dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
+		get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/* use these only if no ib_dev available: plain printk, no device prefix */
+#define ehca_gen_dbg(format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\
+			       get_paca()->paca_index, __FUNCTION__, ## arg); \
+	} while (0)
+/* NOTE(review): gated by ehca_debug_level and KERN_INFO, unlike ehca_warn */
+#define ehca_gen_warn(format, arg...) \
+	do { \
+		if (unlikely(ehca_debug_level)) \
+			printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\
+			       get_paca()->paca_index, __FUNCTION__, ## arg); \
+	} while (0)
+
+#define ehca_gen_err(format, arg...) \
+	printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
+		get_paca()->paca_index, __FUNCTION__, ## arg)
+
+/**
+ * ehca_dmp - printk a memory block, 16 bytes per line; len should be n*16
+ * because the loop always reads a full 16-byte row. Each line's layout is:
+ * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
+ */
+#define ehca_dmp(adr, len, format, args...) \
+	do {				       \
+		unsigned int x;			      \
+		unsigned int l = (unsigned int)(len); \
+		unsigned char *deb = (unsigned char*)(adr);	\
+		for (x = 0; x < l; x += 16) { \
+			printk("EHCA_DMP:%s" format \
+			       " adr=%p ofs=%04x %016lx %016lx\n", \
+			       __FUNCTION__, ##args, deb, x, \
+			       *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
+			deb += 16; \
+		} \
+	} while (0)
+
+/* define a bitmask, little endian version: (shift position << 16) + length */
+#define EHCA_BMASK(pos,length) (((pos)<<16)+(length))
+
+/* define a bitmask, the ibm way: from/to count from the MSB of a 64-bit value */
+#define EHCA_BMASK_IBM(from,to) (((63-(to))<<16)+((to)-(from)+1))
+
+/* internal function, don't use: shift position stored in mask bits 31..16 */
+#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff)
+
+/* internal function, don't use: low-order ones, as many as the mask length */
+#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff))
+
+/**
+ * EHCA_BMASK_SET - return value shifted and masked by mask
+ * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
+ * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
+ * in variable
+ */
+#define EHCA_BMASK_SET(mask,value) \
+	((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask))
+
+/**
+ * EHCA_BMASK_GET - extract a parameter from value by mask (right-aligned)
+ */
+#define EHCA_BMASK_GET(mask,value) \
+	(EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask)))
+
+
+/* Translate an ehca (H_*) return code into the errno-style ib return code */
+static inline int ehca2ib_return_code(u64 ehca_rc)
+{
+	/* the three codes with a dedicated translation come first */
+	if (ehca_rc == H_SUCCESS)
+		return 0;
+	if (ehca_rc == H_BUSY)
+		return -EBUSY;
+	if (ehca_rc == H_NO_MEM)
+		return -ENOMEM;
+
+	/* everything else is reported as -EINVAL */
+	return -EINVAL;
+}
+
+
+#endif /* EHCA_TOOLS_H */
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
new file mode 100644
index 0000000..e08764e
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -0,0 +1,392 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  userspace support verbs
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/current.h>
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+					struct ib_udata *udata)
+{
+	/* zeroed driver context; callers only see the embedded ib_ucontext */
+	struct ehca_ucontext *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (ctx)
+		return &ctx->ib_ucontext;
+
+	/* allocation failed: log against the device, return an ERR_PTR */
+	ehca_err(device, "Out of memory device=%p", device);
+	return ERR_PTR(-ENOMEM);
+}
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context)
+{
+	kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
+	return 0;	/* no failure path: the wrapper is just freed */
+}
+
+struct page *ehca_nopage(struct vm_area_struct *vma,
+			 unsigned long address, int *type)
+{
+	struct page *mypage = NULL;	/* stays NULL if no queue area matches */
+	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; /* mmap offset in bytes */
+	u32 idr_handle = fileoffset >> 32;	  /* idr lookup key    */
+	u32 q_type = (fileoffset >> 28) & 0xF;	  /* CQ, QP,...        */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	u32 cur_pid = current->tgid;
+	unsigned long flags;
+	struct ehca_cq *cq;
+	struct ehca_qp *qp;
+	struct ehca_pd *pd;
+	u64 offset;
+	void *vaddr;
+
+	switch (q_type) {
+	case 1: /* CQ */
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!cq) {
+			ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		if (cq->ownpid != cur_pid) {	/* compared against the tgid */
+			ehca_err(cq->ib_cq.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {	/* cq queue area */
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
+			ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+		break;
+
+	case 2: /* QP */
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!qp) {
+			ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
+			return NOPAGE_SIGBUS;
+		}
+
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {	/* owner pid is read from the PD */
+			ehca_err(qp->ib_qp.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			return NOPAGE_SIGBUS;
+		}
+
+		if (rsrc_type == 2) {	/* rqueue */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
+			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		} else if (rsrc_type == 3) {	/* squeue */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
+			offset = address - vma->vm_start;
+			vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
+			ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
+				 offset, vaddr);
+			mypage = virt_to_page(vaddr);
+		}
+		break;
+
+	default:
+		ehca_gen_err("bad queue type %x", q_type);
+		return NOPAGE_SIGBUS;
+	}
+	/* NOTE(review): vaddr is used unchecked above; can ipz_qeit_calc fail? */
+	if (!mypage) {
+		ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS");
+		return NOPAGE_SIGBUS;
+	}
+	get_page(mypage);	/* take a page reference before returning it */
+
+	return mypage;
+}
+
+static struct vm_operations_struct ehcau_vm_ops = {
+	.nopage = ehca_nopage,
+};
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT; /* mmap offset in bytes */
+	u32 idr_handle = fileoffset >> 32;	  /* idr lookup key    */
+	u32 q_type = (fileoffset >> 28) & 0xF;	  /* CQ, QP,...        */
+	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+	u32 cur_pid = current->tgid;
+	u32 ret;	/* remap_pfn_range() result */
+	u64 vsize, physical;
+	unsigned long flags;
+	struct ehca_cq *cq;
+	struct ehca_qp *qp;
+	struct ehca_pd *pd;
+
+	switch (q_type) {
+	case  1: /* CQ */
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!cq)
+			return -EINVAL;
+
+		if (cq->ownpid != cur_pid) {
+			ehca_err(cq->ib_cq.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, cq->ownpid);
+			return -ENOMEM;	/* sic: pid mismatch -> ENOMEM */
+		}
+		/* the CQ must also hang off the ucontext doing the mmap */
+		if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
+			return -EINVAL;
+
+		switch (rsrc_type) {
+		case 1: /* galpa fw handle */
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				ehca_err(cq->ib_cq.device, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				return -EINVAL;
+			}
+
+			physical = cq->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			ehca_dbg(cq->ib_cq.device,
+				 "vsize=%lx physical=%lx", vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				ehca_err(cq->ib_cq.device,
+					 "remap_pfn_range() failed ret=%x",
+					 ret);
+				return -ENOMEM;
+			}
+			break;
+
+		case 2: /* cq queue_addr */
+			ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops; /* filled by .nopage */
+			break;
+
+		default:
+			ehca_err(cq->ib_cq.device, "bad resource type %x",
+				 rsrc_type);
+			return -EINVAL;
+		}
+		break;
+
+	case 2: /* QP */
+		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!qp)
+			return -EINVAL;
+
+		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
+		if (pd->ownpid != cur_pid) {	/* owner pid is read from the PD */
+			ehca_err(qp->ib_qp.device,
+				 "Invalid caller pid=%x ownpid=%x",
+				 cur_pid, pd->ownpid);
+			return -ENOMEM;	/* sic: pid mismatch -> ENOMEM */
+		}
+		/* the QP must also hang off the ucontext doing the mmap */
+		if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
+			return -EINVAL;
+
+		switch (rsrc_type) {
+		case 1: /* galpa fw handle */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vsize = vma->vm_end - vma->vm_start;
+			if (vsize != EHCA_PAGESIZE) {
+				ehca_err(qp->ib_qp.device, "invalid vsize=%lx",
+					 vma->vm_end - vma->vm_start);
+				return -EINVAL;
+			}
+
+			physical = qp->galpas.user.fw_handle;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+			vma->vm_flags |= VM_IO | VM_RESERVED;
+
+			ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx",
+				 vsize, physical);
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      physical >> PAGE_SHIFT, vsize,
+					      vma->vm_page_prot);
+			if (ret) {
+				ehca_err(qp->ib_qp.device,
+					 "remap_pfn_range() failed ret=%x",
+					 ret);
+				return -ENOMEM;
+			}
+			break;
+
+		case 2: /* qp rqueue_addr */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops; /* filled by .nopage */
+			break;
+
+		case 3: /* qp squeue_addr */
+			ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp);
+			vma->vm_flags |= VM_RESERVED;
+			vma->vm_ops = &ehcau_vm_ops; /* filled by .nopage */
+			break;
+
+		default:
+			ehca_err(qp->ib_qp.device, "bad resource type %x",
+				 rsrc_type);
+			return -EINVAL;
+		}
+		break;
+
+	default:
+		ehca_gen_err("bad queue type %x", q_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
+		     struct vm_area_struct **vma)
+{
+	/* map length bytes anonymously, hand back address and vma, hook nopage */
+	down_write(&current->mm->mmap_sem);
+	*mapped = (void*)do_mmap(NULL, 0, length, PROT_WRITE,
+				 MAP_SHARED | MAP_ANONYMOUS, foffset);
+	up_write(&current->mm->mmap_sem);
+	if (!(*mapped) || IS_ERR(*mapped)) { /* do_mmap fails with -errno */
+		ehca_gen_err("couldn't mmap foffset=%lx length=%lx",
+			     foffset, length);
+		return -EINVAL;
+	}
+	/* NOTE(review): find_vma() runs without mmap_sem here - confirm safe */
+	*vma = find_vma(current->mm, (u64)*mapped);
+	if (!(*vma)) {
+		down_write(&current->mm->mmap_sem);
+		do_munmap(current->mm, (unsigned long)*mapped, length);
+		up_write(&current->mm->mmap_sem);
+		ehca_gen_err("couldn't find vma queue=%p", *mapped);
+		return -EINVAL;
+	}
+	(*vma)->vm_flags |= VM_RESERVED;
+	(*vma)->vm_ops = &ehcau_vm_ops;
+
+	return 0;
+}
+
+int ehca_mmap_register(u64 physical, void **mapped,
+		       struct vm_area_struct **vma)
+{
+	int ret;
+	unsigned long vsize;
+	/* ehca hw supports only 4k page */
+	ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
+	if (ret) {
+		ehca_gen_err("could'nt mmap physical=%lx", physical);
+		return ret;
+	}
+	/* NOTE(review): the error returns below leave the mapping in place */
+	(*vma)->vm_flags |= VM_RESERVED;
+	vsize = (*vma)->vm_end - (*vma)->vm_start;
+	if (vsize != EHCA_PAGESIZE) {
+		ehca_gen_err("invalid vsize=%lx",
+			     (*vma)->vm_end - (*vma)->vm_start);
+		return -EINVAL;
+	}
+
+	(*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
+	(*vma)->vm_flags |= VM_IO | VM_RESERVED;
+
+	ret = remap_pfn_range((*vma), (*vma)->vm_start,
+			      physical >> PAGE_SHIFT, vsize,
+			      (*vma)->vm_page_prot);
+	if (ret) {	/* any remap failure is reported as -ENOMEM */
+		ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
+		return -ENOMEM;
+	}
+
+	return 0;
+
+}
+
+int ehca_munmap(unsigned long addr, size_t len) {
+	struct mm_struct *mm = current->mm;
+	int rc;
+	if (!mm)	/* no address space, nothing to unmap */
+		return 0;
+	down_write(&mm->mmap_sem);
+	rc = do_munmap(mm, addr, len);
+	up_write(&mm->mmap_sem);
+	return rc;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
new file mode 100644
index 0000000..3fb46e6
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -0,0 +1,874 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/hvcall.h>
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hcp_phyp.h"
+#include "hipz_fns.h"
+#include "ipz_pt_fn.h"
+
+#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
+#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
+#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
+#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
+#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
+#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
+
+#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
+#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
+#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
+
+#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
+#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
+
+#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
+
+/* direct access qp controls */
+#define DAQP_CTRL_ENABLE    0x01
+#define DAQP_CTRL_SEND_COMP 0x20
+#define DAQP_CTRL_RECV_COMP 0x40
+
+static u32 get_longbusy_msecs(int longbusy_rc)
+{
+	/* map each long-busy hint to the delay in msecs its name spells out */
+	if (longbusy_rc == H_LONG_BUSY_ORDER_10_MSEC)
+		return 10;
+	if (longbusy_rc == H_LONG_BUSY_ORDER_100_MSEC)
+		return 100;
+	if (longbusy_rc == H_LONG_BUSY_ORDER_1_SEC)
+		return 1000;
+	if (longbusy_rc == H_LONG_BUSY_ORDER_10_SEC)
+		return 10000;
+	if (longbusy_rc == H_LONG_BUSY_ORDER_100_SEC)
+		return 100000;
+
+	/*
+	 * H_LONG_BUSY_ORDER_1_MSEC and every unrecognized code: 1 msec.
+	 */
+	return 1;
+}
+
+static long ehca_plpar_hcall_norets(unsigned long opcode,
+				    unsigned long arg1,
+				    unsigned long arg2,
+				    unsigned long arg3,
+				    unsigned long arg4,
+				    unsigned long arg5,
+				    unsigned long arg6,
+				    unsigned long arg7)
+{
+	long ret;
+	int i, sleep_msecs;
+
+	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+		     "arg5=%lx arg6=%lx arg7=%lx",
+		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+	/* up to 5 attempts; only a long-busy answer leads to another one */
+	for (i = 0; i < 5; i++) {
+		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
+					 arg5, arg6, arg7);
+
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+
+		if (ret < H_SUCCESS)	/* logged, then returned like any code */
+			ehca_gen_err("opcode=%lx ret=%lx"
+				     " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+				     " arg5=%lx arg6=%lx arg7=%lx ",
+				     opcode, ret,
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7);
+
+		ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
+		return ret;
+
+	}
+	/* every attempt was answered with long-busy */
+	return H_BUSY;
+}
+
+static long ehca_plpar_hcall9(unsigned long opcode,
+			      unsigned long *outs, /* array of 9 outputs */
+			      unsigned long arg1,
+			      unsigned long arg2,
+			      unsigned long arg3,
+			      unsigned long arg4,
+			      unsigned long arg5,
+			      unsigned long arg6,
+			      unsigned long arg7,
+			      unsigned long arg8,
+			      unsigned long arg9)
+{
+	long ret;
+	int i, sleep_msecs;
+
+	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
+		     "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
+		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
+		     arg8, arg9);
+	/* up to 5 attempts; only a long-busy answer leads to another one */
+	for (i = 0; i < 5; i++) {
+		ret = plpar_hcall9(opcode, outs,
+				   arg1, arg2, arg3, arg4, arg5,
+				   arg6, arg7, arg8, arg9);
+
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+
+		if (ret < H_SUCCESS)	/* logged, then returned like any code */
+			ehca_gen_err("opcode=%lx ret=%lx"
+				     " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
+				     " arg5=%lx arg6=%lx arg7=%lx arg8=%lx"
+				     " arg9=%lx"
+				     " out1=%lx out2=%lx out3=%lx out4=%lx"
+				     " out5=%lx out6=%lx out7=%lx out8=%lx"
+				     " out9=%lx",
+				     opcode, ret,
+				     arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7, arg8, arg9,
+				     outs[0], outs[1], outs[2], outs[3],
+				     outs[4], outs[5], outs[6], outs[7],
+				     outs[8]);
+
+		ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
+			     "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
+			     "out9=%lx",
+			     opcode, ret, outs[0], outs[1], outs[2], outs[3],
+			     outs[4], outs[5], outs[6], outs[7], outs[8]);
+		return ret;
+
+	}
+	/* every attempt was answered with long-busy */
+	return H_BUSY;
+}
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 *act_nr_of_entries,
+			     u32 *act_pages,
+			     u32 *eq_ist)
+{
+	u64 hret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	/* r5 starts out as the resource type (3) ... */
+	u64 ctrl = 3ULL;
+
+	/* ... and gets exactly one flag bit, chosen by neq_control */
+	if (neq_control == 1)
+		ctrl |= 1ULL << 63;		/* notification event queue */
+	else
+		ctrl |= 1ULL << (63 - 7);	/* ISN is associated */
+
+	hret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				 adapter_handle.handle,	/* r4 */
+				 ctrl,			/* r5 */
+				 number_of_entries,	/* r6 */
+				 0, 0, 0, 0, 0, 0);
+
+	/* the outputs are copied out whatever the hcall returned */
+	eq_handle->handle = outs[0];
+	*act_nr_of_entries = (u32)outs[3];
+	*act_pages = (u32)outs[4];
+	*eq_ist = (u32)outs[5];
+
+	if (hret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resource - ret=%lx ", hret);
+
+	return hret;
+}
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask)
+{
+	return ehca_plpar_hcall_norets(H_RESET_EVENTS,
+				       adapter_handle.handle, /* r4 */
+				       eq_handle.handle,      /* r5 */
+				       event_mask,	      /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,   /* r4  */
+				2,	                 /* r5  */
+				param->eq_handle.handle, /* r6  */
+				cq->token,	         /* r7  */
+				param->nr_cqe,           /* r8  */
+				0, 0, 0, 0);
+	cq->ipz_cq_handle.handle = outs[0];
+	param->act_nr_of_entries = (u32)outs[3];
+	param->act_pages = (u32)outs[4];
+
+	if (ret == H_SUCCESS)
+		hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_qp *qp,
+			     struct ehca_alloc_qp_parms *parms)
+{
+	u64 ret;
+	u64 allocate_controls;
+	u64 max_r10_reg;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
+	u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
+	int daqp_ctrl = parms->daqp_ctrl;
+
+	allocate_controls =
+		EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
+			       (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
+				 (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
+				 (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
+				 parms->ud_av_l_key_ctl)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
+	/* r10: requested WQE counts (cap value + 1 each) and SGE limits */
+	max_r10_reg =
+		EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
+			       max_nr_send_wqes)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
+				 max_nr_receive_wqes)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
+				 parms->max_send_sge)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
+				 parms->max_recv_sge);
+	/* H_ALLOC_RESOURCE: allocate the QP; results come back in outs[] */
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,	           /* r4  */
+				allocate_controls,	           /* r5  */
+				qp->send_cq->ipz_cq_handle.handle, /* r6  */
+				qp->recv_cq->ipz_cq_handle.handle, /* r7  */
+				parms->ipz_eq_handle.handle,       /* r8  */
+				((u64)qp->token << 32) | parms->pd.value,
+				max_r10_reg,	                   /* r10 */
+				parms->ud_av_l_key_ctl,            /* r11 */
+				0);
+	qp->ipz_qp_handle.handle = outs[0];
+	qp->real_qp_num = (u32)outs[1];
+	parms->act_nr_send_wqes =
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
+	parms->act_nr_recv_wqes =
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
+	parms->act_nr_send_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
+	parms->act_nr_recv_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
+	parms->nr_sq_pages =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
+	parms->nr_rq_pages =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
+	/* NOTE(review): outs[6] feeds both galpa arguments - confirm intended */
+	if (ret == H_SUCCESS)
+		hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block)
+{
+	u64 ret;
+	u64 r_cb = virt_to_abs(query_port_response_block);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response block not page aligned");
+		return H_PARAMETER;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
+				      adapter_handle.handle, /* r4 */
+				      port_id,	             /* r5 */
+				      r_cb,	             /* r6 */
+				      0, 0, 0, 0);
+
+	if (ehca_debug_level)
+		ehca_dmp(query_port_response_block, 64, "response_block");
+
+	return ret;
+}
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock)
+{
+	u64 r_cb = virt_to_abs(query_hca_rblock);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response_block=%p not page aligned",
+			     query_hca_rblock);
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_QUERY_HCA,
+				       adapter_handle.handle, /* r4 */
+				       r_cb,                  /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count)
+{
+	return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
+				       adapter_handle.handle,      /* r4  */
+				       queue_type | pagesize << 8, /* r5  */
+				       resource_handle,	           /* r6  */
+				       logical_address_of_page,    /* r7  */
+				       count,	                   /* r8  */
+				       0, 0);
+}
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	if (count != 1) {	/* only single-page registration is accepted */
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+	return hipz_h_register_rpage(adapter_handle,
+				     pagesize,
+				     queue_type,
+				     eq_handle.handle,
+				     logical_address_of_page, count);
+}
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
+			   u32 ist)
+{
+	u64 ret;
+	ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
+				      adapter_handle.handle, /* r4 */
+				      ist,                   /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret != H_SUCCESS && ret != H_BUSY)
+		ehca_gen_err("Could not query interrupt state.");
+
+	return ret;
+}
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+				     cq_handle.handle, logical_address_of_page,
+				     count);
+}
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%lx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle,pagesize,queue_type,
+				     qp_handle.handle,logical_address_of_page,
+				     count);
+}
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe2processed,
+			       void **log_addr_next_rq_wqe2processed,
+			       int dis_and_get_function_code)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				dis_and_get_function_code, /* r5 */
+				qp_handle.handle,	   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (log_addr_next_sq_wqe2processed)
+		*log_addr_next_sq_wqe2processed = (void*)outs[0];
+	if (log_addr_next_rq_wqe2processed)
+		*log_addr_next_rq_wqe2processed = (void*)outs[1];
+
+	return ret;
+}
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				update_mask,	       /* r6 */
+				virt_to_abs(mqpcb),    /* r7 */
+				0, 0, 0, 0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Insufficient resources ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal)
+{
+	return ehca_plpar_hcall_norets(H_QUERY_QP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       virt_to_abs(qqpcb),    /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];	/* filled by the hcall, not read here */
+
+	ret = hcp_galpas_dtor(&qp->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct qp->galpas");
+		return H_RESOURCE;
+	}
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				/* function code */
+				1,	                   /* r5 */
+				qp->ipz_qp_handle.handle,  /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (ret == H_HARDWARE)
+		ehca_gen_err("HCA not operational. ret=%lx", ret);
+	/* the disable status is only logged; the free status is returned */
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      qp->ipz_qp_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("Resource still in use. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port)
+{
+	return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       port,                  /* r6 */
+				       0, 0, 0, 0);
+}
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr)
+{
+	u64 ret;
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				port,	               /* r6 */
+				0, 0, 0, 0, 0, 0);
+	*pma_qp_nr = (u32)outs[0];
+	*bma_qp_nr = (u32)outs[1];
+
+	if (ret == H_ALIAS_EXIST)
+		ehca_gen_err("AQP1 already exists. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,	/* not referenced in the body */
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{
+	u64 ret;
+	/* note: interface_id (r7) is passed ahead of subnet_prefix (r8) */
+	ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
+				      adapter_handle.handle,  /* r4 */
+				      qp_handle.handle,       /* r5 */
+				      mcg_dlid,               /* r6 */
+				      interface_id,           /* r7 */
+				      subnet_prefix,          /* r8 */
+				      0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lx", ret);
+
+	return ret;
+}
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,	/* not referenced in the body */
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{	/* note: interface_id (r7) is passed ahead of subnet_prefix (r8) */
+	return ehca_plpar_hcall_norets(H_DETACH_MCQP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       mcg_dlid,              /* r6 */
+				       interface_id,          /* r7 */
+				       subnet_prefix,         /* r8 */
+				       0, 0);
+}
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq,
+		      u8 force_flag)
+{
+	u64 ret;
+	/* tear down the galpas first; on failure the hcall is not issued */
+	ret = hcp_galpas_dtor(&cq->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct cq->galpas");
+		return H_RESOURCE;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      cq->ipz_cq_handle.handle,  /* r5 */
+				      force_flag != 0 ? 1L : 0L, /* r6 */
+				      0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
+
+	return ret;
+}
+
+/*
+ * releases the EQ's galpas, then frees the EQ through H_FREE_RESOURCE
+ */
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq)
+{
+	u64 h_ret;
+
+	if (hcp_galpas_dtor(&eq->galpas)) {
+		ehca_gen_err("Could not destruct eq->galpas");
+		return H_RESOURCE;
+	}
+
+	h_ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+					adapter_handle.handle,
+					eq->ipz_eq_handle.handle,
+					0, 0, 0, 0, 0);
+	if (h_ret == H_RESOURCE)
+		ehca_gen_err("Resource in use. ret=%lx ", h_ret);
+	return h_ret;
+}
+
+/*
+ * Allocates an MR through H_ALLOC_RESOURCE and copies the returned handle
+ * and lkey/rkey into outparms, whatever the hcall result. mr is unused.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr, const u64 length,
+			     const u32 access_ctrl, const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 h_ret;
+
+	/* r5 = 5: presumably the MR resource type -- TODO confirm */
+	h_ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				  adapter_handle.handle, 5, vaddr, length,
+				  ((u64)access_ctrl) << 32ULL, pd.value,
+				  0, 0, 0);
+
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return h_ret;
+}
+
+/*
+ * Registers MR pages via hipz_h_register_rpage(). With count > 1 the
+ * address must be EHCA_PAGESIZE aligned, otherwise H_PARAMETER is returned.
+ */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize, const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	if (count <= 1 || !(logical_address_of_page & (EHCA_PAGESIZE-1)))
+		return hipz_h_register_rpage(adapter_handle, pagesize,
+					     queue_type,
+					     mr->ipz_mr_handle.handle,
+					     logical_address_of_page, count);
+
+	ehca_gen_err("logical_address_of_page not on a 4k boundary "
+		     "adapter_handle=%lx mr=%p mr_handle=%lx "
+		     "pagesize=%x queue_type=%x "
+		     "logical_address_of_page=%lx count=%lx",
+		     adapter_handle.handle, mr,
+		     mr->ipz_mr_handle.handle, pagesize, queue_type,
+		     logical_address_of_page, count);
+	return H_PARAMETER;
+}
+
+/* Queries an MR via H_QUERY_MR and unpacks the output registers. */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 h_ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
+				      adapter_handle.handle,
+				      mr->ipz_mr_handle.handle,
+				      0, 0, 0, 0, 0, 0, 0);
+
+	outparms->len = outs[0];
+	outparms->vaddr = outs[1];
+	outparms->acl = outs[4] >> 32;
+	/* outs[5]: lkey in the upper, rkey in the lower 32 bits */
+	outparms->lkey = (u32)(outs[5] >> 32);
+	outparms->rkey = (u32)(outs[5] & 0xffffffff);
+	return h_ret;
+}
+
+/* frees the MR in firmware by passing its handle to H_FREE_RESOURCE */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE, adapter_handle.handle,
+				       mr->ipz_mr_handle.handle,
+				       0, 0, 0, 0, 0);
+}
+
+/*
+ * Reregisters an MR via H_REREGISTER_PMR; vaddr and lkey/rkey from the
+ * hcall outputs are stored in outparms regardless of the return code.
+ */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in, const u64 length,
+			  const u32 access_ctrl, const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 h_ret;
+
+	/* r8 packs the access control bits (upper word) with the PD value */
+	h_ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
+				  adapter_handle.handle,
+				  mr->ipz_mr_handle.handle,
+				  vaddr_in, length,
+				  (((u64)access_ctrl) << 32ULL) | pd.value,
+				  mr_addr_cb, 0, 0, 0);
+
+	outparms->vaddr = outs[1];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+	return h_ret;
+}
+
+/*
+ * Registers a shared MR via H_REGISTER_SMR using orig_mr's handle; the
+ * returned handle and lkey/rkey go to outparms. mr is not referenced.
+ */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in, const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 h_ret;
+
+	h_ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
+				  adapter_handle.handle,
+				  orig_mr->ipz_mr_handle.handle, vaddr_in,
+				  ((u64)access_ctrl) << 32ULL, pd.value,
+				  0, 0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+	return h_ret;
+}
+
+/* Allocates an MW via H_ALLOC_RESOURCE; mw itself is not referenced. */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 h_ret;
+
+	/* r5 = 6: presumably the MW resource type -- TODO confirm */
+	h_ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				  adapter_handle.handle, 6, pd.value,
+				  0, 0, 0, 0, 0, 0);
+
+	outparms->handle.handle = outs[0];
+	outparms->rkey = (u32)outs[3];
+	return h_ret;
+}
+
+/* Queries an MW via H_QUERY_MW; only the rkey (outs[3]) is handed back. */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 outs[PLPAR_HCALL9_BUFSIZE];
+	u64 h_ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
+				      adapter_handle.handle,
+				      mw->ipz_mw_handle.handle,
+				      0, 0, 0, 0, 0, 0, 0);
+
+	outparms->rkey = (u32)outs[3];
+
+	return h_ret;
+}
+
+/* frees the MW in firmware by passing its handle to H_FREE_RESOURCE */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE, adapter_handle.handle,
+				       mw->ipz_mw_handle.handle,
+				       0, 0, 0, 0, 0);
+}
+
+/*
+ * Issues H_ERROR_DATA for rblock, which must be EHCA_PAGESIZE aligned
+ * (else H_PARAMETER is returned). byte_count is never written.
+ */
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock, unsigned long *byte_count)
+{
+	const u64 rb_abs = virt_to_abs(rblock);
+
+	if (rb_abs & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("rblock not page aligned.");
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_ERROR_DATA, adapter_handle.handle,
+				       ressource_handle, rb_abs, 0, 0, 0, 0);
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
new file mode 100644
index 0000000..587ebd4
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -0,0 +1,261 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware Infiniband Interface code for POWER
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_IF_H__
+#define __HCP_IF_H__
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "hipz_hw.h"
+
+/*
+ * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initializes
+ * resources, creates the empty EQPT (ring).
+ */
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 * act_nr_of_entries,
+			     u32 * act_pages,
+			     u32 * eq_ist);
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask);
+/*
+ * hipz_h_alloc_resource_cq allocates CQ resources in HW and FW, initializes
+ * resources, creates the empty CQPT (ring).
+ */
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param);
+
+
+/*
+ * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
+ * initialize resources, create empty QPPTs (2 rings).
+ */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_qp *qp,
+			     struct ehca_alloc_qp_parms *parms);
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block);
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock);
+
+/*
+ * hipz_h_register_rpage internal function in hcp_if.h for all
+ * hcp_H_REGISTER_RPAGE calls.
+ */
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count);
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle
+			   hcp_adapter_handle,
+			   u32 ist);
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal);
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa);
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe_tb_processed,
+			       void **log_addr_next_rq_wqe_tb_processed,
+			       int dis_and_get_function_code);
+enum hcall_sigt { /* NOTE(review): presumably signaling types - confirm */
+	HCALL_SIGT_NO_CQE = 0,
+	HCALL_SIGT_BY_WQE = 1,
+	HCALL_SIGT_EVERY = 2
+};
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal);
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal);
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp);
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port);
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr);
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq,
+		      u8 force_flag);
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq);
+
+/*
+ * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize
+ * resources.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr,
+			     const u64 length,
+			     const u32 access_ctrl,
+			     const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+/* hipz_h_query_mr queries MR in HW and FW */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mr frees MR resources in HW and FW */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr);
+
+/* hipz_h_reregister_pmr reregisters MR in HW and FW */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in,
+			  const u64 length,
+			  const u32 access_ctrl,
+			  const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_smr registers shared MR in HW and FW */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in,
+			const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms);
+
+/*
+ * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize
+ * resources.
+ */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_query_mw queries MW in HW and FW */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mw frees MW resources in HW and FW */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw);
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock,
+		      unsigned long *byte_count);
+
+#endif /* __HCP_IF_H__ */
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
new file mode 100644
index 0000000..0b1a477
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -0,0 +1,80 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *   load store abstraction for ehca register access with tracing
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+int hcall_map_page(u64 physaddr, u64 *mapaddr)
+{
+	*mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE)); /* one eHCA page */
+	return 0;	/* always 0: the ioremap() result is not checked */
+}
+
+int hcall_unmap_page(u64 mapaddr)
+{
+	iounmap((volatile void __iomem*)mapaddr); /* mapaddr as an __iomem ptr */
+	return 0;	/* no failure path: always 0 */
+}
+
+/* maps the kernel galpa page and records the user galpa address as is */
+int hcp_galpas_ctor(struct h_galpas *galpas,
+		    u64 paddr_kernel, u64 paddr_user)
+{
+	int rc = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+
+	if (!rc)
+		galpas->user.fw_handle = paddr_user;
+
+	return rc;
+}
+
+/* unmaps the kernel galpa page (if one is set) and clears both handles */
+int hcp_galpas_dtor(struct h_galpas *galpas)
+{
+	int rc = 0;
+
+	if (galpas->kernel.fw_handle)
+		rc = hcall_unmap_page(galpas->kernel.fw_handle);
+	if (!rc)
+		galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
+
+	return rc;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
new file mode 100644
index 0000000..5305c2a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -0,0 +1,90 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Firmware calls
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Gerd Bayer <gerd.bayer@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_PHYP_H__
+#define __HCP_PHYP_H__
+
+
+/*
+ * eHCA page (mapped into memory):
+ * resource to access eHCA register pages in CPU address space
+ */
+struct h_galpa {
+	u64 fw_handle;
+	/* for pSeries this is the 64-bit memory address at which the
+	   I/O memory is mapped into CPU address space (kv) */
+};
+
+/*
+ * resource to access eHCA address space registers, all types
+ */
+struct h_galpas {
+	u32 pid;		/* PID used for userspace galpa checking */
+	struct h_galpa user;	/* user space accessible resource,
+				   set to 0 if unused */
+	struct h_galpa kernel;	/* kernel space accessible resource,
+				   set to 0 if unused */
+};
+
+/* reads the 64-bit value at byte offset 'offset' from galpa.fw_handle */
+static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
+{
+	return *(volatile u64 __force *)(galpa.fw_handle + offset);
+}
+
+/* writes 'value' as 64 bits at byte offset 'offset' from galpa.fw_handle */
+static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
+{
+	*(volatile u64 __force *)(galpa.fw_handle + offset) = value;
+}
+
+int hcp_galpas_ctor(struct h_galpas *galpas,
+		    u64 paddr_kernel, u64 paddr_user);
+
+int hcp_galpas_dtor(struct h_galpas *galpas);
+
+int hcall_map_page(u64 physaddr, u64 * mapaddr);
+
+int hcall_unmap_page(u64 mapaddr);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h
new file mode 100644
index 0000000..9dac93d
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns.h
@@ -0,0 +1,68 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_H__
+#define __HIPZ_FNS_H__
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+#include "hipz_fns_core.h"
+
+/* eq accessors: the reg argument is translated by EQTEMM_OFFSET() */
+#define hipz_galpa_store_eq(galpa, reg, val) \
+	hipz_galpa_store(galpa, EQTEMM_OFFSET(reg), val)
+#define hipz_galpa_load_eq(galpa, reg) \
+	hipz_galpa_load(galpa, EQTEMM_OFFSET(reg))
+
+/* qped accessors: the reg argument is translated by QPEDMM_OFFSET() */
+#define hipz_galpa_store_qped(galpa, reg, val) \
+	hipz_galpa_store(galpa, QPEDMM_OFFSET(reg), val)
+#define hipz_galpa_load_qped(galpa, reg) \
+	hipz_galpa_load(galpa, QPEDMM_OFFSET(reg))
+
+/* mrmw accessors: the reg argument is translated by MRMWMM_OFFSET() */
+#define hipz_galpa_store_mrmw(galpa, reg, val) \
+	hipz_galpa_store(galpa, MRMWMM_OFFSET(reg), val)
+#define hipz_galpa_load_mrmw(galpa, reg) \
+	hipz_galpa_load(galpa, MRMWMM_OFFSET(reg))
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h
new file mode 100644
index 0000000..20898a1
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h
@@ -0,0 +1,100 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  HW abstraction register functions
+ *
+ *  Authors: Christoph Raisch <raisch@de.ibm.com>
+ *           Heiko J Schick <schickhj@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_CORE_H__
+#define __HIPZ_FNS_CORE_H__
+
+#include "hcp_phyp.h"
+#include "hipz_hw.h"
+
+/* cq accessors: the reg argument is translated by CQTEMM_OFFSET() */
+#define hipz_galpa_store_cq(galpa, reg, val) \
+	hipz_galpa_store(galpa, CQTEMM_OFFSET(reg), val)
+#define hipz_galpa_load_cq(galpa, reg) \
+	hipz_galpa_load(galpa, CQTEMM_OFFSET(reg))
+/* qp accessors: the reg argument is translated by QPTEMM_OFFSET() */
+#define hipz_galpa_store_qp(galpa, reg, val) \
+	hipz_galpa_store(galpa, QPTEMM_OFFSET(reg), val)
+#define hipz_galpa_load_qp(galpa, reg) \
+	hipz_galpa_load(galpa, QPTEMM_OFFSET(reg))
+
+static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+	const u64 doorbell = EHCA_BMASK_SET(QPX_SQADDER, nr_wqes);
+
+	hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, doorbell); /* ring it */
+}
+
+static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+	const u64 doorbell = EHCA_BMASK_SET(QPX_RQADDER, nr_wqes);
+
+	hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, doorbell); /* ring it */
+}
+
+static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
+{
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, /* CQ register */
+			    EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); /* adder */
+}
+
+static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
+{
+	const u64 n0_val = EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
+					  value);
+
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, n0_val);
+	/* read back; result unused (presumably flushes the store - confirm) */
+	(void)hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
+}
+
+static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
+{
+	const u64 n1_val = EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value);
+
+	hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, n1_val);
+	/* read back; result unused (presumably flushes the store - confirm) */
+	(void)hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
+}
+
+#endif /* __HIPZ_FNS_CORE_H__ */
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
new file mode 100644
index 0000000..3fc92b0
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -0,0 +1,388 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  eHCA register definitions
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_HW_H__
+#define __HIPZ_HW_H__
+
+#include "ehca_tools.h"
+
+/* QP Table Entry Memory Map */
+struct hipz_qptemm {
+	u64 qpx_hcr;
+	u64 qpx_c;
+	u64 qpx_herr;
+	u64 qpx_aer;
+/* 0x20*/
+	u64 qpx_sqa;
+	u64 qpx_sqc;
+	u64 qpx_rqa;
+	u64 qpx_rqc;
+/* 0x40*/
+	u64 qpx_st;
+	u64 qpx_pmstate;
+	u64 qpx_pmfa;
+	u64 qpx_pkey;
+/* 0x60*/
+	u64 qpx_pkeya;
+	u64 qpx_pkeyb;
+	u64 qpx_pkeyc;
+	u64 qpx_pkeyd;
+/* 0x80*/
+	u64 qpx_qkey;
+	u64 qpx_dqp;
+	u64 qpx_dlidp;
+	u64 qpx_portp;
+/* 0xa0*/
+	u64 qpx_slidp;
+	u64 qpx_slidpp;
+	u64 qpx_dlida;
+	u64 qpx_porta;
+/* 0xc0*/
+	u64 qpx_slida;
+	u64 qpx_slidpa;
+	u64 qpx_slvl;
+	u64 qpx_ipd;
+/* 0xe0*/
+	u64 qpx_mtu;
+	u64 qpx_lato;
+	u64 qpx_rlimit;
+	u64 qpx_rnrlimit;
+/* 0x100*/
+	u64 qpx_t;
+	u64 qpx_sqhp;
+	u64 qpx_sqptp;
+	u64 qpx_nspsn;
+/* 0x120*/
+	u64 qpx_nspsnhwm;
+	u64 reserved1;
+	u64 qpx_sdsi;
+	u64 qpx_sdsbc;
+/* 0x140*/
+	u64 qpx_sqwsize;
+	u64 qpx_sqwts;
+	u64 qpx_lsn;
+	u64 qpx_nssn;
+/* 0x160 */
+	u64 qpx_mor;
+	u64 qpx_cor;
+	u64 qpx_sqsize;
+	u64 qpx_erc;
+/* 0x180*/
+	u64 qpx_rnrrc;
+	u64 qpx_ernrwt;
+	u64 qpx_rnrresp;
+	u64 qpx_lmsna;
+/* 0x1a0 */
+	u64 qpx_sqhpc;
+	u64 qpx_sqcptp;
+	u64 qpx_sigt;
+	u64 qpx_wqecnt;
+/* 0x1c0*/
+	u64 qpx_rqhp;
+	u64 qpx_rqptp;
+	u64 qpx_rqsize;
+	u64 qpx_nrr;
+/* 0x1e0*/
+	u64 qpx_rdmac;
+	u64 qpx_nrpsn;
+	u64 qpx_lapsn;
+	u64 qpx_lcr;
+/* 0x200*/
+	u64 qpx_rwc;
+	u64 qpx_rwva;
+	u64 qpx_rdsi;
+	u64 qpx_rdsbc;
+/* 0x220*/
+	u64 qpx_rqwsize;
+	u64 qpx_crmsn;
+	u64 qpx_rdd;
+	u64 qpx_larpsn;
+/* 0x240*/
+	u64 qpx_pd;
+	u64 qpx_scqn;
+	u64 qpx_rcqn;
+	u64 qpx_aeqn;
+/* 0x260*/
+	u64 qpx_aaelog;
+	u64 qpx_ram;
+	u64 qpx_rdmaqe0;
+	u64 qpx_rdmaqe1;
+/* 0x280*/
+	u64 qpx_rdmaqe2;
+	u64 qpx_rdmaqe3;
+	u64 qpx_nrpsnhwm;
+/* 0x298*/
+	u64 reserved[(0x400 - 0x298) / 8];
+/* 0x400 extended data */
+	u64 reserved_ext[(0x500 - 0x400) / 8];
+/* 0x500 */
+	u64 reserved2[(0x1000 - 0x500) / 8];
+/* 0x1000      */
+};
+
+#define QPX_SQADDER EHCA_BMASK_IBM(48,63)
+#define QPX_RQADDER EHCA_BMASK_IBM(48,63)
+
+#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)
+
+/* MRMWPT Entry Memory Map */
+struct hipz_mrmwmm {
+	/* 0x00 */
+	u64 mrx_hcr;
+
+	u64 mrx_c;
+	u64 mrx_herr;
+	u64 mrx_aer;
+	/* 0x20 */
+	u64 mrx_pp;
+	u64 reserved1;
+	u64 reserved2;
+	u64 reserved3;
+	/* 0x40 */
+	u64 reserved4[(0x200 - 0x40) / 8];
+	/* 0x200 */
+	u64 mrx_ctl[64];
+
+};
+
+#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x)
+
+struct hipz_qpedmm {
+	/* 0x00 */
+	u64 reserved0[(0x400) / 8];
+	/* 0x400 */
+	u64 qpedx_phh;
+	u64 qpedx_ppsgp;
+	/* 0x410 */
+	u64 qpedx_ppsgu;
+	u64 qpedx_ppdgp;
+	/* 0x420 */
+	u64 qpedx_ppdgu;
+	u64 qpedx_aph;
+	/* 0x430 */
+	u64 qpedx_apsgp;
+	u64 qpedx_apsgu;
+	/* 0x440 */
+	u64 qpedx_apdgp;
+	u64 qpedx_apdgu;
+	/* 0x450 */
+	u64 qpedx_apav;
+	u64 qpedx_apsav;
+	/* 0x460  */
+	u64 qpedx_hcr;
+	u64 reserved1[4];
+	/* 0x488 */
+	u64 qpedx_rrl0;
+	/* 0x490 */
+	u64 qpedx_rrrkey0;
+	u64 qpedx_rrva0;
+	/* 0x4a0 */
+	u64 reserved2;
+	u64 qpedx_rrl1;
+	/* 0x4b0 */
+	u64 qpedx_rrrkey1;
+	u64 qpedx_rrva1;
+	/* 0x4c0 */
+	u64 reserved3;
+	u64 qpedx_rrl2;
+	/* 0x4d0 */
+	u64 qpedx_rrrkey2;
+	u64 qpedx_rrva2;
+	/* 0x4e0 */
+	u64 reserved4;
+	u64 qpedx_rrl3;
+	/* 0x4f0 */
+	u64 qpedx_rrrkey3;
+	u64 qpedx_rrva3;
+};
+
+#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x)
+
+/* CQ Table Entry Memory Map */
+struct hipz_cqtemm {
+	u64 cqx_hcr;
+	u64 cqx_c;
+	u64 cqx_herr;
+	u64 cqx_aer;
+/* 0x20  */
+	u64 cqx_ptp;
+	u64 cqx_tp;
+	u64 cqx_fec;
+	u64 cqx_feca;
+/* 0x40  */
+	u64 cqx_ep;
+	u64 cqx_eq;
+/* 0x50  */
+	u64 reserved1;
+	u64 cqx_n0;
+/* 0x60  */
+	u64 cqx_n1;
+	u64 reserved2[(0x1000 - 0x68) / 8];	/* pad 0x68..0xfff */
+/* 0x1000 */
+};
+
+#define CQX_FEC_CQE_CNT           EHCA_BMASK_IBM(32,63)
+#define CQX_FECADDER              EHCA_BMASK_IBM(32,63)
+#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0)
+#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0)
+
+#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x)
+
+/* EQ Table Entry Memory Map */
+struct hipz_eqtemm {
+	u64 eqx_hcr;
+	u64 eqx_c;
+
+	u64 eqx_herr;
+	u64 eqx_aer;
+/* 0x20 */
+	u64 eqx_ptp;
+	u64 eqx_tp;
+	u64 eqx_ssba;
+	u64 eqx_psba;
+
+/* 0x40 */
+	u64 eqx_cec;
+	u64 eqx_meql;
+	u64 eqx_xisbi;
+	u64 eqx_xisc;
+/* 0x60 */
+	u64 eqx_it;
+
+};
+
+#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x)
+
+/* access control defines for MR/MW */
+#define HIPZ_ACCESSCTRL_L_WRITE  0x00800000
+#define HIPZ_ACCESSCTRL_R_WRITE  0x00400000
+#define HIPZ_ACCESSCTRL_R_READ   0x00200000
+#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
+#define HIPZ_ACCESSCTRL_MW_BIND  0x00080000
+
+/* query hca response block */
+struct hipz_query_hca {
+	u32 cur_reliable_dg;
+	u32 cur_qp;
+	u32 cur_cq;
+	u32 cur_eq;
+	u32 cur_mr;
+	u32 cur_mw;
+	u32 cur_ee_context;
+	u32 cur_mcast_grp;
+	u32 cur_qp_attached_mcast_grp;
+	u32 reserved1;
+	u32 cur_ipv6_qp;
+	u32 cur_eth_qp;
+	u32 cur_hp_mr;
+	u32 reserved2[3];
+	u32 max_rd_domain;
+	u32 max_qp;
+	u32 max_cq;
+	u32 max_eq;
+	u32 max_mr;
+	u32 max_hp_mr;
+	u32 max_mw;
+	u32 max_mrwpte;
+	u32 max_special_mrwpte;
+	u32 max_rd_ee_context;
+	u32 max_mcast_grp;
+	u32 max_total_mcast_qp_attach;
+	u32 max_mcast_qp_attach;
+	u32 max_raw_ipv6_qp;
+	u32 max_raw_ethy_qp;
+	u32 internal_clock_frequency;
+	u32 max_pd;
+	u32 max_ah;
+	u32 max_cqe;
+	u32 max_wqes_wq;
+	u32 max_partitions;
+	u32 max_rr_ee_context;
+	u32 max_rr_qp;
+	u32 max_rr_hca;
+	u32 max_act_wqs_ee_context;
+	u32 max_act_wqs_qp;
+	u32 max_sge;
+	u32 max_sge_rd;
+	u32 memory_page_size_supported;
+	u64 max_mr_size;
+	u32 local_ca_ack_delay;
+	u32 num_ports;
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 hw_ver;
+	u64 node_guid;
+	u64 hca_cap_indicators;
+	u32 data_counter_register_size;
+	u32 max_shared_rq;
+	u32 max_isns_eq;
+	u32 max_neq;
+} __attribute__ ((packed));
+
+/* query port response block */
+struct hipz_query_port {
+	u32 state;
+	u32 bad_pkey_cntr;
+	u32 lmc;
+	u32 lid;
+	u32 subnet_timeout;
+	u32 qkey_viol_cntr;
+	u32 sm_sl;
+	u32 sm_lid;
+	u32 capability_mask;
+	u32 init_type_reply;
+	u32 pkey_tbl_len;
+	u32 gid_tbl_len;
+	u64 gid_prefix;
+	u32 port_nr;
+	u16 pkey_entries[16];
+	u8  reserved1[32];
+	u32 trent_size;
+	u32 trbuf_size;
+	u64 max_msg_sz;
+	u32 max_mtu;
+	u32 vl_cap;
+	u8  reserved2[1900];
+	u64 guid_entries[255];
+} __attribute__ ((packed));
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
new file mode 100644
index 0000000..e028ff1
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -0,0 +1,149 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "ipz_pt_fn.h"
+
+void *ipz_qpageit_get_inc(struct ipz_queue *queue)
+{
+	void *page = ipz_qeit_get(queue);
+	u64 next_offset = queue->current_q_offset + queue->pagesize;
+
+	/* advance by one page unless that would move past the queue end */
+	if (next_offset > queue->queue_length)
+		return NULL;
+	queue->current_q_offset = next_offset;
+	if (!((u64)page % EHCA_PAGESIZE))
+		return page;
+	ehca_gen_err("ERROR!! not at PAGE-Boundary");
+	return NULL;
+}
+
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
+{
+	u64 last_offset = queue->queue_length - queue->qe_size;
+	void *entry = ipz_qeit_get(queue);
+
+	queue->current_q_offset += queue->qe_size;
+	if (queue->current_q_offset <= last_offset)
+		return entry;
+	/* stepped past the last entry: wrap around and flip the toggle bit */
+	queue->current_q_offset = 0;
+	queue->toggle_state = !(queue->toggle_state & 1);
+	return entry;
+}
+
+/* build @queue from nr_of_pages hca pages; returns 1 if ok, 0 on any error */
+int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
+		   const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
+{
+	int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+	int f;
+
+	if (pagesize > PAGE_SIZE) {
+		ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
+			     "than kernel page size", pagesize);
+		return 0;
+	}
+	if (!pages_per_kpage) {
+		ehca_gen_err("FATAL ERROR: invalid kernel page size. "
+			     "pages_per_kpage=%x", pages_per_kpage);
+		return 0;
+	}
+	queue->queue_length = nr_of_pages * pagesize;
+	queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+	if (!queue->queue_pages) {
+		ehca_gen_err("ERROR!! didn't get the memory");
+		return 0;
+	}
+	memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
+	/*
+	 * allocate pages for queue:
+	 * outer loop allocates whole kernel pages (page aligned) and
+	 * inner loop divides a kernel page into smaller hca queue pages
+	 */
+	f = 0;
+	while (f < nr_of_pages) {
+		u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL);
+		int k;
+		if (!kpage)
+			goto ipz_queue_ctor_exit0; /*NOMEM*/
+		for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) {
+			(queue->queue_pages)[f] = (struct ipz_page *)kpage;
+			kpage += EHCA_PAGESIZE;
+			f++;
+		}
+	}
+
+	queue->current_q_offset = 0;
+	queue->qe_size = qe_size;
+	queue->act_nr_of_sg = nr_of_sg;
+	queue->pagesize = pagesize;
+	queue->toggle_state = 1;
+	return 1;
+
+ ipz_queue_ctor_exit0:
+	ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x",
+		     queue, f, nr_of_pages);
+	for (f = 0; f < nr_of_pages && (queue->queue_pages)[f];
+	     f += pages_per_kpage)
+		free_page((unsigned long)(queue->queue_pages)[f]);
+	vfree(queue->queue_pages);	/* release the pointer array as well */
+	queue->queue_pages = NULL;	/* ipz_queue_dtor() then bails out */
+	return 0;
+}
+
+int ipz_queue_dtor(struct ipz_queue *queue)
+{
+	const int hca_pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
+	int nr_pages;
+	int idx;
+
+	if (!queue || !queue->queue_pages) {
+		ehca_gen_dbg("queue or queue_pages is NULL");
+		return 0;
+	}
+	nr_pages = queue->queue_length / queue->pagesize;
+	/* every kernel page is freed once, via its first hca page slot */
+	for (idx = 0; idx < nr_pages; idx += hca_pages_per_kpage)
+		free_page((unsigned long)queue->queue_pages[idx]);
+	vfree(queue->queue_pages);
+	return 1;
+}
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
new file mode 100644
index 0000000..2f13509
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -0,0 +1,247 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  internal queue handling
+ *
+ *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *           Reinhard Ernst <rernst@de.ibm.com>
+ *           Christoph Raisch <raisch@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IPZ_PT_FN_H__
+#define __IPZ_PT_FN_H__
+
+#define EHCA_PAGESHIFT   12
+#define EHCA_PAGESIZE   4096UL
+#define EHCA_PAGEMASK   (~(EHCA_PAGESIZE-1))
+#define EHCA_PT_ENTRIES 512UL
+
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+
+/* struct generic ehca page */
+struct ipz_page {
+	u8 entries[EHCA_PAGESIZE];
+};
+
+/* struct generic queue in linux kernel virtual memory (kv) */
+struct ipz_queue {
+	u64 current_q_offset;	/* current queue entry */
+
+	struct ipz_page **queue_pages;	/* array of pages belonging to queue */
+	u32 qe_size;		/* queue entry size */
+	u32 act_nr_of_sg;
+	u32 queue_length;	/* queue length allocated in bytes */
+	u32 pagesize;
+	u32 toggle_state;	/* toggle flag - per page */
+	u32 dummy3;		/* 64 bit alignment */
+};
+
+/*
+ * map the byte offset q_offset to the address (kv) of its Queue Entry
+ * returns NULL if q_offset is not below queue_length
+ */
+static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
+{
+	u64 page_idx = q_offset >> EHCA_PAGESHIFT;
+	u64 in_page = q_offset & (EHCA_PAGESIZE - 1);
+	if (q_offset >= queue->queue_length)
+		return NULL;
+	return &queue->queue_pages[page_idx]->entries[in_page];
+}
+
+/*
+ * return current Queue Entry, i.e. the one at queue->current_q_offset
+ * returns address (kv) of Queue Entry, NULL if the offset is out of range
+ */
+static inline void *ipz_qeit_get(struct ipz_queue *queue)
+{
+	return ipz_qeit_calc(queue, queue->current_q_offset);
+}
+
+/*
+ * return current Queue Page , increment Queue Page iterator from
+ * page to page in struct ipz_queue, last increment will return 0! and
+ * NOT wrap
+ * returns address (kv) of Queue Page
+ * warning don't use in parallel with ipz_qeit_get_inc()
+ */
+void *ipz_qpageit_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
+{
+	void *entry = ipz_qeit_get(queue);
+
+	queue->current_q_offset += queue->qe_size;
+	if (queue->current_q_offset < queue->queue_length)
+		return entry;
+	/* end of ring reached: restart at 0 and toggle the valid flag */
+	queue->current_q_offset = 0;
+	queue->toggle_state = !(queue->toggle_state & 1);
+	return entry;
+}
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
+{
+	struct ehca_cqe *cqe = ipz_qeit_get(queue);
+	u32 valid_bit = (u32)cqe->cqe_flags >> 7;
+
+	if (valid_bit == (queue->toggle_state & 1)) {
+		ipz_qeit_get_inc(queue);
+		return cqe;
+	}
+	return NULL;
+}
+
+/*
+ * resets Queue Entry iterator to offset 0
+ * returns address (kv) of first Queue Entry
+ */
+static inline void *ipz_qeit_reset(struct ipz_queue *queue)
+{
+	queue->current_q_offset = 0;
+	return ipz_qeit_get(queue);
+}
+
+/* struct generic page table */
+struct ipz_pt {
+	u64 entries[EHCA_PT_ENTRIES];
+};
+
+/* struct page table for a queue, only to be used in pf */
+struct ipz_qpt {
+	/* queue page tables (kv), use u64 because we know the element length */
+	u64 *qpts;
+	u32 n_qpts;
+	u32 n_ptes;       /*  number of page table entries */
+	u64 *current_pte_addr;
+};
+
+/*
+ * constructor for a ipz_queue_t, placement new for ipz_queue_t,
+ * new for all dependent data structures
+ * all QP Tables are the same
+ * flow:
+ *    allocate+pin queue
+ * see ipz_qpt_ctor()
+ * returns true if ok, false if out of memory
+ */
+int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
+		   const u32 pagesize, const u32 qe_size,
+		   const u32 nr_of_sg);
+
+/*
+ * destructor for a ipz_queue_t
+ *  -# free queue
+ *  see ipz_queue_ctor()
+ *  returns true if ok, false if queue was NULL-ptr or free failed
+ */
+int ipz_queue_dtor(struct ipz_queue *queue);
+
+/*
+ * constructor for a ipz_qpt_t,
+ * placement new for struct ipz_queue, new for all dependent data structures
+ * all QP Tables are the same,
+ * flow:
+ * -# allocate+pin queue
+ * -# initialise ptcb
+ * -# allocate+pin PTs
+ * -# link PTs to a ring, according to HCA Arch, set bit62 if needed
+ * -# the ring must have room for exactly nr_of_PTEs
+ * see ipz_qpt_ctor()
+ */
+void ipz_qpt_ctor(struct ipz_qpt *qpt,
+		  const u32 nr_of_qes,
+		  const u32 pagesize,
+		  const u32 qe_size,
+		  const u8 lowbyte, const u8 toggle,
+		  u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ * fix EQ page problems
+ */
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Event Queue Entry, increment Queue Entry iterator
+ * by one step in struct ipz_queue if valid, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
+{
+	u8 *eqe = ipz_qeit_get(queue);
+	/* bit 7 of the first byte must match the current toggle state */
+	if ((u32)(*eqe >> 7) != (queue->toggle_state & 1))
+		return NULL;
+	ipz_qeit_eq_get_inc(queue);
+	return eqe;
+}
+
+/* returns address (GX) of first queue entry: qpts[0] in cpu byte order */
+static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
+{
+	return be64_to_cpu(qpt->qpts[0]);
+}
+
+/* returns address (kv) of first page of queue page table (qpt->qpts) */
+static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
+{
+	return qpt->qpts;
+}
+
+#endif				/* __IPZ_PT_FN_H__ */
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 1db9489..574a678 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,16 +1,9 @@
-config IPATH_CORE
-	tristate "QLogic InfiniPath Driver"
-	depends on 64BIT && PCI_MSI && NET
-	---help---
-	This is a low-level driver for QLogic InfiniPath host channel
-	adapters (HCAs) based on the HT-400 and PE-800 chips.
-
 config INFINIBAND_IPATH
-	tristate "QLogic InfiniPath Verbs Driver"
-	depends on IPATH_CORE && INFINIBAND
+	tristate "QLogic InfiniPath Driver"
+	depends on PCI_MSI && 64BIT && INFINIBAND
 	---help---
-	This is a driver that provides InfiniBand verbs support for
-	QLogic InfiniPath host channel adapters (HCAs).  This
-	allows these devices to be used with both kernel upper level
-	protocols such as IP-over-InfiniBand as well as with userspace
-	applications (in conjunction with InfiniBand userspace access).
+	This is a driver for QLogic InfiniPath host channel adapters,
+	including InfiniBand verbs support.  This driver allows these
+	devices to be used with both kernel upper level protocols such
+	as IP-over-InfiniBand as well as with userspace applications
+	(in conjunction with InfiniBand userspace access).
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index b0bf728..5e29cb0 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -1,36 +1,35 @@
 EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
 	-DIPATH_KERN_TYPE=0
 
-obj-$(CONFIG_IPATH_CORE) += ipath_core.o
 obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
 
-ipath_core-y := \
+ib_ipath-y := \
+	ipath_cq.o \
 	ipath_diag.o \
 	ipath_driver.o \
 	ipath_eeprom.o \
 	ipath_file_ops.o \
 	ipath_fs.o \
-	ipath_ht400.o \
+	ipath_iba6110.o \
+	ipath_iba6120.o \
 	ipath_init_chip.o \
 	ipath_intr.o \
-	ipath_layer.o \
-	ipath_pe800.o \
-	ipath_stats.o \
-	ipath_sysfs.o \
-	ipath_user_pages.o
-
-ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
-
-ib_ipath-y := \
-	ipath_cq.o \
 	ipath_keys.o \
+	ipath_layer.o \
 	ipath_mad.o \
+	ipath_mmap.o \
 	ipath_mr.o \
 	ipath_qp.o \
 	ipath_rc.o \
 	ipath_ruc.o \
 	ipath_srq.o \
+	ipath_stats.o \
+	ipath_sysfs.o \
 	ipath_uc.o \
 	ipath_ud.o \
-	ipath_verbs.o \
-	ipath_verbs_mcast.o
+	ipath_user_pages.o \
+	ipath_verbs_mcast.o \
+	ipath_verbs.o
+
+ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 062bd39..f577905 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -106,9 +106,9 @@
 	__u64 sps_ether_spkts;
 	/* number of "ethernet" packets received by driver */
 	__u64 sps_ether_rpkts;
-	/* number of SMA packets sent by driver */
+	/* number of SMA packets sent by driver. Obsolete. */
 	__u64 sps_sma_spkts;
-	/* number of SMA packets received by driver */
+	/* number of SMA packets received by driver. Obsolete. */
 	__u64 sps_sma_rpkts;
 	/* number of times all ports rcvhdrq was full and packet dropped */
 	__u64 sps_hdrqfull;
@@ -138,7 +138,7 @@
 	__u64 sps_pageunlocks;
 	/*
 	 * Number of packets dropped in kernel other than errors (ether
-	 * packets if ipath not configured, sma/mad, etc.)
+	 * packets if ipath not configured, etc.)
 	 */
 	__u64 sps_krdrops;
 	/* pad for future growth */
@@ -153,8 +153,6 @@
 #define IPATH_STATUS_DISABLED      0x2	/* hardware disabled */
 /* Device has been disabled via admin request */
 #define IPATH_STATUS_ADMIN_DISABLED    0x4
-#define IPATH_STATUS_OIB_SMA       0x8	/* ipath_mad kernel SMA running */
-#define IPATH_STATUS_SMA          0x10	/* user SMA running */
 /* Chip has been found and initted */
 #define IPATH_STATUS_CHIP_PRESENT 0x20
 /* IB link is at ACTIVE, usable for data traffic */
@@ -465,12 +463,11 @@
 	struct ipath_iovec sps_iov[4];
 };
 
-/* Passed into SMA special file's ->read and ->write methods. */
-struct ipath_sma_pkt
-{
-	__u32 unit;	/* unit on which to send packet */
-	__u64 data;	/* address of payload in userspace */
-	__u32 len;	/* length of payload */
+/* Passed into diag data special file's ->write method. */
+struct ipath_diag_pkt {
+	__u32 unit;
+	__u64 data;
+	__u32 len;
 };
 
 /*
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 3efee34..049221b 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -42,20 +42,28 @@
  * @entry: work completion entry to add
  * @sig: true if @entry is a solicitated entry
  *
- * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
+ * This may be called with qp->s_lock held.
  */
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 {
+	struct ipath_cq_wc *wc = cq->queue;
 	unsigned long flags;
+	u32 head;
 	u32 next;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
-	if (cq->head == cq->ibcq.cqe)
+	/*
+	 * Note that the head pointer might be writable by user processes.
+	 * Take care to verify it is a sane value.
+	 */
+	head = wc->head;
+	if (head >= (unsigned) cq->ibcq.cqe) {
+		head = cq->ibcq.cqe;
 		next = 0;
-	else
-		next = cq->head + 1;
-	if (unlikely(next == cq->tail)) {
+	} else
+		next = head + 1;
+	if (unlikely(next == wc->tail)) {
 		spin_unlock_irqrestore(&cq->lock, flags);
 		if (cq->ibcq.event_handler) {
 			struct ib_event ev;
@@ -67,8 +75,8 @@
 		}
 		return;
 	}
-	cq->queue[cq->head] = *entry;
-	cq->head = next;
+	wc->queue[head] = *entry;
+	wc->head = next;
 
 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,19 +109,20 @@
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
+	struct ipath_cq_wc *wc = cq->queue;
 	unsigned long flags;
 	int npolled;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
 	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (cq->tail == cq->head)
+		if (wc->tail == wc->head)
 			break;
-		*entry = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+		*entry = wc->queue[wc->tail];
+		if (wc->tail >= cq->ibcq.cqe)
+			wc->tail = 0;
 		else
-			cq->tail++;
+			wc->tail++;
 	}
 
 	spin_unlock_irqrestore(&cq->lock, flags);
@@ -160,38 +169,74 @@
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	struct ipath_cq *cq;
-	struct ib_wc *wc;
+	struct ipath_cq_wc *wc;
 	struct ib_cq *ret;
 
-	if (entries > ib_ipath_max_cqes) {
+	if (entries < 1 || entries > ib_ipath_max_cqes) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}
 
 	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
-	/*
-	 * Need to use vmalloc() if we want to support large #s of
-	 * entries.
-	 */
+	/* Allocate the completion queue structure. */
 	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
 	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
+	 * Allocate the completion queue entries and head/tail pointers.
+	 * This is allocated separately so that it can be resized and
+	 * also mapped into user space.
+	 * We need to use vmalloc() in order to support mmap and large
+	 * numbers of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (entries + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
 	if (!wc) {
-		kfree(cq);
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto bail_cq;
 	}
+
+	/*
+	 * Return the address of the WC as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) wc;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wc;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wc;
+		}
+		cq->ip = ip;
+		ip->context = context;
+		ip->obj = wc;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * entries);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		cq->ip = NULL;
+
 	/*
 	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
 	 * The number of entries should be >= the number requested or return
@@ -202,15 +247,22 @@
 	cq->triggered = 0;
 	spin_lock_init(&cq->lock);
 	tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
-	cq->head = 0;
-	cq->tail = 0;
+	wc->head = 0;
+	wc->tail = 0;
 	cq->queue = wc;
 
 	ret = &cq->ibcq;
 
 	dev->n_cqs_allocated++;
+	goto done;
 
-bail:
+bail_wc:
+	vfree(wc);
+
+bail_cq:
+	kfree(cq);
+
+done:
 	return ret;
 }
 
@@ -229,7 +281,10 @@
 
 	tasklet_kill(&cq->comptask);
 	dev->n_cqs_allocated--;
-	vfree(cq->queue);
+	if (cq->ip)
+		kref_put(&cq->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(cq->queue);
 	kfree(cq);
 
 	return 0;
@@ -253,7 +308,7 @@
 	spin_lock_irqsave(&cq->lock, flags);
 	/*
 	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions.
+	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
 	 */
 	if (cq->notify != IB_CQ_NEXT_COMP)
 		cq->notify = notify;
@@ -264,46 +319,86 @@
 int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
-	struct ib_wc *wc, *old_wc;
-	u32 n;
+	struct ipath_cq_wc *old_wc = cq->queue;
+	struct ipath_cq_wc *wc;
+	u32 head, tail, n;
 	int ret;
 
+	if (cqe < 1 || cqe > ib_ipath_max_cqes) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
-	wc = vmalloc(sizeof(*wc) * (cqe + 1));
+	wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
 	if (!wc) {
 		ret = -ENOMEM;
 		goto bail;
 	}
 
+	/* Return the address of wc as the mmap offset; see ipath_mmap(). */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		__u64 offset = (__u64) wc;
+
+		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (ret) {
+			/* wc is not yet installed in cq, so free it here */
+			vfree(wc);
+			goto bail;
+		}
+	}
+
 	spin_lock_irq(&cq->lock);
-	if (cq->head < cq->tail)
-		n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
+	/*
+	 * Make sure head and tail are sane since they
+	 * might be user writable.
+	 */
+	head = old_wc->head;
+	if (head > (u32) cq->ibcq.cqe)
+		head = (u32) cq->ibcq.cqe;
+	tail = old_wc->tail;
+	if (tail > (u32) cq->ibcq.cqe)
+		tail = (u32) cq->ibcq.cqe;
+	if (head < tail)
+		n = cq->ibcq.cqe + 1 + head - tail;
 	else
-		n = cq->head - cq->tail;
+		n = head - tail;
 	if (unlikely((u32)cqe < n)) {
 		spin_unlock_irq(&cq->lock);
 		vfree(wc);
 		ret = -EOVERFLOW;
 		goto bail;
 	}
-	for (n = 0; cq->tail != cq->head; n++) {
-		wc[n] = cq->queue[cq->tail];
-		if (cq->tail == cq->ibcq.cqe)
-			cq->tail = 0;
+	for (n = 0; tail != head; n++) {
+		wc->queue[n] = old_wc->queue[tail];
+		if (tail == (u32) cq->ibcq.cqe)
+			tail = 0;
 		else
-			cq->tail++;
+			tail++;
 	}
 	cq->ibcq.cqe = cqe;
-	cq->head = n;
-	cq->tail = 0;
-	old_wc = cq->queue;
+	wc->head = n;
+	wc->tail = 0;
 	cq->queue = wc;
 	spin_unlock_irq(&cq->lock);
 
 	vfree(old_wc);
 
+	if (cq->ip) {
+		struct ipath_ibdev *dev = to_idev(ibcq->device);
+		struct ipath_mmap_info *ip = cq->ip;
+
+		ip->obj = wc;
+		ip->size = PAGE_ALIGN(sizeof(*wc) +
+				      sizeof(struct ib_wc) * cqe);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	}
+
 	ret = 0;
 
 bail:
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index f415bed..df69f0d8 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -60,7 +60,6 @@
 #define __IPATH_USER_SEND   0x1000	/* use user mode send */
 #define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
-#define __IPATH_SMADBG      0x8000	/* sma packet debug */
 #define __IPATH_IPATHDBG    0x10000	/* Ethernet (IPATH) gen debug */
 #define __IPATH_IPATHWARN   0x20000	/* Ethernet (IPATH) warnings */
 #define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors */
@@ -84,7 +83,6 @@
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG     0x0
 #define __IPATH_EPKTDBG   0x0	/* print ethernet packet data */
-#define __IPATH_SMADBG    0x0   /* process startup (init)/exit messages */
 #define __IPATH_IPATHDBG  0x0	/* Ethernet (IPATH) table dump on */
 #define __IPATH_IPATHWARN 0x0	/* Ethernet (IPATH) warnings on   */
 #define __IPATH_IPATHERR  0x0	/* Ethernet (IPATH) errors on   */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 147dd89..29958b6 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -41,11 +41,12 @@
  * through the /sys/bus/pci resource mmap interface.
  */
 
+#include <linux/io.h>
 #include <linux/pci.h>
+#include <linux/vmalloc.h>
 #include <asm/uaccess.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 int ipath_diag_inuse;
@@ -274,6 +275,158 @@
 	return ret;
 }
 
+static ssize_t ipath_diagpkt_write(struct file *fp,
+				   const char __user *data,
+				   size_t count, loff_t *off);
+
+static struct file_operations diagpkt_file_ops = {
+	.owner = THIS_MODULE,
+	.write = ipath_diagpkt_write,
+};
+
+static struct cdev *diagpkt_cdev;
+static struct class_device *diagpkt_class_dev;
+
+int __init ipath_diagpkt_add(void)
+{
+	return ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+			       "ipath_diagpkt", &diagpkt_file_ops,
+			       &diagpkt_cdev, &diagpkt_class_dev);
+}
+
+void __exit ipath_diagpkt_remove(void)
+{
+	ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
+}
+
+/**
+ * ipath_diagpkt_write - write an IB packet
+ * @fp: the diag data device file pointer
+ * @data: ipath_diag_pkt structure saying where to get the packet
+ * @count: size of @data; must be at least sizeof(struct ipath_diag_pkt)
+ * @off: unused by this code
+ */
+static ssize_t ipath_diagpkt_write(struct file *fp,
+				   const char __user *data,
+				   size_t count, loff_t *off)
+{
+	u32 __iomem *piobuf;
+	u32 plen, clen, pbufn;
+	struct ipath_diag_pkt dp;
+	u32 *tmpbuf = NULL;
+	struct ipath_devdata *dd;
+	ssize_t ret = 0;
+	u64 val;
+
+	if (count < sizeof(dp)) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	if (copy_from_user(&dp, data, sizeof(dp))) {
+		ret = -EFAULT;
+		goto bail;
+	}
+
+	/* send count must be a non-zero, exact number of dwords */
+	if (!dp.len || (dp.len & 3)) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	clen = dp.len >> 2;
+
+	dd = ipath_lookup(dp.unit);
+	if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
+	    !dd->ipath_kregbase) {
+		ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
+			   dp.unit);
+		ret = -ENODEV;
+		goto bail;
+	}
+
+	if (ipath_diag_inuse && !diag_set_link &&
+	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+		diag_set_link = 1;
+		ipath_cdbg(VERBOSE, "Trying to set to set link active for "
+			   "diag pkt\n");
+		ipath_set_linkstate(dd, IPATH_IB_LINKARM);
+		ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
+	}
+
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
+		ret = -ENODEV;
+		goto bail;
+	}
+	val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
+	if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
+	    val != IPATH_IBSTATE_ACTIVE) {
+		ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
+			   dd->ipath_unit, (unsigned long long) val);
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	/* need total length up front; the +1 word is for qword padding */
+	plen = sizeof(u32) + dp.len;
+
+	/* 64-bit compare so a huge dp.len cannot wrap past the limit */
+	if ((u64) dp.len + 2 * sizeof(u32) > dd->ipath_ibmaxlen) {
+		ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
+			  plen - 4, dd->ipath_ibmaxlen);
+		ret = -EINVAL;
+		goto bail;	/* before writing pbc */
+	}
+	tmpbuf = vmalloc(plen);
+	if (!tmpbuf) {
+		dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
+			 "failing\n");
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	if (copy_from_user(tmpbuf,
+			   (const void __user *) (unsigned long) dp.data,
+			   dp.len)) {
+		ret = -EFAULT;
+		goto bail;
+	}
+
+	piobuf = ipath_getpiobuf(dd, &pbufn);
+	if (!piobuf) {
+		ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
+			   dd->ipath_unit);
+		ret = -EBUSY;
+		goto bail;
+	}
+
+	plen >>= 2;		/* in dwords */
+
+	if (ipath_debug & __IPATH_PKTDBG)
+		ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
+			   dd->ipath_unit, plen - 1, pbufn);
+
+	/* we have to flush after the PBC for correctness on some cpus
+	 * or WC buffer can be written out of order */
+	writeq(plen, piobuf);
+	ipath_flush_wc();
+	/* copy all but the trigger word, then flush, so it's written
+	 * to chip before trigger word, then write trigger word, then
+	 * flush again, so packet is sent. */
+	__iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+	ipath_flush_wc();
+	__raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+	ipath_flush_wc();
+	/* success is reported as the size of the request header */
+	ret = sizeof(dp);
+
+bail:
+	vfree(tmpbuf);
+	return ret;
+}
+
 static int ipath_diag_release(struct inode *in, struct file *fp)
 {
 	mutex_lock(&ipath_mutex);
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index f98518d..2108466 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -39,7 +39,7 @@
 #include <linux/vmalloc.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 static void ipath_update_pio_bufs(struct ipath_devdata *);
@@ -51,8 +51,6 @@
 	return iname;
 }
 
-EXPORT_SYMBOL_GPL(ipath_get_unit_name);
-
 #define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
 #define PFX IPATH_DRV_NAME ": "
 
@@ -60,13 +58,13 @@
  * The size has to be longer than this string, so we can append
  * board/chip information to it in the init code.
  */
-const char ipath_core_version[] = IPATH_IDSTR "\n";
+const char ib_ipath_version[] = IPATH_IDSTR "\n";
 
 static struct idr unit_table;
 DEFINE_SPINLOCK(ipath_devs_lock);
 LIST_HEAD(ipath_dev_list);
 
-wait_queue_head_t ipath_sma_state_wait;
+wait_queue_head_t ipath_state_wait;
 
 unsigned ipath_debug = __IPATH_INFO;
 
@@ -403,10 +401,10 @@
 	/* setup the chip-specific functions, as early as possible. */
 	switch (ent->device) {
 	case PCI_DEVICE_ID_INFINIPATH_HT:
-		ipath_init_ht400_funcs(dd);
+		ipath_init_iba6110_funcs(dd);
 		break;
 	case PCI_DEVICE_ID_INFINIPATH_PE800:
-		ipath_init_pe800_funcs(dd);
+		ipath_init_iba6120_funcs(dd);
 		break;
 	default:
 		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -440,7 +438,13 @@
 	}
 	dd->ipath_pcirev = rev;
 
+#if defined(__powerpc__)
+	/* There isn't a generic way to specify writethrough mappings */
+	dd->ipath_kregbase = __ioremap(addr, len,
+		(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
+#else
 	dd->ipath_kregbase = ioremap_nocache(addr, len);
+#endif
 
 	if (!dd->ipath_kregbase) {
 		ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
@@ -503,7 +507,7 @@
 	ipathfs_add_device(dd);
 	ipath_user_add(dd);
 	ipath_diag_add(dd);
-	ipath_layer_add(dd);
+	ipath_register_ib_device(dd);
 
 	goto bail;
 
@@ -532,7 +536,7 @@
 		return;
 
 	dd = pci_get_drvdata(pdev);
-	ipath_layer_remove(dd);
+	ipath_unregister_ib_device(dd->verbs_dev);
 	ipath_diag_remove(dd);
 	ipath_user_remove(dd);
 	ipathfs_remove_device(dd);
@@ -607,21 +611,23 @@
  *
  * wait up to msecs milliseconds for IB link state change to occur for
  * now, take the easy polling route.  Currently used only by
- * ipath_layer_set_linkstate.  Returns 0 if state reached, otherwise
+ * ipath_set_linkstate.  Returns 0 if state reached, otherwise
  * -ETIMEDOUT state can have multiple states set, for any of several
  * transitions.
  */
-int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
+static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
+				int msecs)
 {
-	dd->ipath_sma_state_wanted = state;
-	wait_event_interruptible_timeout(ipath_sma_state_wait,
+	dd->ipath_state_wanted = state;
+	wait_event_interruptible_timeout(ipath_state_wait,
 					 (dd->ipath_flags & state),
 					 msecs_to_jiffies(msecs));
-	dd->ipath_sma_state_wanted = 0;
+	dd->ipath_state_wanted = 0;
 
 	if (!(dd->ipath_flags & state)) {
 		u64 val;
-		ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n",
+		ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
+			   " ms\n",
 			   /* test INIT ahead of DOWN, both can be set */
 			   (state & IPATH_LINKINIT) ? "INIT" :
 			   ((state & IPATH_LINKDOWN) ? "DOWN" :
@@ -807,58 +813,6 @@
 	return skb;
 }
 
-/**
- * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
- * @dd: the infinipath device
- * @etail: the sk_buff number
- * @tlen: the total packet length
- * @hdr: the ethernet header
- *
- * Separate routine for better overall optimization
- */
-static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
-			    u32 tlen, struct ether_header *hdr)
-{
-	u32 elen;
-	u8 pad, *bthbytes;
-	struct sk_buff *skb, *nskb;
-
-	if (dd->ipath_port0_skbs &&
-			hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) {
-		/*
-		 * Allocate a new sk_buff to replace the one we give
-		 * to the network stack.
-		 */
-		nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
-		if (!nskb) {
-			/* count OK packets that we drop */
-			ipath_stats.sps_krdrops++;
-			return;
-		}
-
-		bthbytes = (u8 *) hdr->bth;
-		pad = (bthbytes[1] >> 4) & 3;
-		/* +CRC32 */
-		elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
-
-		skb = dd->ipath_port0_skbs[etail];
-		dd->ipath_port0_skbs[etail] = nskb;
-		skb_put(skb, elen);
-
-		dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
-				    ((char __iomem *) dd->ipath_kregbase
-				     + dd->ipath_rcvegrbase), 0,
-				    virt_to_phys(nskb->data));
-
-		__ipath_layer_rcv(dd, hdr, skb);
-
-		/* another ether packet received */
-		ipath_stats.sps_ether_rpkts++;
-	}
-	else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP)
-		__ipath_layer_rcv_lid(dd, hdr);
-}
-
 static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
 			     u32 eflags,
 			     u32 l,
@@ -972,26 +926,17 @@
 		if (unlikely(eflags))
 			ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
 		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-				int ret = __ipath_verbs_rcv(dd, rc + 1,
-							    ebuf, tlen);
-				if (ret == -ENODEV)
-					ipath_cdbg(VERBOSE,
-						   "received IB packet, "
-						   "not SMA (QP=%x)\n", qp);
-				if (dd->ipath_lli_counter)
-					dd->ipath_lli_counter--;
-
-		} else if (etype == RCVHQ_RCV_TYPE_EAGER) {
-			if (qp == IPATH_KD_QP &&
-			    bthbytes[0] == ipath_layer_rcv_opcode &&
-			    ebuf)
-				ipath_rcv_layer(dd, etail, tlen,
-						(struct ether_header *)hdr);
-			else
-				ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-					   "qp=%x), len %x; ignored\n",
-					   etype, bthbytes[0], qp, tlen);
+			ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
+			if (dd->ipath_lli_counter)
+				dd->ipath_lli_counter--;
+			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+				   "qp=%x), len %x; ignored\n",
+				   etype, bthbytes[0], qp, tlen);
 		}
+		else if (etype == RCVHQ_RCV_TYPE_EAGER)
+			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+				   "qp=%x), len %x; ignored\n",
+				   etype, bthbytes[0], qp, tlen);
 		else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
 			ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
 				  be32_to_cpu(hdr->bth[0]) & 0xff);
@@ -1024,7 +969,8 @@
 		 */
 		if (l == hdrqtail || (i && !(i&0xf))) {
 			u64 lval;
-			if (l == hdrqtail) /* PE-800 interrupt only on last */
+			if (l == hdrqtail)
+				/* request IBA6120 interrupt only on last */
 				lval = dd->ipath_rhdrhead_intr_off | l;
 			else
 				lval = l;
@@ -1038,7 +984,7 @@
 	}
 
 	if (!dd->ipath_rhdrhead_intr_off && !reloop) {
-		/* HT-400 workaround; we can have a race clearing chip
+		/* IBA6110 workaround; we can have a race clearing chip
 		 * interrupt with another interrupt about to be delivered,
 		 * and can clear it before it is delivered on the GPIO
 		 * workaround.  By doing the extra check here for the
@@ -1211,7 +1157,7 @@
  *
  * do appropriate marking as busy, etc.
  * returns buffer number if one found (>=0), negative number is error.
- * Used by ipath_sma_send_pkt and ipath_layer_send
+ * Used by ipath_layer_send
  */
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
 {
@@ -1317,13 +1263,6 @@
 		goto bail;
 	}
 
-	if (updated)
-		/*
-		 * ran out of bufs, now some (at least this one we just
-		 * got) are now available, so tell the layered driver.
-		 */
-		__ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-
 	/*
 	 * set next starting place.  Since it's just an optimization,
 	 * it doesn't matter who wins on this, so no locking
@@ -1500,7 +1439,7 @@
 	return ret;
 }
 
-void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 {
 	static const char *what[4] = {
 		[0] = "DOWN",
@@ -1511,7 +1450,7 @@
 	int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
 			INFINIPATH_IBCC_LINKCMD_MASK;
 
-	ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
+	ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
 		   "is %s\n", dd->ipath_unit,
 		   what[linkcmd],
 		   ipath_ibcstatus_str[
@@ -1520,7 +1459,7 @@
 			    INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
 			   INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
 	/* flush all queued sends when going to DOWN or INIT, to be sure that
-	 * they don't block SMA and other MAD packets */
+	 * they don't block MAD packets */
 	if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
 				 INFINIPATH_S_ABORT);
@@ -1534,6 +1473,180 @@
 			 dd->ipath_ibcctrl | which);
 }
 
+/**
+ * ipath_set_linkstate - request an IB link state change
+ * @dd: the infinipath device
+ * @newstate: one of the IPATH_IB_LINK* values handled below
+ *
+ * The three LINKDOWN variants only issue the command through
+ * ipath_set_ib_lstate() and return 0 without waiting.
+ *
+ * INIT, ARM and ACTIVE return 0 at once when the matching flag is
+ * already set in dd->ipath_flags.  ARM is refused with -EINVAL unless
+ * the INIT or ACTIVE flag is set, and ACTIVE is refused with -EINVAL
+ * unless the ARMED flag is set.  Otherwise the command is issued and
+ * the function waits for the requested state.
+ *
+ * Returns 0, -EINVAL (also used for an unknown @newstate, which is
+ * logged with ipath_dbg()), or whatever ipath_wait_linkstate() returns
+ * for a 2000 ms wait.
+ */
+int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+{
+	u32 wanted;
+
+	switch (newstate) {
+	case IPATH_IB_LINKDOWN:
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		return 0;	/* don't wait */
+
+	case IPATH_IB_LINKDOWN_SLEEP:
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		return 0;	/* don't wait */
+
+	case IPATH_IB_LINKDOWN_DISABLE:
+		ipath_set_ib_lstate(dd,
+				    INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
+				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		return 0;	/* don't wait */
+
+	case IPATH_IB_LINKINIT:
+		if (dd->ipath_flags & IPATH_LINKINIT)
+			return 0;
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		wanted = IPATH_LINKINIT;
+		break;
+
+	case IPATH_IB_LINKARM:
+		if (dd->ipath_flags & IPATH_LINKARMED)
+			return 0;
+		/* arming is refused unless INIT or ACTIVE is set */
+		if (!(dd->ipath_flags &
+		      (IPATH_LINKINIT | IPATH_LINKACTIVE)))
+			return -EINVAL;
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		/*
+		 * Since the port can transition to ACTIVE by receiving
+		 * a non VL 15 packet, wait for either state.
+		 */
+		wanted = IPATH_LINKARMED | IPATH_LINKACTIVE;
+		break;
+
+	case IPATH_IB_LINKACTIVE:
+		if (dd->ipath_flags & IPATH_LINKACTIVE)
+			return 0;
+		/* activating is refused unless ARMED is set */
+		if (!(dd->ipath_flags & IPATH_LINKARMED))
+			return -EINVAL;
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
+				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		wanted = IPATH_LINKACTIVE;
+		break;
+
+	default:
+		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
+		return -EINVAL;
+	}
+
+	return ipath_wait_linkstate(dd, wanted, 2000);
+}
+
+/**
+ * ipath_set_mtu - set the MTU
+ * @dd: the infinipath device
+ * @arg: the new MTU; must be 256, 512, 1024, 2048 or 4096
+ *
+ * We can handle "any" incoming size; the issue here is whether we need
+ * to restrict our outgoing size.  Apart from the valid-size check, no
+ * sanity checking is done, and programs already running when the size
+ * changes are not dealt with.  NOTE: changing the MTU will usually cause
+ * the IBC to go back to link initialize (IPATH_IBSTATE_INIT) state...
+ * Returns 0 on success or if the MTU is unchanged, -EINVAL if invalid.
+ */
+int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
+{
+	u32 piosize;
+	int changed = 0;
+	int ret;
+
+	/*
+	 * mtu is IB data payload max.  It's the largest power of 2 less
+	 * than piosize (or even larger, since it only really controls the
+	 * largest we can receive; we can send the max of the mtu and
+	 * piosize).  We check that it's one of the valid IB sizes.
+	 */
+	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+	    arg != 4096) {
+		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
+		ret = -EINVAL;
+		goto bail;
+	}
+	if (dd->ipath_ibmtu == arg) {
+		ret = 0;        /* same as current */
+		goto bail;
+	}
+
+	piosize = dd->ipath_ibmaxlen;
+	dd->ipath_ibmtu = arg;
+
+	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
+		/* Only if it's not the initial value (or reset to it) */
+		if (piosize != dd->ipath_init_ibmaxlen) {
+			dd->ipath_ibmaxlen = piosize;
+			changed = 1;
+		}
+	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
+		piosize = arg + IPATH_PIO_MAXIBHDR;
+		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
+			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
+			   arg);
+		dd->ipath_ibmaxlen = piosize;
+		changed = 1;
+	}
+
+	if (changed) {
+		/*
+		 * set the IBC maxpktlength to the size of our pio
+		 * buffers in words
+		 */
+		u64 ibc = dd->ipath_ibcctrl;
+		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
+			 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
+
+		piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
+		dd->ipath_ibmaxlen = piosize;
+		piosize /= sizeof(u32); /* in words */
+		/*
+		 * for ICRC, which we only send in diag test pkt mode, and
+		 * we don't need to worry about that for mtu
+		 */
+		piosize += 1;
+
+		ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+		dd->ipath_ibcctrl = ibc;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+		dd->ipath_f_tidtemplate(dd);
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+{
+	dd->ipath_lid = arg;
+	dd->ipath_lmc = lmc;
+	/* the values are only stored in dd here, so this cannot fail */
+	return 0;
+}
+
 /**
  * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
  * @dd: the infinipath device
@@ -1637,13 +1750,6 @@
 	ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
 			    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
 
-	/*
-	 * we are shutting down, so tell the layered driver.  We don't do
-	 * this on just a link state change, much like ethernet, a cable
-	 * unplug, etc. doesn't change driver state
-	 */
-	ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN);
-
 	/* disable IBC */
 	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
@@ -1743,7 +1849,7 @@
 {
 	int ret;
 
-	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);
+	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
 
 	/*
 	 * These must be called before the driver is registered with
@@ -1776,8 +1882,18 @@
 		goto bail_group;
 	}
 
+	ret = ipath_diagpkt_add();
+	if (ret < 0) {
+		printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
+		       "diag data device: error %d\n", -ret);
+		goto bail_ipathfs;
+	}
+
 	goto bail;
 
+bail_ipathfs:
+	ipath_exit_ipathfs();
+
 bail_group:
 	ipath_driver_remove_group(&ipath_driver.driver);
 
@@ -1888,6 +2004,8 @@
 	struct ipath_devdata *dd, *tmp;
 	unsigned long flags;
 
+	ipath_diagpkt_remove();
+
 	ipath_exit_ipathfs();
 
 	ipath_driver_remove_group(&ipath_driver.driver);
@@ -1998,5 +2116,22 @@
 	return ret;
 }
 
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
+{
+	u64 xgxs;
+
+	/* values above the RX_POL mask are rejected */
+	if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
+		return -1;
+	if (dd->ipath_rx_pol_inv == new_pol_inv)
+		return 0;	/* unchanged: skip the register access */
+
+	dd->ipath_rx_pol_inv = new_pol_inv;
+	xgxs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+	xgxs &= ~(INFINIPATH_XGXS_RX_POL_MASK << INFINIPATH_XGXS_RX_POL_SHIFT);
+	xgxs |= ((u64) dd->ipath_rx_pol_inv) << INFINIPATH_XGXS_RX_POL_SHIFT;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, xgxs);
+	return 0;
+}
 module_init(infinipath_init);
 module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index bbaa70e..29930e2 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -39,7 +39,6 @@
 #include <asm/pgtable.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 static int ipath_open(struct inode *, struct file *);
@@ -985,15 +984,17 @@
 	 * write combining behavior we want on the PIO buffers!
 	 */
 
-	if (vma->vm_flags & VM_READ) {
-		dev_info(&dd->pcidev->dev,
-			 "Can't map piobufs as readable (flags=%lx)\n",
-			 vma->vm_flags);
-		ret = -EPERM;
-		goto bail;
-	}
+#if defined(__powerpc__)
+	/* There isn't a generic way to specify writethrough mappings */
+	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
+	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+#endif
 
-	/* don't allow them to later change to readable with mprotect */
+	/*
+	 * don't allow them to later change to readable with mprotect (for when
+	 * not initially mapped readable, as is normally the case)
+	 */
 	vma->vm_flags &= ~VM_MAYREAD;
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
 
@@ -1109,7 +1110,7 @@
 		ret = mmap_rcvegrbufs(vma, pd);
 	else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
 		/*
-		 * The rcvhdrq itself; readonly except on HT-400 (so have
+		 * The rcvhdrq itself; readonly except on HT (so have
 		 * to allow writable mapping), multiple pages, contiguous
 		 * from an i/o perspective.
 		 */
@@ -1149,6 +1150,7 @@
 	struct ipath_portdata *pd;
 	u32 head, tail;
 	int bit;
+	unsigned pollflag = 0;
 	struct ipath_devdata *dd;
 
 	pd = port_fp(fp);
@@ -1185,9 +1187,12 @@
 			clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
 			pd->port_rcvwait_to++;
 		}
+		else
+			pollflag = POLLIN | POLLRDNORM;
 	}
 	else {
 		/* it's already happened; don't do wait_event overhead */
+		pollflag = POLLIN | POLLRDNORM;
 		pd->port_rcvnowait++;
 	}
 
@@ -1195,7 +1200,7 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
-	return 0;
+	return pollflag;
 }
 
 static int try_alloc_port(struct ipath_devdata *dd, int port,
@@ -1297,14 +1302,14 @@
 	 * This code is present to allow a knowledgeable person to
 	 * specify the layout of processes to processors before opening
 	 * this driver, and then we'll assign the process to the "closest"
-	 * HT-400 to that processor (we assume reasonable connectivity,
+	 * InfiniPath chip to that processor (we assume reasonable connectivity,
 	 * for now).  This code assumes that if affinity has been set
 	 * before this point, that at most one cpu is set; for now this
 	 * is reasonable.  I check for both cpus_empty() and cpus_full(),
 	 * in case some kernel variant sets none of the bits when no
 	 * affinity is set.  2.6.11 and 12 kernels have all present
 	 * cpus set.  Some day we'll have to fix it up further to handle
-	 * a cpu subset.  This algorithm fails for two HT-400's connected
+	 * a cpu subset.  This algorithm fails for two HT chips connected
 	 * in tunnel fashion.  Eventually this needs real topology
 	 * information.  There may be some issues with dual core numbering
 	 * as well.  This needs more work prior to release.
@@ -1815,7 +1820,7 @@
 		if (ret < 0) {
 			ipath_dev_err(dd, "Could not create wildcard "
 				      "minor: error %d\n", -ret);
-			goto bail_sma;
+			goto bail_user;
 		}
 
 		atomic_set(&user_setup, 1);
@@ -1831,7 +1836,7 @@
 
 	goto bail;
 
-bail_sma:
+bail_user:
 	user_cleanup();
 bail:
 	return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 0936d8e..a5eb30a 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -191,8 +191,8 @@
 	portinfo[4] = (dd->ipath_lid << 16);
 
 	/*
-	 * Notimpl yet SMLID (should we store this in the driver, in case
-	 * SMA dies?)  CapabilityMask is 0, we don't support any of these
+	 * Notimpl yet SMLID.
+	 * CapabilityMask is 0, we don't support any of these
 	 * DiagCode is 0; we don't store any diag info for now Notimpl yet
 	 * M_KeyLeasePeriod (we don't support M_Key)
 	 */
diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
similarity index 96%
rename from drivers/infiniband/hw/ipath/ipath_ht400.c
rename to drivers/infiniband/hw/ipath/ipath_iba6110.c
index 3db015d..bf2455a 100644
--- a/drivers/infiniband/hw/ipath/ipath_ht400.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -33,7 +33,7 @@
 
 /*
  * This file contains all of the code that is specific to the InfiniPath
- * HT-400 chip.
+ * HT chip.
  */
 
 #include <linux/pci.h>
@@ -43,7 +43,7 @@
 #include "ipath_registers.h"
 
 /*
- * This lists the InfiniPath HT400 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
  * This structure should never be directly accessed.
  *
  * The names are in InterCap form because they're taken straight from
@@ -461,8 +461,9 @@
 			 * times.
 			 */
 			if (dd->ipath_flags & IPATH_INITTED) {
-				ipath_dev_err(dd, "Fatal Error (freeze "
-					      "mode), no longer usable\n");
+				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					      "mode), no longer usable, SN %.16s\n",
+						  dd->ipath_serial);
 				isfatal = 1;
 			}
 			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
@@ -537,7 +538,7 @@
 	if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
 		strlcat(msg, "[HT core Misc7]", msgl);
 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, HT-400 unusable]",
+		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
 		/* ignore from now on, so disable until driver reloaded */
 		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
@@ -553,7 +554,7 @@
 
 	if (hwerrs & _IPATH_PLL_FAIL) {
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), HT-400 unusable]",
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
 			 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
 		strlcat(msg, bitsmsg, msgl);
 		/* ignore from now on, so disable until driver reloaded */
@@ -610,18 +611,18 @@
 		break;
 	case 5:
 		/*
-		 * HT-460 original production board; two production levels, with
+		 * original production board; two production levels, with
 		 * different serial number ranges.   See ipath_ht_early_init() for
 		 * case where we enable IPATH_GPIO_INTR for later serial # range.
 		 */
-		n = "InfiniPath_HT-460";
+		n = "InfiniPath_QHT7040";
 		break;
 	case 6:
 		n = "OEM_Board_3";
 		break;
 	case 7:
-		/* HT-460 small form factor production board */
-		n = "InfiniPath_HT-465";
+		/* small form factor production board */
+		n = "InfiniPath_QHT7140";
 		break;
 	case 8:
 		n = "LS/X-1";
@@ -633,7 +634,7 @@
 		n = "OEM_Board_2";
 		break;
 	case 11:
-		n = "InfiniPath_HT-470";
+		n = "InfiniPath_HT-470"; /* obsoleted */
 		break;
 	case 12:
 		n = "OEM_Board_4";
@@ -641,7 +642,7 @@
 	default:		/* don't know, just print the number */
 		ipath_dev_err(dd, "Don't yet know about board "
 			      "with ID %u\n", boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u",
+		snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
 			 boardrev);
 		break;
 	}
@@ -650,11 +651,10 @@
 
 	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
 		/*
-		 * This version of the driver only supports the HT-400
-		 * Rev 3.2
+		 * This version of the driver only supports Rev 3.2 and 3.3
 		 */
 		ipath_dev_err(dd,
-			      "Unsupported HT-400 revision %u.%u!\n",
+			      "Unsupported InfiniPath hardware revision %u.%u!\n",
 			      dd->ipath_majrev, dd->ipath_minrev);
 		ret = 1;
 		goto bail;
@@ -738,7 +738,7 @@
 
 static int ipath_setup_ht_reset(struct ipath_devdata *dd)
 {
-	ipath_dbg("No reset possible for HT-400\n");
+	ipath_dbg("No reset possible for this InfiniPath hardware\n");
 	return 0;
 }
 
@@ -925,7 +925,7 @@
 
 	/*
 	 * kernels with CONFIG_PCI_MSI set the vector in the irq field of
-	 * struct pci_device, so we use that to program the HT-400 internal
+	 * struct pci_device, so we use that to program the internal
 	 * interrupt register (not config space) with that value. The BIOS
 	 * must still have done the basic MSI setup.
 	 */
@@ -1013,7 +1013,7 @@
  * @dd: the infinipath device
  *
  * Called during driver unload.
- * This is currently a nop for the HT-400, not for all chips
+ * This is currently a nop for the HT chip, not for all chips
  */
 static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
 {
@@ -1290,6 +1290,15 @@
 		val &= ~INFINIPATH_XGXS_RESET;
 		change = 1;
 	}
+	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+		/* need to compensate for Tx inversion in partner */
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+		         INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= dd->ipath_rx_pol_inv <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		change = 1;
+	}
 	if (change)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
@@ -1470,7 +1479,7 @@
 	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
 
 	/*
-	 * For HT-400, we allocate a somewhat overly large eager buffer,
+	 * For HT, we allocate a somewhat overly large eager buffer,
 	 * such that we can guarantee that we can receive the largest
 	 * packet that we can send out.  To truly support a 4KB MTU,
 	 * we need to bump this to a large value.  To date, other than
@@ -1531,7 +1540,7 @@
 	if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
 		dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
 		/*
-		 * Later production HT-460 has same changes as HT-465, so
+		 * Later production QHT7040 has same changes as QHT7140, so
 		 * can use GPIO interrupts.  They have serial #'s starting
 		 * with 128, rather than 112.
 		 */
@@ -1560,13 +1569,13 @@
 }
 
 /**
- * ipath_init_ht400_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6110_funcs - set up the chip-specific function pointers
  * @dd: the infinipath device
  *
  * This is global, and is called directly at init to set up the
  * chip-specific function pointers for later use.
  */
-void ipath_init_ht400_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
 {
 	dd->ipath_f_intrsetup = ipath_ht_intconfig;
 	dd->ipath_f_bus = ipath_setup_ht_config;
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
similarity index 94%
rename from drivers/infiniband/hw/ipath/ipath_pe800.c
rename to drivers/infiniband/hw/ipath/ipath_iba6120.c
index b83f66d..d86516d 100644
--- a/drivers/infiniband/hw/ipath/ipath_pe800.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -32,7 +32,7 @@
  */
 /*
  * This file contains all of the code that is specific to the
- * InfiniPath PE-800 chip.
+ * InfiniPath PCIe chip.
  */
 
 #include <linux/interrupt.h>
@@ -45,9 +45,9 @@
 
 /*
  * This file contains all the chip-specific register information and
- * access functions for the QLogic InfiniPath PE800, the PCI-Express chip.
+ * access functions for the QLogic InfiniPath PCI-Express chip.
  *
- * This lists the InfiniPath PE800 registers, in the actual chip layout.
+ * This lists the InfiniPath registers, in the actual chip layout.
  * This structure should never be directly accessed.
  */
 struct _infinipath_do_not_use_kernel_regs {
@@ -213,7 +213,6 @@
 	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
 	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
 
-	/* This group is pe-800-specific; and used only in this file */
 	/* The rcvpktled register controls one of the debug port signals, so
 	 * a packet activity LED can be connected to it. */
 	.kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
@@ -364,8 +363,9 @@
 			 * and we get here multiple times
 			 */
 			if (dd->ipath_flags & IPATH_INITTED) {
-				ipath_dev_err(dd, "Fatal Error (freeze "
-					      "mode), no longer usable\n");
+				ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					      "mode), no longer usable, SN %.16s\n",
+						  dd->ipath_serial);
 				isfatal = 1;
 			}
 			/*
@@ -388,7 +388,7 @@
 	*msg = '\0';
 
 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
-		strlcat(msg, "[Memory BIST test failed, PE-800 unusable]",
+		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
 		/* ignore from now on, so disable until driver reloaded */
 		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
@@ -433,7 +433,7 @@
 
 	if (hwerrs & _IPATH_PLL_FAIL) {
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[PLL failed (%llx), PE-800 unusable]",
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
 			 (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
 		strlcat(msg, bitsmsg, msgl);
 		/* ignore from now on, so disable until driver reloaded */
@@ -511,22 +511,25 @@
 		n = "InfiniPath_Emulation";
 		break;
 	case 1:
-		n = "InfiniPath_PE-800-Bringup";
+		n = "InfiniPath_QLE7140-Bringup";
 		break;
 	case 2:
-		n = "InfiniPath_PE-880";
+		n = "InfiniPath_QLE7140";
 		break;
 	case 3:
-		n = "InfiniPath_PE-850";
+		n = "InfiniPath_QMI7140";
 		break;
 	case 4:
-		n = "InfiniPath_PE-860";
+		n = "InfiniPath_QEM7140";
+		break;
+	case 5:
+		n = "InfiniPath_QMH7140";
 		break;
 	default:
 		ipath_dev_err(dd,
 			      "Don't yet know about board with ID %u\n",
 			      boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u",
+		snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
 			 boardrev);
 		break;
 	}
@@ -534,7 +537,7 @@
 		snprintf(name, namelen, "%s", n);
 
 	if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
-		ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n",
+		ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
 			      dd->ipath_majrev, dd->ipath_minrev);
 		ret = 1;
 	} else
@@ -651,6 +654,15 @@
 		val &= ~INFINIPATH_XGXS_RESET;
 		change = 1;
 	}
+	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+		/* need to compensate for Tx inversion in partner */
+		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+		         INFINIPATH_XGXS_RX_POL_SHIFT);
+		val |= dd->ipath_rx_pol_inv <<
+			INFINIPATH_XGXS_RX_POL_SHIFT;
+		change = 1;
+	}
 	if (change)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
@@ -705,7 +717,7 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
 }
 
-/* this is not yet needed on the PE800, so just return 0. */
+/* this is not yet needed on this chip, so just return 0. */
 static int ipath_pe_intconfig(struct ipath_devdata *dd)
 {
 	return 0;
@@ -759,8 +771,8 @@
  *
  * This is called during driver unload.
  * We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT-400. If we do end up needing pci_enable_msi
- * at some point in the future for HT-400, we'll move the call back
+ * isn't used for the HT chips. If we do end up needing pci_enable_msi
+ * at some point in the future for HT, we'll move the call back
  * into the main init_one code.
  */
 static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
@@ -780,10 +792,10 @@
  * late in 2.6.16).
  * All that can be done is to edit the kernel source to remove the quirk
  * check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip (HT-400),
- * even those it uses MSI, and we want to avoid the quirk warning, so
- * So we call enable_msi only for the PE-800.  If we do end up needing
- * pci_enable_msi at some point in the future for HT-400, we'll move the
+ * We do not need to call enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning.
+ * So we call enable_msi only for PCIe.  If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
  * call back into the main init_one code.
  * We save the msi lo and hi values, so we can restore them after
  * chip reset (the kernel PCI infrastructure doesn't yet handle that
@@ -971,8 +983,7 @@
 	int ret;
 
 	/* Use ERROR so it shows up in logs, etc. */
-	ipath_dev_err(dd, "Resetting PE-800 unit %u\n",
-		      dd->ipath_unit);
+	ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
 	/* keep chip from being accessed in a few places */
 	dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
 	val = dd->ipath_control | INFINIPATH_C_RESET;
@@ -1078,7 +1089,7 @@
  * @port: the port
  *
  * clear all TID entries for a port, expected and eager.
- * Used from ipath_close().  On PE800, TIDs are only 32 bits,
+ * Used from ipath_close().  On this chip, TIDs are only 32 bits,
  * not 64, but they are still on 64 bit boundaries, so tidbase
  * is declared as u64 * for the pointer math, even though we write 32 bits
  */
@@ -1148,9 +1159,9 @@
 	dd->ipath_flags |= IPATH_4BYTE_TID;
 
 	/*
-	 * For openib, we need to be able to handle an IB header of 96 bytes
-	 * or 24 dwords.  HT-400 has arbitrary sized receive buffers, so we
-	 * made them the same size as the PIO buffers.  The PE-800 does not
+	 * For openfabrics, we need to be able to handle an IB header of
+	 * 24 dwords.  HT chip has arbitrary sized receive buffers, so we
+	 * made them the same size as the PIO buffers.  This chip does not
 	 * handle arbitrary size buffers, so we need the header large enough
 	 * to handle largest IB header, but still have room for a 2KB MTU
 	 * standard IB packet.
@@ -1158,11 +1169,10 @@
 	dd->ipath_rcvhdrentsize = 24;
 	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
 
-	/* For HT-400, we allocate a somewhat overly large eager buffer,
-	 * such that we can guarantee that we can receive the largest packet
-	 * that we can send out.  To truly support a 4KB MTU, we need to
-	 * bump this to a larger value.  We'll do this when I get around to
-	 * testing 4KB sends on the PE-800, which I have not yet done.
+	/*
+	 * To truly support a 4KB MTU (for usermode), we need to
+	 * bump this to a larger value.  For now, we use them for
+	 * the kernel only.
 	 */
 	dd->ipath_rcvegrbufsize = 2048;
 	/*
@@ -1175,9 +1185,9 @@
 	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
 
 	/*
-	 * For PE-800, we can request a receive interrupt for 1 or
+	 * We can request a receive interrupt for 1 or
 	 * more packets from current offset.  For now, we set this
-	 * up for a single packet, to match the HT-400 behavior.
+	 * up for a single packet.
 	 */
 	dd->ipath_rhdrhead_intr_off = 1ULL<<32;
 
@@ -1216,13 +1226,13 @@
 }
 
 /**
- * ipath_init_pe800_funcs - set up the chip-specific function pointers
+ * ipath_init_iba6120_funcs - set up the chip-specific function pointers
  * @dd: the infinipath device
  *
  * This is global, and is called directly at init to set up the
  * chip-specific function pointers for later use.
  */
-void ipath_init_pe800_funcs(struct ipath_devdata *dd)
+void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
 {
 	dd->ipath_f_intrsetup = ipath_pe_intconfig;
 	dd->ipath_f_bus = ipath_setup_pe_config;
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 414cdd1..44669dc 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -53,8 +53,8 @@
 MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
 
 /*
- * Number of buffers reserved for driver (layered drivers and SMA
- * send).  Reserved at end of buffer list.   Initialized based on
+ * Number of buffers reserved for driver (verbs and layered drivers).
+ * Reserved at end of buffer list.   Initialized based on
  * number of PIO buffers if not set via module interface.
  * The problem with this is that it's global, but we'll use different
  * numbers for different chip types.  So the default value is not
@@ -80,7 +80,7 @@
  *
  * Allocate the eager TID buffers and program them into infinipath.
  * We use the network layer alloc_skb() allocator to allocate the
- * memory, and either use the buffers as is for things like SMA
+ * memory, and either use the buffers as is for things like verbs
  * packets, or pass the buffers up to the ipath layered driver and
  * thence the network layer, replacing them as we do so (see
  * ipath_rcv_layer()).
@@ -240,7 +240,11 @@
 			  "only supports %u\n", ipath_cfgports,
 			  dd->ipath_portcnt);
 	}
-	dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
+	/*
+	 * Allocate full portcnt array, rather than just cfgports, because
+	 * cleanup iterates across all possible ports.
+	 */
+	dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt,
 			       GFP_KERNEL);
 
 	if (!dd->ipath_pd) {
@@ -446,9 +450,9 @@
 	u32 val;
 	int i;
 
-	if (!reinit) {
-		init_waitqueue_head(&ipath_sma_state_wait);
-	}
+	if (!reinit)
+		init_waitqueue_head(&ipath_state_wait);
+
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
@@ -687,7 +691,7 @@
 	dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
 		/ (sizeof(u64) * BITS_PER_BYTE / 2);
 	if (ipath_kpiobufs == 0) {
-		/* not set by user, or set explictly to default  */
+		/* not set by user (this is default) */
 		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
 			kpiobufs = 32;
 		else
@@ -946,6 +950,7 @@
 			dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
 	}
 
+	ipath_kpiobufs = val;
 	ret = 0;
 bail:
 	spin_unlock_irqrestore(&ipath_devs_lock, flags);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 280e732..49bf7bb 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -34,7 +34,7 @@
 #include <linux/pci.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 /* These are all rcv-related errors which we want to count for stats */
@@ -201,7 +201,7 @@
 				  ib_linkstate(lstate));
 		}
 		else
-			ipath_cdbg(SMA, "Unit %u link state %s, last "
+			ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
 				   "was %s\n", dd->ipath_unit,
 				   ib_linkstate(lstate),
 				   ib_linkstate((unsigned)
@@ -213,7 +213,7 @@
 		if (lstate == IPATH_IBSTATE_INIT ||
 		    lstate == IPATH_IBSTATE_ARM ||
 		    lstate == IPATH_IBSTATE_ACTIVE)
-			ipath_cdbg(SMA, "Unit %u link state down"
+			ipath_cdbg(VERBOSE, "Unit %u link state down"
 				   " (state 0x%x), from %s\n",
 				   dd->ipath_unit,
 				   (u32)val & IPATH_IBSTATE_MASK,
@@ -269,7 +269,7 @@
 			     INFINIPATH_IBCS_LINKSTATE_MASK)
 			    == INFINIPATH_IBCS_L_STATE_ACTIVE)
 				/* if from up to down be more vocal */
-				ipath_cdbg(SMA,
+				ipath_cdbg(VERBOSE,
 					   "Unit %u link now down (%s)\n",
 					   dd->ipath_unit,
 					   ipath_ibcstatus_str[ltstate]);
@@ -289,8 +289,6 @@
 		*dd->ipath_statusp |=
 			IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
 		dd->ipath_f_setextled(dd, lstate, ltstate);
-
-		__ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
 	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
 		/*
 		 * set INIT and DOWN.  Down is checked by most of the other
@@ -598,11 +596,11 @@
 
 	if (!noprint && *msg)
 		ipath_dev_err(dd, "%s error\n", msg);
-	if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
-		ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
-			   "waking\n", dd->ipath_sma_state_wanted,
+	if (dd->ipath_state_wanted & dd->ipath_flags) {
+		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
+			   "waking\n", dd->ipath_state_wanted,
 			   dd->ipath_flags);
-		wake_up_interruptible(&ipath_sma_state_wait);
+		wake_up_interruptible(&ipath_state_wait);
 	}
 
 	return chkerrpkts;
@@ -708,11 +706,7 @@
 {
 	int ret;
 
-	ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
-	if (ret > 0)
-		goto set;
-
-	ret = __ipath_verbs_piobufavail(dd);
+	ret = ipath_ib_piobufavail(dd->verbs_dev);
 	if (ret > 0)
 		goto set;
 
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index e9f374f..a8a5627 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -132,12 +132,6 @@
 	void *l_arg;
 };
 
-/* Verbs layer interface */
-struct _verbs_layer {
-	void *l_arg;
-	struct timer_list l_timer;
-};
-
 struct ipath_devdata {
 	struct list_head ipath_list;
 
@@ -198,7 +192,8 @@
 	void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
 	/* fill out chip-specific fields */
 	int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
-	struct _verbs_layer verbs_layer;
+	struct ipath_ibdev *verbs_dev;
+	struct timer_list verbs_timer;
 	/* total dwords sent (summed from counter) */
 	u64 ipath_sword;
 	/* total dwords rcvd (summed from counter) */
@@ -241,7 +236,7 @@
 	u64 ipath_tidtemplate;
 	/* value to write to free TIDs */
 	u64 ipath_tidinvalid;
-	/* PE-800 rcv interrupt setup */
+	/* IBA6120 rcv interrupt setup */
 	u64 ipath_rhdrhead_intr_off;
 
 	/* size of memory at ipath_kregbase */
@@ -250,8 +245,8 @@
 	u32 ipath_pioavregs;
 	/* IPATH_POLL, etc. */
 	u32 ipath_flags;
-	/* ipath_flags sma is waiting for */
-	u32 ipath_sma_state_wanted;
+	/* ipath_flags driver is waiting for */
+	u32 ipath_state_wanted;
 	/* last buffer for user use, first buf for kernel use is this
 	 * index. */
 	u32 ipath_lastport_piobuf;
@@ -311,10 +306,6 @@
 	u32 ipath_pcibar0;
 	/* so we can rewrite it after a chip reset */
 	u32 ipath_pcibar1;
-	/* sequential tries for SMA send and no bufs */
-	u32 ipath_nosma_bufs;
-	/* duration (seconds) ipath_nosma_bufs set */
-	u32 ipath_nosma_secs;
 
 	/* HT/PCI Vendor ID (here for NodeInfo) */
 	u16 ipath_vendorid;
@@ -512,6 +503,8 @@
 	u8 ipath_pci_cacheline;
 	/* LID mask control */
 	u8 ipath_lmc;
+	/* Rx Polarity inversion (compensate for ~tx on partner) */
+	u8 ipath_rx_pol_inv;
 
 	/* local link integrity counter */
 	u32 ipath_lli_counter;
@@ -523,18 +516,6 @@
 extern spinlock_t ipath_devs_lock;
 extern struct ipath_devdata *ipath_lookup(int unit);
 
-extern u16 ipath_layer_rcv_opcode;
-extern int __ipath_layer_intr(struct ipath_devdata *, u32);
-extern int ipath_layer_intr(struct ipath_devdata *, u32);
-extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
-			     struct sk_buff *);
-extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
-extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
-extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
-
-void ipath_layer_add(struct ipath_devdata *);
-void ipath_layer_remove(struct ipath_devdata *);
-
 int ipath_init_chip(struct ipath_devdata *, int);
 int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
@@ -549,9 +530,8 @@
 
 int ipath_diag_add(struct ipath_devdata *);
 void ipath_diag_remove(struct ipath_devdata *);
-void ipath_diag_bringup_link(struct ipath_devdata *);
 
-extern wait_queue_head_t ipath_sma_state_wait;
+extern wait_queue_head_t ipath_state_wait;
 
 int ipath_user_add(struct ipath_devdata *dd);
 void ipath_user_remove(struct ipath_devdata *dd);
@@ -582,12 +562,14 @@
 
 int ipath_parse_ushort(const char *str, unsigned short *valp);
 
-int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
-void ipath_set_ib_lstate(struct ipath_devdata *, int);
 void ipath_kreceive(struct ipath_devdata *);
 int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
 int ipath_reset_device(int);
 void ipath_get_faststats(unsigned long);
+int ipath_set_linkstate(struct ipath_devdata *, u8);
+int ipath_set_mtu(struct ipath_devdata *, u16);
+int ipath_set_lid(struct ipath_devdata *, u32, u8);
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 
 /* for use in system calls, where we want to know device type, etc. */
 #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
@@ -642,10 +624,8 @@
 int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
 int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
 u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
-/* init PE-800-specific func */
-void ipath_init_pe800_funcs(struct ipath_devdata *);
-/* init HT-400-specific func */
-void ipath_init_ht400_funcs(struct ipath_devdata *);
+void ipath_init_iba6120_funcs(struct ipath_devdata *);
+void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
 
@@ -801,7 +781,7 @@
 
 struct device_driver;
 
-extern const char ipath_core_version[];
+extern const char ib_ipath_version[];
 
 int ipath_driver_create_group(struct device_driver *);
 void ipath_driver_remove_group(struct device_driver *);
@@ -810,6 +790,9 @@
 void ipath_device_remove_group(struct device *, struct ipath_devdata *);
 int ipath_expose_reset(struct device *);
 
+int ipath_diagpkt_add(void);
+void ipath_diagpkt_remove(void);
+
 int ipath_init_ipathfs(void);
 void ipath_exit_ipathfs(void);
 int ipathfs_add_device(struct ipath_devdata *);
@@ -831,10 +814,10 @@
 
 extern struct mutex ipath_mutex;
 
-#define IPATH_DRV_NAME		"ipath_core"
+#define IPATH_DRV_NAME		"ib_ipath"
 #define IPATH_MAJOR		233
 #define IPATH_USER_MINOR_BASE	0
-#define IPATH_SMA_MINOR		128
+#define IPATH_DIAGPKT_MINOR	127
 #define IPATH_DIAG_MINOR_BASE	129
 #define IPATH_NMINORS		255
 
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index a5ca279..ba1b932 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -34,6 +34,7 @@
 #include <asm/io.h>
 
 #include "ipath_verbs.h"
+#include "ipath_kernel.h"
 
 /**
  * ipath_alloc_lkey - allocate an lkey
@@ -60,7 +61,7 @@
 		r = (r + 1) & (rkt->max - 1);
 		if (r == n) {
 			spin_unlock_irqrestore(&rkt->lock, flags);
-			_VERBS_INFO("LKEY table full\n");
+			ipath_dbg(KERN_INFO "LKEY table full\n");
 			ret = 0;
 			goto bail;
 		}
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index b28c6f8..e46aa4e 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -42,26 +42,20 @@
 
 #include "ipath_kernel.h"
 #include "ipath_layer.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 /* Acquire before ipath_devs_lock. */
 static DEFINE_MUTEX(ipath_layer_mutex);
 
-static int ipath_verbs_registered;
-
 u16 ipath_layer_rcv_opcode;
 
 static int (*layer_intr)(void *, u32);
 static int (*layer_rcv)(void *, void *, struct sk_buff *);
 static int (*layer_rcv_lid)(void *, void *);
-static int (*verbs_piobufavail)(void *);
-static void (*verbs_rcv)(void *, void *, void *, u32);
 
 static void *(*layer_add_one)(int, struct ipath_devdata *);
 static void (*layer_remove_one)(void *);
-static void *(*verbs_add_one)(int, struct ipath_devdata *);
-static void (*verbs_remove_one)(void *);
-static void (*verbs_timer_cb)(void *);
 
 int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
 {
@@ -107,302 +101,16 @@
 	return ret;
 }
 
-int __ipath_verbs_piobufavail(struct ipath_devdata *dd)
+void ipath_layer_lid_changed(struct ipath_devdata *dd)
 {
-	int ret = -ENODEV;
-
-	if (dd->verbs_layer.l_arg && verbs_piobufavail)
-		ret = verbs_piobufavail(dd->verbs_layer.l_arg);
-
-	return ret;
-}
-
-int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf,
-		      u32 tlen)
-{
-	int ret = -ENODEV;
-
-	if (dd->verbs_layer.l_arg && verbs_rcv) {
-		verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen);
-		ret = 0;
-	}
-
-	return ret;
-}
-
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
-{
-	u32 lstate;
-	int ret;
-
-	switch (newstate) {
-	case IPATH_IB_LINKDOWN:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_SLEEP:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKDOWN_DISABLE:
-		ipath_set_ib_lstate(dd,
-				    INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
-		/* don't wait */
-		ret = 0;
-		goto bail;
-
-	case IPATH_IB_LINKINIT:
-		if (dd->ipath_flags & IPATH_LINKINIT) {
-			ret = 0;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
-		lstate = IPATH_LINKINIT;
-		break;
-
-	case IPATH_IB_LINKARM:
-		if (dd->ipath_flags & IPATH_LINKARMED) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags &
-		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
-		/*
-		 * Since the port can transition to ACTIVE by receiving
-		 * a non VL 15 packet, wait for either state.
-		 */
-		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
-		break;
-
-	case IPATH_IB_LINKACTIVE:
-		if (dd->ipath_flags & IPATH_LINKACTIVE) {
-			ret = 0;
-			goto bail;
-		}
-		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
-			ret = -EINVAL;
-			goto bail;
-		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
-		lstate = IPATH_LINKACTIVE;
-		break;
-
-	default:
-		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
-		ret = -EINVAL;
-		goto bail;
-	}
-	ret = ipath_wait_linkstate(dd, lstate, 2000);
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate);
-
-/**
- * ipath_layer_set_mtu - set the MTU
- * @dd: the infinipath device
- * @arg: the new MTU
- *
- * we can handle "any" incoming size, the issue here is whether we
- * need to restrict our outgoing size.   For now, we don't do any
- * sanity checking on this, and we don't deal with what happens to
- * programs that are already running when the size changes.
- * NOTE: changing the MTU will usually cause the IBC to go back to
- * link initialize (IPATH_IBSTATE_INIT) state...
- */
-int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg)
-{
-	u32 piosize;
-	int changed = 0;
-	int ret;
-
-	/*
-	 * mtu is IB data payload max.  It's the largest power of 2 less
-	 * than piosize (or even larger, since it only really controls the
-	 * largest we can receive; we can send the max of the mtu and
-	 * piosize).  We check that it's one of the valid IB sizes.
-	 */
-	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-	    arg != 4096) {
-		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (dd->ipath_ibmtu == arg) {
-		ret = 0;	/* same as current */
-		goto bail;
-	}
-
-	piosize = dd->ipath_ibmaxlen;
-	dd->ipath_ibmtu = arg;
-
-	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
-		/* Only if it's not the initial value (or reset to it) */
-		if (piosize != dd->ipath_init_ibmaxlen) {
-			dd->ipath_ibmaxlen = piosize;
-			changed = 1;
-		}
-	} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
-		piosize = arg + IPATH_PIO_MAXIBHDR;
-		ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
-			   "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
-			   arg);
-		dd->ipath_ibmaxlen = piosize;
-		changed = 1;
-	}
-
-	if (changed) {
-		/*
-		 * set the IBC maxpktlength to the size of our pio
-		 * buffers in words
-		 */
-		u64 ibc = dd->ipath_ibcctrl;
-		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-			 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
-
-		piosize = piosize - 2 * sizeof(u32);	/* ignore pbc */
-		dd->ipath_ibmaxlen = piosize;
-		piosize /= sizeof(u32);	/* in words */
-		/*
-		 * for ICRC, which we only send in diag test pkt mode, and
-		 * we don't need to worry about that for mtu
-		 */
-		piosize += 1;
-
-		ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-		dd->ipath_ibcctrl = ibc;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-		dd->ipath_f_tidtemplate(dd);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_mtu);
-
-int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
-{
-	dd->ipath_lid = arg;
-	dd->ipath_lmc = lmc;
-
 	mutex_lock(&ipath_layer_mutex);
 
 	if (dd->ipath_layer.l_arg && layer_intr)
 		layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
 
 	mutex_unlock(&ipath_layer_mutex);
-
-	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ipath_set_lid);
-
-int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
-{
-	/* XXX - need to inform anyone who cares this just happened. */
-	dd->ipath_guid = guid;
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_guid);
-
-__be64 ipath_layer_get_guid(struct ipath_devdata *dd)
-{
-	return dd->ipath_guid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_guid);
-
-u32 ipath_layer_get_nguid(struct ipath_devdata *dd)
-{
-	return dd->ipath_nguid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_nguid);
-
-u32 ipath_layer_get_majrev(struct ipath_devdata *dd)
-{
-	return dd->ipath_majrev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_majrev);
-
-u32 ipath_layer_get_minrev(struct ipath_devdata *dd)
-{
-	return dd->ipath_minrev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_minrev);
-
-u32 ipath_layer_get_pcirev(struct ipath_devdata *dd)
-{
-	return dd->ipath_pcirev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pcirev);
-
-u32 ipath_layer_get_flags(struct ipath_devdata *dd)
-{
-	return dd->ipath_flags;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_flags);
-
-struct device *ipath_layer_get_device(struct ipath_devdata *dd)
-{
-	return &dd->pcidev->dev;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_device);
-
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd)
-{
-	return dd->ipath_deviceid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid);
-
-u32 ipath_layer_get_vendorid(struct ipath_devdata *dd)
-{
-	return dd->ipath_vendorid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_vendorid);
-
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd)
-{
-	return dd->ipath_lastibcstat;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat);
-
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd)
-{
-	return dd->ipath_ibmtu;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu);
-
 void ipath_layer_add(struct ipath_devdata *dd)
 {
 	mutex_lock(&ipath_layer_mutex);
@@ -411,10 +119,6 @@
 		dd->ipath_layer.l_arg =
 			layer_add_one(dd->ipath_unit, dd);
 
-	if (verbs_add_one)
-		dd->verbs_layer.l_arg =
-			verbs_add_one(dd->ipath_unit, dd);
-
 	mutex_unlock(&ipath_layer_mutex);
 }
 
@@ -427,11 +131,6 @@
 		dd->ipath_layer.l_arg = NULL;
 	}
 
-	if (dd->verbs_layer.l_arg && verbs_remove_one) {
-		verbs_remove_one(dd->verbs_layer.l_arg);
-		dd->verbs_layer.l_arg = NULL;
-	}
-
 	mutex_unlock(&ipath_layer_mutex);
 }
 
@@ -463,9 +162,6 @@
 		if (dd->ipath_layer.l_arg)
 			continue;
 
-		if (!(*dd->ipath_statusp & IPATH_STATUS_SMA))
-			*dd->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-
 		spin_unlock_irqrestore(&ipath_devs_lock, flags);
 		dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
 		spin_lock_irqsave(&ipath_devs_lock, flags);
@@ -509,107 +205,6 @@
 
 EXPORT_SYMBOL_GPL(ipath_layer_unregister);
 
-static void __ipath_verbs_timer(unsigned long arg)
-{
-	struct ipath_devdata *dd = (struct ipath_devdata *) arg;
-
-	/*
-	 * If port 0 receive packet interrupts are not available, or
-	 * can be missed, poll the receive queue
-	 */
-	if (dd->ipath_flags & IPATH_POLL_RX_INTR)
-		ipath_kreceive(dd);
-
-	/* Handle verbs layer timeouts. */
-	if (dd->verbs_layer.l_arg && verbs_timer_cb)
-		verbs_timer_cb(dd->verbs_layer.l_arg);
-
-	mod_timer(&dd->verbs_layer.l_timer, jiffies + 1);
-}
-
-/**
- * ipath_verbs_register - verbs layer registration
- * @l_piobufavail: callback for when PIO buffers become available
- * @l_rcv: callback for receiving a packet
- * @l_timer_cb: timer callback
- * @ipath_devdata: device data structure is put here
- */
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
-			 void (*l_remove)(void *arg),
-			 int (*l_piobufavail) (void *arg),
-			 void (*l_rcv) (void *arg, void *rhdr,
-					void *data, u32 tlen),
-			 void (*l_timer_cb) (void *arg))
-{
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-
-	mutex_lock(&ipath_layer_mutex);
-
-	verbs_add_one = l_add;
-	verbs_remove_one = l_remove;
-	verbs_piobufavail = l_piobufavail;
-	verbs_rcv = l_rcv;
-	verbs_timer_cb = l_timer_cb;
-
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		if (!(dd->ipath_flags & IPATH_INITTED))
-			continue;
-
-		if (dd->verbs_layer.l_arg)
-			continue;
-
-		spin_unlock_irqrestore(&ipath_devs_lock, flags);
-		dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd);
-		spin_lock_irqsave(&ipath_devs_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-	mutex_unlock(&ipath_layer_mutex);
-
-	ipath_verbs_registered = 1;
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_register);
-
-void ipath_verbs_unregister(void)
-{
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-
-	mutex_lock(&ipath_layer_mutex);
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		*dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-
-		if (dd->verbs_layer.l_arg && verbs_remove_one) {
-			spin_unlock_irqrestore(&ipath_devs_lock, flags);
-			verbs_remove_one(dd->verbs_layer.l_arg);
-			spin_lock_irqsave(&ipath_devs_lock, flags);
-			dd->verbs_layer.l_arg = NULL;
-		}
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-	verbs_add_one = NULL;
-	verbs_remove_one = NULL;
-	verbs_piobufavail = NULL;
-	verbs_rcv = NULL;
-	verbs_timer_cb = NULL;
-
-	ipath_verbs_registered = 0;
-
-	mutex_unlock(&ipath_layer_mutex);
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_unregister);
-
 int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
 {
 	int ret;
@@ -698,390 +293,6 @@
 
 EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
 
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd)
-{
-	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey);
-
-static void update_sge(struct ipath_sge_state *ss, u32 length)
-{
-	struct ipath_sge *sge = &ss->sge;
-
-	sge->vaddr += length;
-	sge->length -= length;
-	sge->sge_length -= length;
-	if (sge->sge_length == 0) {
-		if (--ss->num_sge)
-			*sge = *ss->sg_list++;
-	} else if (sge->length == 0 && sge->mr != NULL) {
-		if (++sge->n >= IPATH_SEGSZ) {
-			if (++sge->m >= sge->mr->mapsz)
-				return;
-			sge->n = 0;
-		}
-		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
-		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
-	}
-}
-
-#ifdef __LITTLE_ENDIAN
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#else
-static inline u32 get_upper_bits(u32 data, u32 shift)
-{
-	return data << shift;
-}
-
-static inline u32 set_upper_bits(u32 data, u32 shift)
-{
-	return data >> shift;
-}
-
-static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
-{
-	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
-	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
-	return data;
-}
-#endif
-
-static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-		    u32 length)
-{
-	u32 extra = 0;
-	u32 data = 0;
-	u32 last;
-
-	while (1) {
-		u32 len = ss->sge.length;
-		u32 off;
-
-		BUG_ON(len == 0);
-		if (len > length)
-			len = length;
-		if (len > ss->sge.sge_length)
-			len = ss->sge.sge_length;
-		/* If the source address is not aligned, try to align it. */
-		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
-		if (off) {
-			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
-					    ~(sizeof(u32) - 1));
-			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
-			u32 y;
-
-			y = sizeof(u32) - off;
-			if (len > y)
-				len = y;
-			if (len + extra >= sizeof(u32)) {
-				data |= set_upper_bits(v, extra *
-						       BITS_PER_BYTE);
-				len = sizeof(u32) - extra;
-				if (len == length) {
-					last = data;
-					break;
-				}
-				__raw_writel(data, piobuf);
-				piobuf++;
-				extra = 0;
-				data = 0;
-			} else {
-				/* Clear unused upper bytes */
-				data |= clear_upper_bytes(v, len, extra);
-				if (len == length) {
-					last = data;
-					break;
-				}
-				extra += len;
-			}
-		} else if (extra) {
-			/* Source address is aligned. */
-			u32 *addr = (u32 *) ss->sge.vaddr;
-			int shift = extra * BITS_PER_BYTE;
-			int ushift = 32 - shift;
-			u32 l = len;
-
-			while (l >= sizeof(u32)) {
-				u32 v = *addr;
-
-				data |= set_upper_bits(v, shift);
-				__raw_writel(data, piobuf);
-				data = get_upper_bits(v, ushift);
-				piobuf++;
-				addr++;
-				l -= sizeof(u32);
-			}
-			/*
-			 * We still have 'extra' number of bytes leftover.
-			 */
-			if (l) {
-				u32 v = *addr;
-
-				if (l + extra >= sizeof(u32)) {
-					data |= set_upper_bits(v, shift);
-					len -= l + extra - sizeof(u32);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					__raw_writel(data, piobuf);
-					piobuf++;
-					extra = 0;
-					data = 0;
-				} else {
-					/* Clear unused upper bytes */
-					data |= clear_upper_bytes(v, l,
-								  extra);
-					if (len == length) {
-						last = data;
-						break;
-					}
-					extra += l;
-				}
-			} else if (len == length) {
-				last = data;
-				break;
-			}
-		} else if (len == length) {
-			u32 w;
-
-			/*
-			 * Need to round up for the last dword in the
-			 * packet.
-			 */
-			w = (len + 3) >> 2;
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
-			piobuf += w - 1;
-			last = ((u32 *) ss->sge.vaddr)[w - 1];
-			break;
-		} else {
-			u32 w = len >> 2;
-
-			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
-			piobuf += w;
-
-			extra = len & (sizeof(u32) - 1);
-			if (extra) {
-				u32 v = ((u32 *) ss->sge.vaddr)[w];
-
-				/* Clear unused upper bytes */
-				data = clear_upper_bytes(v, extra, 0);
-			}
-		}
-		update_sge(ss, len);
-		length -= len;
-	}
-	/* Update address before sending packet. */
-	update_sge(ss, length);
-	/* must flush early everything before trigger word */
-	ipath_flush_wc();
-	__raw_writel(last, piobuf);
-	/* be sure trigger word is written */
-	ipath_flush_wc();
-}
-
-/**
- * ipath_verbs_send - send a packet from the verbs layer
- * @dd: the infinipath device
- * @hdrwords: the number of words in the header
- * @hdr: the packet header
- * @len: the length of the packet in bytes
- * @ss: the SGE to send
- *
- * This is like ipath_sma_send_pkt() in that we need to be able to send
- * packets after the chip is initialized (MADs) but also like
- * ipath_layer_send_hdr() since its used by the verbs layer.
- */
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-		     u32 *hdr, u32 len, struct ipath_sge_state *ss)
-{
-	u32 __iomem *piobuf;
-	u32 plen;
-	int ret;
-
-	/* +1 is for the qword padding of pbc */
-	plen = hdrwords + ((len + 3) >> 2) + 1;
-	if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
-		ipath_dbg("packet len 0x%x too long, failing\n", plen);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/* Get a PIO buffer to use. */
-	piobuf = ipath_getpiobuf(dd, NULL);
-	if (unlikely(piobuf == NULL)) {
-		ret = -EBUSY;
-		goto bail;
-	}
-
-	/*
-	 * Write len to control qword, no flags.
-	 * We have to flush after the PBC for correctness on some cpus
-	 * or WC buffer can be written out of order.
-	 */
-	writeq(plen, piobuf);
-	ipath_flush_wc();
-	piobuf += 2;
-	if (len == 0) {
-		/*
-		 * If there is just the header portion, must flush before
-		 * writing last word of header for correctness, and after
-		 * the last header word (trigger word).
-		 */
-		__iowrite32_copy(piobuf, hdr, hdrwords - 1);
-		ipath_flush_wc();
-		__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-		ipath_flush_wc();
-		ret = 0;
-		goto bail;
-	}
-
-	__iowrite32_copy(piobuf, hdr, hdrwords);
-	piobuf += hdrwords;
-
-	/* The common case is aligned and contained in one segment. */
-	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
-		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-		u32 w;
-		u32 *addr = (u32 *) ss->sge.vaddr;
-
-		/* Update address before sending packet. */
-		update_sge(ss, len);
-		/* Need to round up for the last dword in the packet. */
-		w = (len + 3) >> 2;
-		__iowrite32_copy(piobuf, addr, w - 1);
-		/* must flush early everything before trigger word */
-		ipath_flush_wc();
-		__raw_writel(addr[w - 1], piobuf + w - 1);
-		/* be sure trigger word is written */
-		ipath_flush_wc();
-		ret = 0;
-		goto bail;
-	}
-	copy_io(piobuf, ss, len);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_verbs_send);
-
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-				  u64 *rwords, u64 *spkts, u64 *rpkts,
-				  u64 *xmit_wait)
-{
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
-		ret = -EINVAL;
-		goto bail;
-	}
-	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters);
-
-/**
- * ipath_layer_get_counters - get various chip counters
- * @dd: the infinipath device
- * @cntrs: counters are placed here
- *
- * Return the counters needed by recv_pma_get_portcounters().
- */
-int ipath_layer_get_counters(struct ipath_devdata *dd,
-			      struct ipath_layer_counters *cntrs)
-{
-	int ret;
-
-	if (!(dd->ipath_flags & IPATH_INITTED)) {
-		/* no hardware, freeze, etc. */
-		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
-		ret = -EINVAL;
-		goto bail;
-	}
-	cntrs->symbol_error_counter =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
-	cntrs->link_error_recovery_counter =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
-	/*
-	 * The link downed counter counts when the other side downs the
-	 * connection.  We add in the number of times we downed the link
-	 * due to local link integrity errors to compensate.
-	 */
-	cntrs->link_downed_counter =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
-	cntrs->port_rcv_errors =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
-	cntrs->port_rcv_remphys_errors =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
-	cntrs->port_xmit_discards =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
-	cntrs->port_xmit_data =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-	cntrs->port_rcv_data =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-	cntrs->port_xmit_packets =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-	cntrs->port_rcv_packets =
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
-	cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_counters);
-
-int ipath_layer_want_buffer(struct ipath_devdata *dd)
-{
-	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 dd->ipath_sendctrl);
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_want_buffer);
-
 int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
 {
 	int ret = 0;
@@ -1153,389 +364,3 @@
 }
 
 EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
-
-int ipath_layer_enable_timer(struct ipath_devdata *dd)
-{
-	/*
-	 * HT-400 has a design flaw where the chip and kernel idea
-	 * of the tail register don't always agree, and therefore we won't
-	 * get an interrupt on the next packet received.
-	 * If the board supports per packet receive interrupts, use it.
-	 * Otherwise, the timer function periodically checks for packets
-	 * to cover this case.
-	 * Either way, the timer is needed for verbs layer related
-	 * processing.
-	 */
-	if (dd->ipath_flags & IPATH_GPIO_INTR) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
-				 0x2074076542310ULL);
-		/* Enable GPIO bit 2 interrupt */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 (u64) (1 << 2));
-	}
-
-	init_timer(&dd->verbs_layer.l_timer);
-	dd->verbs_layer.l_timer.function = __ipath_verbs_timer;
-	dd->verbs_layer.l_timer.data = (unsigned long)dd;
-	dd->verbs_layer.l_timer.expires = jiffies + 1;
-	add_timer(&dd->verbs_layer.l_timer);
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_enable_timer);
-
-int ipath_layer_disable_timer(struct ipath_devdata *dd)
-{
-	/* Disable GPIO bit 2 interrupt */
-	if (dd->ipath_flags & IPATH_GPIO_INTR)
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
-
-	del_timer_sync(&dd->verbs_layer.l_timer);
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_disable_timer);
-
-/**
- * ipath_layer_set_verbs_flags - set the verbs layer flags
- * @dd: the infinipath device
- * @flags: the flags to set
- */
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
-{
-	struct ipath_devdata *ss;
-	unsigned long lflags;
-
-	spin_lock_irqsave(&ipath_devs_lock, lflags);
-
-	list_for_each_entry(ss, &ipath_dev_list, ipath_list) {
-		if (!(ss->ipath_flags & IPATH_INITTED))
-			continue;
-		if ((flags & IPATH_VERBS_KERNEL_SMA) &&
-		    !(*ss->ipath_statusp & IPATH_STATUS_SMA))
-			*ss->ipath_statusp |= IPATH_STATUS_OIB_SMA;
-		else
-			*ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, lflags);
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags);
-
-/**
- * ipath_layer_get_npkeys - return the size of the PKEY table for port 0
- * @dd: the infinipath device
- */
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd)
-{
-	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys);
-
-/**
- * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table
- * @dd: the infinipath device
- * @index: the PKEY index
- */
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index)
-{
-	unsigned ret;
-
-	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
-		ret = 0;
-	else
-		ret = dd->ipath_pd[0]->port_pkeys[index];
-
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkey);
-
-/**
- * ipath_layer_get_pkeys - return the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the pkey table is placed here
- */
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-	struct ipath_portdata *pd = dd->ipath_pd[0];
-
-	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys);
-
-/**
- * rm_pkey - decrecment the reference count for the given PKEY
- * @dd: the infinipath device
- * @key: the PKEY index
- *
- * Return true if this was the last reference and the hardware table entry
- * needs to be changed.
- */
-static int rm_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (dd->ipath_pkeys[i] != key)
-			continue;
-		if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
-			dd->ipath_pkeys[i] = 0;
-			ret = 1;
-			goto bail;
-		}
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * add_pkey - add the given PKEY to the hardware table
- * @dd: the infinipath device
- * @key: the PKEY
- *
- * Return an error code if unable to add the entry, zero if no change,
- * or 1 if the hardware PKEY register needs to be updated.
- */
-static int add_pkey(struct ipath_devdata *dd, u16 key)
-{
-	int i;
-	u16 lkey = key & 0x7FFF;
-	int any = 0;
-	int ret;
-
-	if (lkey == 0x7FFF) {
-		ret = 0;
-		goto bail;
-	}
-
-	/* Look for an empty slot or a matching PKEY. */
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i]) {
-			any++;
-			continue;
-		}
-		/* If it matches exactly, try to increment the ref count */
-		if (dd->ipath_pkeys[i] == key) {
-			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
-				ret = 0;
-				goto bail;
-			}
-			/* Lost the race. Look for an empty slot below. */
-			atomic_dec(&dd->ipath_pkeyrefs[i]);
-			any++;
-		}
-		/*
-		 * It makes no sense to have both the limited and unlimited
-		 * PKEY set at the same time since the unlimited one will
-		 * disable the limited one.
-		 */
-		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
-			ret = -EEXIST;
-			goto bail;
-		}
-	}
-	if (!any) {
-		ret = -EBUSY;
-		goto bail;
-	}
-	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
-		if (!dd->ipath_pkeys[i] &&
-		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
-			/* for ipathstats, etc. */
-			ipath_stats.sps_pkeys[i] = lkey;
-			dd->ipath_pkeys[i] = key;
-			ret = 1;
-			goto bail;
-		}
-	}
-	ret = -EBUSY;
-
-bail:
-	return ret;
-}
-
-/**
- * ipath_layer_set_pkeys - set the PKEY table for port 0
- * @dd: the infinipath device
- * @pkeys: the PKEY table
- */
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys)
-{
-	struct ipath_portdata *pd;
-	int i;
-	int changed = 0;
-
-	pd = dd->ipath_pd[0];
-
-	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
-		u16 key = pkeys[i];
-		u16 okey = pd->port_pkeys[i];
-
-		if (key == okey)
-			continue;
-		/*
-		 * The value of this PKEY table entry is changing.
-		 * Remove the old entry in the hardware's array of PKEYs.
-		 */
-		if (okey & 0x7FFF)
-			changed |= rm_pkey(dd, okey);
-		if (key & 0x7FFF) {
-			int ret = add_pkey(dd, key);
-
-			if (ret < 0)
-				key = 0;
-			else
-				changed |= ret;
-		}
-		pd->port_pkeys[i] = key;
-	}
-	if (changed) {
-		u64 pkey;
-
-		pkey = (u64) dd->ipath_pkeys[0] |
-			((u64) dd->ipath_pkeys[1] << 16) |
-			((u64) dd->ipath_pkeys[2] << 32) |
-			((u64) dd->ipath_pkeys[3] << 48);
-		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
-			   (unsigned long long) pkey);
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
-				 pkey);
-	}
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys);
-
-/**
- * ipath_layer_get_linkdowndefaultstate - get the default linkdown state
- * @dd: the infinipath device
- *
- * Returns zero if the default is POLL, 1 if the default is SLEEP.
- */
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd)
-{
-	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate);
-
-/**
- * ipath_layer_set_linkdowndefaultstate - set the default linkdown state
- * @dd: the infinipath device
- * @sleep: the new state
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
-					 int sleep)
-{
-	if (sleep)
-		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	else
-		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-			 dd->ipath_ibcctrl);
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate);
-
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold);
-
-/**
- * ipath_layer_set_phyerrthreshold - set the physical error threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold);
-
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
-{
-	return (dd->ipath_ibcctrl >>
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold);
-
-/**
- * ipath_layer_set_overrunthreshold - set the overrun threshold
- * @dd: the infinipath device
- * @n: the new threshold
- *
- * Note that this will only take effect when the link state changes.
- */
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
-{
-	unsigned v;
-
-	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
-		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
-	if (v != n) {
-		dd->ipath_ibcctrl &=
-			~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
-			  INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
-		dd->ipath_ibcctrl |=
-			(u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-				 dd->ipath_ibcctrl);
-	}
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold);
-
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen)
-{
-	return dd->ipath_f_get_boardname(dd, name, namelen);
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_boardname);
-
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd)
-{
-	return dd->ipath_rcvhdrentsize;
-}
-EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
index 7148509..3854a4e 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -40,73 +40,9 @@
  */
 
 struct sk_buff;
-struct ipath_sge_state;
 struct ipath_devdata;
 struct ether_header;
 
-struct ipath_layer_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct ipath_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of ipath_segs that fit in a page. */
-#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
-
-struct ipath_segarray {
-	struct ipath_seg segs[IPATH_SEGSZ];
-};
-
-struct ipath_mregion {
-	u64 user_base;		/* User's address for this region */
-	u64 iova;		/* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;		/* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;		/* number of ipath_segs in all the arrays */
-	u32 mapsz;		/* size of the map array */
-	struct ipath_segarray *map[0];	/* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct ipath_sge {
-	struct ipath_mregion *mr;
-	void *vaddr;		/* current pointer into the segment */
-	u32 sge_length;		/* length of the SGE */
-	u32 length;		/* remaining length of the segment */
-	u16 m;			/* current index: mr->map[m] */
-	u16 n;			/* current index: mr->map[m]->segs[n] */
-};
-
-struct ipath_sge_state {
-	struct ipath_sge *sg_list;	/* next SGE to be used if any */
-	struct ipath_sge sge;	/* progress state for the current SGE */
-	u8 num_sge;
-};
-
 int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
 			 void (*l_remove)(void *),
 			 int (*l_intr)(void *, u32),
@@ -114,62 +50,14 @@
 				      struct sk_buff *),
 			 u16 rcv_opcode,
 			 int (*l_rcv_lid)(void *, void *));
-int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
-			 void (*l_remove)(void *arg),
-			 int (*l_piobufavail)(void *arg),
-			 void (*l_rcv)(void *arg, void *rhdr,
-				       void *data, u32 tlen),
-			 void (*l_timer_cb)(void *arg));
 void ipath_layer_unregister(void);
-void ipath_verbs_unregister(void);
 int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
 u16 ipath_layer_get_lid(struct ipath_devdata *dd);
 int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
 u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
-u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
-int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
-int ipath_layer_set_mtu(struct ipath_devdata *, u16);
-int ipath_set_lid(struct ipath_devdata *, u32, u8);
 int ipath_layer_send_hdr(struct ipath_devdata *dd,
 			 struct ether_header *hdr);
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-		     u32 * hdr, u32 len, struct ipath_sge_state *ss);
 int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
-int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen);
-int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
-				  u64 *rwords, u64 *spkts, u64 *rpkts,
-				  u64 *xmit_wait);
-int ipath_layer_get_counters(struct ipath_devdata *dd,
-			     struct ipath_layer_counters *cntrs);
-int ipath_layer_want_buffer(struct ipath_devdata *dd);
-int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
-__be64 ipath_layer_get_guid(struct ipath_devdata *);
-u32 ipath_layer_get_nguid(struct ipath_devdata *);
-u32 ipath_layer_get_majrev(struct ipath_devdata *);
-u32 ipath_layer_get_minrev(struct ipath_devdata *);
-u32 ipath_layer_get_pcirev(struct ipath_devdata *);
-u32 ipath_layer_get_flags(struct ipath_devdata *dd);
-struct device *ipath_layer_get_device(struct ipath_devdata *dd);
-u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
-u32 ipath_layer_get_vendorid(struct ipath_devdata *);
-u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
-u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
-int ipath_layer_enable_timer(struct ipath_devdata *dd);
-int ipath_layer_disable_timer(struct ipath_devdata *dd);
-int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
-unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
-unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
-int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
-int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
-int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
-					 int sleep);
-int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
-int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
-int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
-u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
 
 /* ipath_ether interrupt values */
 #define IPATH_LAYER_INT_IF_UP 0x2
@@ -178,9 +66,6 @@
 #define IPATH_LAYER_INT_SEND_CONTINUE 0x10
 #define IPATH_LAYER_INT_BCAST 0x40
 
-/* _verbs_layer.l_flags */
-#define IPATH_VERBS_KERNEL_SMA 0x1
-
 extern unsigned ipath_debug; /* debugging bit mask */
 
 #endif				/* _IPATH_LAYER_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index d340234..72d1db8 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -101,15 +101,15 @@
 	nip->num_ports = ibdev->phys_port_cnt;
 	/* This is already in network order */
 	nip->sys_guid = to_idev(ibdev)->sys_image_guid;
-	nip->node_guid = ipath_layer_get_guid(dd);
+	nip->node_guid = dd->ipath_guid;
 	nip->port_guid = nip->sys_guid;
-	nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd));
-	nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd));
-	majrev = ipath_layer_get_majrev(dd);
-	minrev = ipath_layer_get_minrev(dd);
+	nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
+	nip->device_id = cpu_to_be16(dd->ipath_deviceid);
+	majrev = dd->ipath_majrev;
+	minrev = dd->ipath_minrev;
 	nip->revision = cpu_to_be32((majrev << 16) | minrev);
 	nip->local_port_num = port;
-	vendor = ipath_layer_get_vendorid(dd);
+	vendor = dd->ipath_vendorid;
 	nip->vendor_id[0] = 0;
 	nip->vendor_id[1] = vendor >> 8;
 	nip->vendor_id[2] = vendor;
@@ -133,13 +133,89 @@
 	 */
 	if (startgx == 0)
 		/* The first is a copy of the read-only HW GUID. */
-		*p = ipath_layer_get_guid(to_idev(ibdev)->dd);
+		*p = to_idev(ibdev)->dd->ipath_guid;
 	else
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	return reply(smp);
 }
 
+/* Return the overrun threshold field held in dd->ipath_ibcctrl. */
+static int get_overrunthreshold(struct ipath_devdata *dd)
+{
+	return (dd->ipath_ibcctrl >>
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+}
+
+/**
+ * set_overrunthreshold - set the overrun threshold
+ * @dd: the infinipath device
+ * @n: the new threshold; it is not masked here, so it must fit the field
+ *
+ * Always returns 0.  Only takes effect when the link state changes.
+ */
+static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
+{
+	unsigned v;
+
+	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+	if (v != n) {
+		dd->ipath_ibcctrl &=
+			~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
+			  INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
+		dd->ipath_ibcctrl |=
+			(u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+	}
+	return 0;
+}
+
+static int get_phyerrthreshold(struct ipath_devdata *dd)
+{
+	return (dd->ipath_ibcctrl >>
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+}
+
+/**
+ * set_phyerrthreshold - set the physical error threshold
+ * @dd: the infinipath device
+ * @n: the new threshold; it is not masked here, so it must fit the field
+ *
+ * Always returns 0.  Only takes effect when the link state changes.
+ */
+static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
+{
+	unsigned v;
+
+	v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+	if (v != n) {
+		dd->ipath_ibcctrl &=
+			~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
+			  INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
+		dd->ipath_ibcctrl |=
+			(u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+				 dd->ipath_ibcctrl);
+	}
+	return 0;
+}
+
+/**
+ * get_linkdowndefaultstate - get the default linkdown state
+ * @dd: the infinipath device
+ *
+ * Returns 0 if the default is POLL (bit clear), 1 if it is SLEEP (bit set).
+ */
+static int get_linkdowndefaultstate(struct ipath_devdata *dd)
+{
+	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
+}
+
 static int recv_subn_get_portinfo(struct ib_smp *smp,
 				  struct ib_device *ibdev, u8 port)
 {
@@ -166,7 +242,7 @@
 	    (dev->mkeyprot_resv_lmc >> 6) == 0)
 		pip->mkey = dev->mkey;
 	pip->gid_prefix = dev->gid_prefix;
-	lid = ipath_layer_get_lid(dev->dd);
+	lid = dev->dd->ipath_lid;
 	pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
 	pip->sm_lid = cpu_to_be16(dev->sm_lid);
 	pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
@@ -177,14 +253,14 @@
 	pip->link_width_supported = 3;	/* 1x or 4x */
 	pip->link_width_active = 2;	/* 4x */
 	pip->linkspeed_portstate = 0x10;	/* 2.5Gbps */
-	ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+	ibcstat = dev->dd->ipath_lastibcstat;
 	pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
 	pip->portphysstate_linkdown =
 		(ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
-		(ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2);
+		(get_linkdowndefaultstate(dev->dd) ? 1 : 2);
 	pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
 	pip->linkspeedactive_enabled = 0x11;	/* 2.5Gbps, 2.5Gbps */
-	switch (ipath_layer_get_ibmtu(dev->dd)) {
+	switch (dev->dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
 		break;
@@ -217,7 +293,7 @@
 	pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
 	pip->pkey_violations =
-		cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) -
+		cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
 			     dev->z_pkey_violations) & 0xFFFF);
 	pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
 	/* Only the hardware GUID is supported for now */
@@ -226,8 +302,8 @@
 	/* 32.768 usec. response time (guessing) */
 	pip->resv_resptimevalue = 3;
 	pip->localphyerrors_overrunerrors =
-		(ipath_layer_get_phyerrthreshold(dev->dd) << 4) |
-		ipath_layer_get_overrunthreshold(dev->dd);
+		(get_phyerrthreshold(dev->dd) << 4) |
+		get_overrunthreshold(dev->dd);
 	/* pip->max_credit_hint; */
 	/* pip->link_roundtrip_latency[3]; */
 
@@ -237,6 +313,20 @@
 	return ret;
 }
 
+/**
+ * get_pkeys - return the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: output buffer; sizeof(pd->port_pkeys) bytes are copied into it
+ */
+static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
+{
+	struct ipath_portdata *pd = dd->ipath_pd[0];
+
+	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
+
+	return 0;
+}
+
 static int recv_subn_get_pkeytable(struct ib_smp *smp,
 				   struct ib_device *ibdev)
 {
@@ -249,9 +339,9 @@
 	memset(smp->data, 0, sizeof(smp->data));
 	if (startpx == 0) {
 		struct ipath_ibdev *dev = to_idev(ibdev);
-		unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+		unsigned i, n = ipath_get_npkeys(dev->dd);
 
-		ipath_layer_get_pkeys(dev->dd, p);
+		get_pkeys(dev->dd, p);
 
 		for (i = 0; i < n; i++)
 			q[i] = cpu_to_be16(p[i]);
@@ -269,6 +359,24 @@
 }
 
 /**
+ * set_linkdowndefaultstate - set the default linkdown state
+ * @dd: the infinipath device
+ * @sleep: the new state
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
+{
+	if (sleep)
+		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+	else
+		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+			 dd->ipath_ibcctrl);
+	return 0;
+}
+
+/**
  * recv_subn_set_portinfo - set port information
  * @smp: the incoming SM packet
  * @ibdev: the infiniband device
@@ -290,7 +398,7 @@
 	u8 state;
 	u16 lstate;
 	u32 mtu;
-	int ret;
+	int ret, ore;
 
 	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
 		goto err;
@@ -304,7 +412,7 @@
 	dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
 
 	lid = be16_to_cpu(pip->lid);
-	if (lid != ipath_layer_get_lid(dev->dd)) {
+	if (lid != dev->dd->ipath_lid) {
 		/* Must be a valid unicast LID address. */
 		if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
 			goto err;
@@ -342,11 +450,11 @@
 	case 0: /* NOP */
 		break;
 	case 1: /* SLEEP */
-		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1))
+		if (set_linkdowndefaultstate(dev->dd, 1))
 			goto err;
 		break;
 	case 2: /* POLL */
-		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0))
+		if (set_linkdowndefaultstate(dev->dd, 0))
 			goto err;
 		break;
 	default:
@@ -376,7 +484,7 @@
 		/* XXX We have already partially updated our state! */
 		goto err;
 	}
-	ipath_layer_set_mtu(dev->dd, mtu);
+	ipath_set_mtu(dev->dd, mtu);
 
 	dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
 
@@ -392,20 +500,16 @@
 	 * later.
 	 */
 	if (pip->pkey_violations == 0)
-		dev->z_pkey_violations =
-			ipath_layer_get_cr_errpkey(dev->dd);
+		dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
 
 	if (pip->qkey_violations == 0)
 		dev->qkey_violations = 0;
 
-	if (ipath_layer_set_phyerrthreshold(
-		    dev->dd,
-		    (pip->localphyerrors_overrunerrors >> 4) & 0xF))
+	ore = pip->localphyerrors_overrunerrors;
+	if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
 		goto err;
 
-	if (ipath_layer_set_overrunthreshold(
-		    dev->dd,
-		    (pip->localphyerrors_overrunerrors & 0xF)))
+	if (set_overrunthreshold(dev->dd, (ore & 0xF)))
 		goto err;
 
 	dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -423,7 +527,7 @@
 	 * is down or is being set to down.
 	 */
 	state = pip->linkspeed_portstate & 0xF;
-	flags = ipath_layer_get_flags(dev->dd);
+	flags = dev->dd->ipath_flags;
 	lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
 	if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
 		goto err;
@@ -439,7 +543,7 @@
 		/* FALLTHROUGH */
 	case IB_PORT_DOWN:
 		if (lstate == 0)
-			if (ipath_layer_get_linkdowndefaultstate(dev->dd))
+			if (get_linkdowndefaultstate(dev->dd))
 				lstate = IPATH_IB_LINKDOWN_SLEEP;
 			else
 				lstate = IPATH_IB_LINKDOWN;
@@ -451,7 +555,7 @@
 			lstate = IPATH_IB_LINKDOWN_DISABLE;
 		else
 			goto err;
-		ipath_layer_set_linkstate(dev->dd, lstate);
+		ipath_set_linkstate(dev->dd, lstate);
 		if (flags & IPATH_LINKACTIVE) {
 			event.event = IB_EVENT_PORT_ERR;
 			ib_dispatch_event(&event);
@@ -460,7 +564,7 @@
 	case IB_PORT_ARMED:
 		if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
 			break;
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
 		if (flags & IPATH_LINKACTIVE) {
 			event.event = IB_EVENT_PORT_ERR;
 			ib_dispatch_event(&event);
@@ -469,7 +573,7 @@
 	case IB_PORT_ACTIVE:
 		if (!(flags & IPATH_LINKARMED))
 			break;
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
 		event.event = IB_EVENT_PORT_ACTIVE;
 		ib_dispatch_event(&event);
 		break;
@@ -493,6 +597,152 @@
 	return ret;
 }
 
+/**
+ * rm_pkey - decrement the reference count for the given PKEY
+ * @dd: the infinipath device
+ * @key: the PKEY value to look up in the device's PKEY table
+ *
+ * Return true if this was the last reference and the hardware table entry
+ * needs to be changed.
+ */
+static int rm_pkey(struct ipath_devdata *dd, u16 key)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (dd->ipath_pkeys[i] != key)
+			continue;
+		if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
+			dd->ipath_pkeys[i] = 0;
+			ret = 1;
+			goto bail;
+		}
+		break;
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * add_pkey - add the given PKEY to the hardware table
+ * @dd: the infinipath device
+ * @key: the PKEY
+ *
+ * Return an error code if unable to add the entry, zero if no change,
+ * or 1 if the hardware PKEY register needs to be updated.
+ */
+static int add_pkey(struct ipath_devdata *dd, u16 key)
+{
+	int i;
+	u16 lkey = key & 0x7FFF;
+	int any = 0;
+	int ret;
+
+	if (lkey == 0x7FFF) {
+		ret = 0;
+		goto bail;
+	}
+
+	/* Look for an empty slot or a matching PKEY. */
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (!dd->ipath_pkeys[i]) {
+			any++;
+			continue;
+		}
+		/* If it matches exactly, try to increment the ref count */
+		if (dd->ipath_pkeys[i] == key) {
+			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
+				ret = 0;
+				goto bail;
+			}
+			/* Lost the race. Look for an empty slot below. */
+			atomic_dec(&dd->ipath_pkeyrefs[i]);
+			any++;
+		}
+		/*
+		 * It makes no sense to have both the limited and unlimited
+		 * PKEY set at the same time since the unlimited one will
+		 * disable the limited one.
+		 */
+		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+			ret = -EEXIST;
+			goto bail;
+		}
+	}
+	if (!any) {
+		ret = -EBUSY;
+		goto bail;
+	}
+	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+		if (!dd->ipath_pkeys[i] &&
+		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+			/* for ipathstats, etc. */
+			ipath_stats.sps_pkeys[i] = lkey;
+			dd->ipath_pkeys[i] = key;
+			ret = 1;
+			goto bail;
+		}
+	}
+	ret = -EBUSY;
+
+bail:
+	return ret;
+}
+
+/**
+ * set_pkeys - set the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the PKEY table
+ */
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+{
+	struct ipath_portdata *pd;
+	int i;
+	int changed = 0;
+
+	pd = dd->ipath_pd[0];
+
+	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+		u16 key = pkeys[i];
+		u16 okey = pd->port_pkeys[i];
+
+		if (key == okey)
+			continue;
+		/*
+		 * The value of this PKEY table entry is changing.
+		 * Remove the old entry in the hardware's array of PKEYs.
+		 */
+		if (okey & 0x7FFF)
+			changed |= rm_pkey(dd, okey);
+		if (key & 0x7FFF) {
+			int ret = add_pkey(dd, key);
+
+			if (ret < 0)
+				key = 0;
+			else
+				changed |= ret;
+		}
+		pd->port_pkeys[i] = key;
+	}
+	if (changed) {
+		u64 pkey;
+
+		pkey = (u64) dd->ipath_pkeys[0] |
+			((u64) dd->ipath_pkeys[1] << 16) |
+			((u64) dd->ipath_pkeys[2] << 32) |
+			((u64) dd->ipath_pkeys[3] << 48);
+		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
+			   (unsigned long long) pkey);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
+				 pkey);
+	}
+	return 0;
+}
+
 static int recv_subn_set_pkeytable(struct ib_smp *smp,
 				   struct ib_device *ibdev)
 {
@@ -500,13 +750,12 @@
 	__be16 *p = (__be16 *) smp->data;
 	u16 *q = (u16 *) smp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	unsigned i, n = ipath_layer_get_npkeys(dev->dd);
+	unsigned i, n = ipath_get_npkeys(dev->dd);
 
 	for (i = 0; i < n; i++)
 		q[i] = be16_to_cpu(p[i]);
 
-	if (startpx != 0 ||
-	    ipath_layer_set_pkeys(dev->dd, q) != 0)
+	if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	return recv_subn_get_pkeytable(smp, ibdev);
@@ -844,10 +1093,10 @@
 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
 		pmp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 	u8 port_select = p->port_select;
 
-	ipath_layer_get_counters(dev->dd, &cntrs);
+	ipath_get_counters(dev->dd, &cntrs);
 
 	/* Adjust counters for any resets done. */
 	cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
@@ -944,8 +1193,8 @@
 	u64 swords, rwords, spkts, rpkts, xwait;
 	u8 port_select = p->port_select;
 
-	ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				      &rpkts, &xwait);
+	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+				&rpkts, &xwait);
 
 	/* Adjust counters for any resets done. */
 	swords -= dev->z_port_xmit_data;
@@ -978,13 +1227,13 @@
 	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
 		pmp->data;
 	struct ipath_ibdev *dev = to_idev(ibdev);
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 
 	/*
 	 * Since the HW doesn't support clearing counters, we save the
 	 * current count and subtract it from future responses.
 	 */
-	ipath_layer_get_counters(dev->dd, &cntrs);
+	ipath_get_counters(dev->dd, &cntrs);
 
 	if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
 		dev->z_symbol_error_counter = cntrs.symbol_error_counter;
@@ -1041,8 +1290,8 @@
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	u64 swords, rwords, spkts, rpkts, xwait;
 
-	ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
-				      &rpkts, &xwait);
+	ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+				&rpkts, &xwait);
 
 	if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
 		dev->z_port_xmit_data = swords;
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
new file mode 100644
index 0000000..11b7378
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct ipath_mmap_info
+ */
+void ipath_release_mmap_info(struct kref *ref)
+{
+	struct ipath_mmap_info *ip =
+		container_of(ref, struct ipath_mmap_info, ref);
+
+	vfree(ip->obj);
+	kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the object (CQ, QP, or
+ * SRQ) is mapped, so that it is not freed while it is still mapped.
+ */
+static void ipath_vma_open(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	kref_get(&ip->ref);
+	ip->mmap_cnt++;
+}
+
+static void ipath_vma_close(struct vm_area_struct *vma)
+{
+	struct ipath_mmap_info *ip = vma->vm_private_data;
+
+	ip->mmap_cnt--;
+	kref_put(&ip->ref, ipath_release_mmap_info);
+}
+
+static struct vm_operations_struct ipath_vm_ops = {
+	.open =     ipath_vma_open,
+	.close =    ipath_vma_close,
+};
+
+/**
+ * ipath_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct ipath_ibdev *dev = to_idev(context->device);
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct ipath_mmap_info *ip, **pp;
+	int ret = -EINVAL;
+
+	/*
+	 * Search the device's list of objects waiting for a mmap call.
+	 * Normally, this list is very short since a call to create a
+	 * CQ, QP, or SRQ is soon followed by a call to mmap().
+	 */
+	spin_lock_irq(&dev->pending_lock);
+	for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+		/* Only the creator is allowed to mmap the object */
+		if (context != ip->context || (void *) offset != ip->obj)
+			continue;
+		/* Don't allow a mmap larger than the object. */
+		if (size > ip->size)
+			break;
+
+		*pp = ip->next;
+		spin_unlock_irq(&dev->pending_lock);
+
+		ret = remap_vmalloc_range(vma, ip->obj, 0);
+		if (ret)
+			goto done;
+		vma->vm_ops = &ipath_vm_ops;
+		vma->vm_private_data = ip;
+		ipath_vma_open(vma);
+		goto done;
+	}
+	spin_unlock_irq(&dev->pending_lock);
+done:
+	return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 4ac31a5..b36f6fb 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -36,6 +36,18 @@
 
 #include "ipath_verbs.h"
 
+/* Fast memory region */
+struct ipath_fmr {
+	struct ib_fmr ibfmr;
+	u8 page_shift;
+	struct ipath_mregion mr;        /* must be last */
+};
+
+static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct ipath_fmr, ibfmr);
+}
+
 /**
  * ipath_get_dma_mr - get a DMA memory region
  * @pd: protection domain for this memory region
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 83e557b..224b0f4 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -35,7 +35,7 @@
 #include <linux/vmalloc.h>
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 #define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
@@ -44,19 +44,6 @@
 #define find_next_offset(map, off) find_next_zero_bit((map)->page, \
 						      BITS_PER_PAGE, off)
 
-#define TRANS_INVALID	0
-#define TRANS_ANY2RST	1
-#define TRANS_RST2INIT	2
-#define TRANS_INIT2INIT	3
-#define TRANS_INIT2RTR	4
-#define TRANS_RTR2RTS	5
-#define TRANS_RTS2RTS	6
-#define TRANS_SQERR2RTS	7
-#define TRANS_ANY2ERR	8
-#define TRANS_RTS2SQD	9  /* XXX Wait for expected ACKs & signal event */
-#define TRANS_SQD2SQD	10 /* error if not drained & parameter change */
-#define TRANS_SQD2RTS	11 /* error if not drained */
-
 /*
  * Convert the AETH credit code into the number of credits.
  */
@@ -287,7 +274,7 @@
 				free_qpn(qpt, qp->ibqp.qp_num);
 			if (!atomic_dec_and_test(&qp->refcount) ||
 			    !ipath_destroy_qp(&qp->ibqp))
-				_VERBS_INFO("QP memory leak!\n");
+				ipath_dbg(KERN_INFO "QP memory leak!\n");
 			qp = nqp;
 		}
 	}
@@ -355,8 +342,10 @@
 	qp->s_last = 0;
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
-	qp->r_rq.head = 0;
-	qp->r_rq.tail = 0;
+	if (qp->r_rq.wq) {
+		qp->r_rq.wq->head = 0;
+		qp->r_rq.wq->tail = 0;
+	}
 	qp->r_reuse_sge = 0;
 }
 
@@ -373,8 +362,8 @@
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
 
-	_VERBS_INFO("QP%d/%d in error state\n",
-		    qp->ibqp.qp_num, qp->remote_qpn);
+	ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+		  qp->ibqp.qp_num, qp->remote_qpn);
 
 	spin_lock(&dev->pending_lock);
 	/* XXX What if its already removed by the timeout code? */
@@ -410,15 +399,32 @@
 	qp->s_hdrwords = 0;
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 
-	wc.opcode = IB_WC_RECV;
-	spin_lock(&qp->r_rq.lock);
-	while (qp->r_rq.tail != qp->r_rq.head) {
-		wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
-		if (++qp->r_rq.tail >= qp->r_rq.size)
-			qp->r_rq.tail = 0;
-		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	if (qp->r_rq.wq) {
+		struct ipath_rwq *wq;
+		u32 head;
+		u32 tail;
+
+		spin_lock(&qp->r_rq.lock);
+
+		/* sanity check pointers before trusting them */
+		wq = qp->r_rq.wq;
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
+		wc.opcode = IB_WC_RECV;
+		while (tail != head) {
+			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+			if (++tail >= qp->r_rq.size)
+				tail = 0;
+			ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+		}
+		wq->tail = tail;
+
+		spin_unlock(&qp->r_rq.lock);
 	}
-	spin_unlock(&qp->r_rq.lock);
 }
 
 /**
@@ -426,11 +432,12 @@
  * @ibqp: the queue pair who's attributes we're modifying
  * @attr: the new attributes
  * @attr_mask: the mask of attributes to modify
+ * @udata: user data for ipathverbs.so
  *
  * Returns 0 on success, otherwise returns an errno.
  */
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask)
+		    int attr_mask, struct ib_udata *udata)
 {
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	struct ipath_qp *qp = to_iqp(ibqp);
@@ -448,19 +455,46 @@
 				attr_mask))
 		goto inval;
 
-	if (attr_mask & IB_QP_AV)
+	if (attr_mask & IB_QP_AV) {
 		if (attr->ah_attr.dlid == 0 ||
 		    attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
 			goto inval;
 
+		if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
+		    (attr->ah_attr.grh.sgid_index > 1))
+			goto inval;
+	}
+
 	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
+		if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
 			goto inval;
 
 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
 		if (attr->min_rnr_timer > 31)
 			goto inval;
 
+	if (attr_mask & IB_QP_PORT)
+		if (attr->port_num == 0 ||
+		    attr->port_num > ibqp->device->phys_port_cnt)
+			goto inval;
+
+	if (attr_mask & IB_QP_PATH_MTU)
+		if (attr->path_mtu > IB_MTU_4096)
+			goto inval;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		if (attr->max_dest_rd_atomic > 1)
+			goto inval;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		if (attr->max_rd_atomic > 1)
+			goto inval;
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE)
+		if (attr->path_mig_state != IB_MIG_MIGRATED &&
+		    attr->path_mig_state != IB_MIG_REARM)
+			goto inval;
+
 	switch (new_state) {
 	case IB_QPS_RESET:
 		ipath_reset_qp(qp);
@@ -511,6 +545,9 @@
 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
 		qp->r_min_rnr_timer = attr->min_rnr_timer;
 
+	if (attr_mask & IB_QP_TIMEOUT)
+		qp->timeout = attr->timeout;
+
 	if (attr_mask & IB_QP_QKEY)
 		qp->qkey = attr->qkey;
 
@@ -543,7 +580,7 @@
 	attr->dest_qp_num = qp->remote_qpn;
 	attr->qp_access_flags = qp->qp_access_flags;
 	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->r_rq.size - 1;
+	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
 	attr->cap.max_send_sge = qp->s_max_sge;
 	attr->cap.max_recv_sge = qp->r_rq.max_sge;
 	attr->cap.max_inline_data = 0;
@@ -557,7 +594,7 @@
 	attr->max_dest_rd_atomic = 1;
 	attr->min_rnr_timer = qp->r_min_rnr_timer;
 	attr->port_num = 1;
-	attr->timeout = 0;
+	attr->timeout = qp->timeout;
 	attr->retry_cnt = qp->s_retry_cnt;
 	attr->rnr_retry = qp->s_rnr_retry;
 	attr->alt_port_num = 0;
@@ -569,9 +606,10 @@
 	init_attr->recv_cq = qp->ibqp.recv_cq;
 	init_attr->srq = qp->ibqp.srq;
 	init_attr->cap = attr->cap;
-	init_attr->sq_sig_type =
-		(qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
-		? IB_SIGNAL_REQ_WR : 0;
+	if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+	else
+		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
 	init_attr->qp_type = qp->ibqp.qp_type;
 	init_attr->port_num = 1;
 	return 0;
@@ -596,13 +634,23 @@
 	} else {
 		u32 min, max, x;
 		u32 credits;
+		struct ipath_rwq *wq = qp->r_rq.wq;
+		u32 head;
+		u32 tail;
 
+		/* sanity check pointers before trusting them */
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
 		/*
 		 * Compute the number of credits available (RWQEs).
 		 * XXX Not holding the r_rq.lock here so there is a small
 		 * chance that the pair of reads are not atomic.
 		 */
-		credits = qp->r_rq.head - qp->r_rq.tail;
+		credits = head - tail;
 		if ((int)credits < 0)
 			credits += qp->r_rq.size;
 		/*
@@ -679,27 +727,37 @@
 	case IB_QPT_UD:
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
-		qp = kmalloc(sizeof(*qp), GFP_KERNEL);
+		sz = sizeof(*qp);
+		if (init_attr->srq) {
+			struct ipath_srq *srq = to_isrq(init_attr->srq);
+
+			sz += sizeof(*qp->r_sg_list) *
+				srq->rq.max_sge;
+		} else
+			sz += sizeof(*qp->r_sg_list) *
+				init_attr->cap.max_recv_sge;
+		qp = kmalloc(sz, GFP_KERNEL);
 		if (!qp) {
-			vfree(swq);
 			ret = ERR_PTR(-ENOMEM);
-			goto bail;
+			goto bail_swq;
 		}
 		if (init_attr->srq) {
+			sz = 0;
 			qp->r_rq.size = 0;
 			qp->r_rq.max_sge = 0;
 			qp->r_rq.wq = NULL;
+			init_attr->cap.max_recv_wr = 0;
+			init_attr->cap.max_recv_sge = 0;
 		} else {
 			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
 			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) +
+			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
 				sizeof(struct ipath_rwqe);
-			qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
+			qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
 			if (!qp->r_rq.wq) {
-				kfree(qp);
-				vfree(swq);
 				ret = ERR_PTR(-ENOMEM);
-				goto bail;
+				goto bail_qp;
 			}
 		}
 
@@ -719,24 +777,19 @@
 		qp->s_wq = swq;
 		qp->s_size = init_attr->cap.max_send_wr + 1;
 		qp->s_max_sge = init_attr->cap.max_send_sge;
-		qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
-			1 << IPATH_S_SIGNAL_REQ_WR : 0;
+		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+			qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+		else
+			qp->s_flags = 0;
 		dev = to_idev(ibpd->device);
 		err = ipath_alloc_qpn(&dev->qp_table, qp,
 				      init_attr->qp_type);
 		if (err) {
-			vfree(swq);
-			vfree(qp->r_rq.wq);
-			kfree(qp);
 			ret = ERR_PTR(err);
-			goto bail;
+			goto bail_rwq;
 		}
+		qp->ip = NULL;
 		ipath_reset_qp(qp);
-
-		/* Tell the core driver that the kernel SMA is present. */
-		if (init_attr->qp_type == IB_QPT_SMI)
-			ipath_layer_set_verbs_flags(dev->dd,
-						    IPATH_VERBS_KERNEL_SMA);
 		break;
 
 	default:
@@ -747,8 +800,63 @@
 
 	init_attr->cap.max_inline_data = 0;
 
-	ret = &qp->ibqp;
+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) qp->r_rq.wq;
+		int err;
 
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_rwq;
+		}
+
+		if (qp->r_rq.wq) {
+			/* Allocate info for ipath_mmap(). */
+			ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+			if (!ip) {
+				ret = ERR_PTR(-ENOMEM);
+				goto bail_rwq;
+			}
+			qp->ip = ip;
+			ip->context = ibpd->uobject->context;
+			ip->obj = qp->r_rq.wq;
+			kref_init(&ip->ref);
+			ip->mmap_cnt = 0;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      qp->r_rq.size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	}
+
+	spin_lock(&dev->n_qps_lock);
+	if (dev->n_qps_allocated == ib_ipath_max_qps) {
+		spin_unlock(&dev->n_qps_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_ip;
+	}
+
+	dev->n_qps_allocated++;
+	spin_unlock(&dev->n_qps_lock);
+
+	ret = &qp->ibqp;
+	goto bail;
+
+bail_ip:
+	kfree(qp->ip);
+bail_rwq:
+	vfree(qp->r_rq.wq);
+bail_qp:
+	kfree(qp);
+bail_swq:
+	vfree(swq);
 bail:
 	return ret;
 }
@@ -768,15 +876,12 @@
 	struct ipath_ibdev *dev = to_idev(ibqp->device);
 	unsigned long flags;
 
-	/* Tell the core driver that the kernel SMA is gone. */
-	if (qp->ibqp.qp_type == IB_QPT_SMI)
-		ipath_layer_set_verbs_flags(dev->dd, 0);
-
-	spin_lock_irqsave(&qp->r_rq.lock, flags);
-	spin_lock(&qp->s_lock);
+	spin_lock_irqsave(&qp->s_lock, flags);
 	qp->state = IB_QPS_ERR;
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_lock(&dev->n_qps_lock);
+	dev->n_qps_allocated--;
+	spin_unlock(&dev->n_qps_lock);
 
 	/* Stop the sending tasklet. */
 	tasklet_kill(&qp->s_task);
@@ -797,8 +902,11 @@
 	if (atomic_read(&qp->refcount) != 0)
 		ipath_free_qp(&dev->qp_table, qp);
 
+	if (qp->ip)
+		kref_put(&qp->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(qp->r_rq.wq);
 	vfree(qp->s_wq);
-	vfree(qp->r_rq.wq);
 	kfree(qp);
 	return 0;
 }
@@ -850,8 +958,8 @@
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
 
-	_VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
-		    qp->ibqp.qp_num, qp->remote_qpn, wc->status);
+	ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);
 
 	spin_lock(&dev->pending_lock);
 	/* XXX What if its already removed by the timeout code? */
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 774d161..a086540 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
@@ -540,7 +540,7 @@
 		lrh0 = IPATH_LRH_GRH;
 	}
 	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	if (qp->r_nak_state)
 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
 					    (qp->r_nak_state <<
@@ -557,7 +557,7 @@
 	hdr.lrh[0] = cpu_to_be16(lrh0);
 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-	hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
+	hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
 	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
@@ -1323,8 +1323,7 @@
 		 * the eager header buffer size to 56 bytes so the last 4
 		 * bytes of the BTH header (PSN) is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			psn = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 89df8f5..6e23b3d 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -36,8 +36,7 @@
 
 /*
  * This file should only be included by kernel source, and by the diags.  It
- * defines the registers, and their contents, for the InfiniPath HT-400
- * chip.
+ * defines the registers, and their contents, for InfiniPath chips.
  */
 
 /*
@@ -283,10 +282,12 @@
 #define INFINIPATH_XGXS_RESET          0x7ULL
 #define INFINIPATH_XGXS_MDIOADDR_MASK  0xfULL
 #define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
+#define INFINIPATH_XGXS_RX_POL_SHIFT 19
+#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
 
 #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL	/* 40 bits valid */
 
-/* TID entries (memory), HT400-only */
+/* TID entries (memory), HT-only */
 #define INFINIPATH_RT_VALID 0x8000000000000000ULL
 #define INFINIPATH_RT_ADDR_SHIFT 0
 #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 772bc59..5c1da2d 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /*
  * Convert the AETH RNR timeout code into the number of milliseconds.
@@ -106,6 +106,54 @@
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 }
 
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+{
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	int user = to_ipd(qp->ibqp.pd)->user;
+	int i, j, ret;
+	struct ib_wc wc;
+
+	qp->r_len = 0;
+	for (i = j = 0; i < wqe->num_sge; i++) {
+		if (wqe->sg_list[i].length == 0)
+			continue;
+		/* Check LKEY */
+		if ((user && wqe->sg_list[i].lkey == 0) ||
+		    !ipath_lkey_ok(&dev->lk_table,
+				   &qp->r_sg_list[j], &wqe->sg_list[i],
+				   IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		qp->r_len += wqe->sg_list[i].length;
+		j++;
+	}
+	qp->r_sge.sge = qp->r_sg_list[0];
+	qp->r_sge.sg_list = qp->r_sg_list + 1;
+	qp->r_sge.num_sge = j;
+	ret = 1;
+	goto bail;
+
+bad_lkey:
+	wc.wr_id = wqe->wr_id;
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.qp_num = qp->ibqp.qp_num;
+	wc.src_qp = 0;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = 0;
+	wc.sl = 0;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	/* Signal solicited completion event. */
+	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	ret = 0;
+bail:
+	return ret;
+}
+
 /**
  * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
  * @qp: the QP
@@ -119,71 +167,71 @@
 {
 	unsigned long flags;
 	struct ipath_rq *rq;
+	struct ipath_rwq *wq;
 	struct ipath_srq *srq;
 	struct ipath_rwqe *wqe;
-	int ret = 1;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	int ret;
 
-	if (!qp->ibqp.srq) {
+	if (qp->ibqp.srq) {
+		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
+		rq = &srq->rq;
+	} else {
+		srq = NULL;
+		handler = NULL;
 		rq = &qp->r_rq;
-		spin_lock_irqsave(&rq->lock, flags);
-
-		if (unlikely(rq->tail == rq->head)) {
-			ret = 0;
-			goto done;
-		}
-		wqe = get_rwqe_ptr(rq, rq->tail);
-		qp->r_wr_id = wqe->wr_id;
-		if (!wr_id_only) {
-			qp->r_sge.sge = wqe->sg_list[0];
-			qp->r_sge.sg_list = wqe->sg_list + 1;
-			qp->r_sge.num_sge = wqe->num_sge;
-			qp->r_len = wqe->length;
-		}
-		if (++rq->tail >= rq->size)
-			rq->tail = 0;
-		goto done;
 	}
 
-	srq = to_isrq(qp->ibqp.srq);
-	rq = &srq->rq;
 	spin_lock_irqsave(&rq->lock, flags);
-
-	if (unlikely(rq->tail == rq->head)) {
-		ret = 0;
-		goto done;
-	}
-	wqe = get_rwqe_ptr(rq, rq->tail);
+	wq = rq->wq;
+	tail = wq->tail;
+	/* Validate tail before using it since it is user writable. */
+	if (tail >= rq->size)
+		tail = 0;
+	do {
+		if (unlikely(tail == wq->head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			ret = 0;
+			goto bail;
+		}
+		wqe = get_rwqe_ptr(rq, tail);
+		if (++tail >= rq->size)
+			tail = 0;
+	} while (!wr_id_only && !init_sge(qp, wqe));
 	qp->r_wr_id = wqe->wr_id;
-	if (!wr_id_only) {
-		qp->r_sge.sge = wqe->sg_list[0];
-		qp->r_sge.sg_list = wqe->sg_list + 1;
-		qp->r_sge.num_sge = wqe->num_sge;
-		qp->r_len = wqe->length;
-	}
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq->ibsrq.event_handler) {
-		struct ib_event ev;
+	wq->tail = tail;
+
+	ret = 1;
+	if (handler) {
 		u32 n;
 
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
 		else
-			n = rq->head - rq->tail;
+			n -= tail;
 		if (n < srq->limit) {
+			struct ib_event ev;
+
 			srq->limit = 0;
 			spin_unlock_irqrestore(&rq->lock, flags);
 			ev.device = qp->ibqp.device;
 			ev.element.srq = qp->ibqp.srq;
 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
+			handler(&ev, srq->ibsrq.srq_context);
 			goto bail;
 		}
 	}
-
-done:
 	spin_unlock_irqrestore(&rq->lock, flags);
+
 bail:
 	return ret;
 }
@@ -422,6 +470,15 @@
 		wake_up(&qp->wait);
 }
 
+/* Set PIOINTBUFAVAIL in dd->ipath_sendctrl, then write it to kr_sendctrl. */
+static int want_buffer(struct ipath_devdata *dd)
+{
+	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+
+	return 0;	/* unconditional */
+}
+
 /**
  * ipath_no_bufs_available - tell the layer driver we need buffers
  * @qp: the QP that caused the problem
@@ -438,7 +495,7 @@
 		list_add_tail(&qp->piowait, &dev->piowait);
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 	/*
-	 * Note that as soon as ipath_layer_want_buffer() is called and
+	 * Note that as soon as want_buffer() is called and
 	 * possibly before it returns, ipath_ib_piobufavail()
 	 * could be called.  If we are still in the tasklet function,
 	 * tasklet_hi_schedule() will not call us until the next time
@@ -448,7 +505,7 @@
 	 */
 	clear_bit(IPATH_S_BUSY, &qp->s_flags);
 	tasklet_unlock(&qp->s_task);
-	ipath_layer_want_buffer(dev->dd);
+	want_buffer(dev->dd);
 	dev->n_piowait++;
 }
 
@@ -563,7 +620,7 @@
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
 	hdr->sgid.global.subnet_prefix = dev->gid_prefix;
-	hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd);
+	hdr->sgid.global.interface_id = dev->dd->ipath_guid;
 	hdr->dgid = grh->dgid;
 
 	/* GRH header size in 32-bit words. */
@@ -595,8 +652,7 @@
 	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
 		goto bail;
 
-	if (unlikely(qp->remote_ah_attr.dlid ==
-		     ipath_layer_get_lid(dev->dd))) {
+	if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
 		ipath_ruc_loopback(qp);
 		goto clear;
 	}
@@ -663,8 +719,8 @@
 	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
 				       SIZE_OF_CRC);
-	qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
-	bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index f760434..941e866 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -48,66 +48,39 @@
 			   struct ib_recv_wr **bad_wr)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	struct ipath_ibdev *dev = to_idev(ibsrq->device);
+	struct ipath_rwq *wq;
 	unsigned long flags;
 	int ret;
 
 	for (; wr; wr = wr->next) {
 		struct ipath_rwqe *wqe;
 		u32 next;
-		int i, j;
+		int i;
 
-		if (wr->num_sge > srq->rq.max_sge) {
+		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
 		spin_lock_irqsave(&srq->rq.lock, flags);
-		next = srq->rq.head + 1;
+		wq = srq->rq.wq;
+		next = wq->head + 1;
 		if (next >= srq->rq.size)
 			next = 0;
-		if (next == srq->rq.tail) {
+		if (next == wq->tail) {
 			spin_unlock_irqrestore(&srq->rq.lock, flags);
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
+		wqe = get_rwqe_ptr(&srq->rq, wq->head);
 		wqe->wr_id = wr->wr_id;
-		wqe->sg_list[0].mr = NULL;
-		wqe->sg_list[0].vaddr = NULL;
-		wqe->sg_list[0].length = 0;
-		wqe->sg_list[0].sge_length = 0;
-		wqe->length = 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			/* Check LKEY */
-			if (to_ipd(srq->ibsrq.pd)->user &&
-			    wr->sg_list[i].lkey == 0) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			if (wr->sg_list[i].length == 0)
-				continue;
-			if (!ipath_lkey_ok(&dev->lk_table,
-					   &wqe->sg_list[j],
-					   &wr->sg_list[i],
-					   IB_ACCESS_LOCAL_WRITE)) {
-				spin_unlock_irqrestore(&srq->rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			wqe->length += wr->sg_list[i].length;
-			j++;
-		}
-		wqe->num_sge = j;
-		srq->rq.head = next;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		wq->head = next;
 		spin_unlock_irqrestore(&srq->rq.lock, flags);
 	}
 	ret = 0;
@@ -133,53 +106,95 @@
 
 	if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
 	if (srq_init_attr->attr.max_wr == 0) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}
 
 	if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
 	    (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
 		ret = ERR_PTR(-EINVAL);
-		goto bail;
+		goto done;
 	}
 
 	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
 	if (!srq) {
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto done;
 	}
 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
 	 */
 	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
+	srq->rq.max_sge = srq_init_attr->attr.max_sge;
+	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
 		sizeof(struct ipath_rwqe);
-	srq->rq.wq = vmalloc(srq->rq.size * sz);
+	srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
 	if (!srq->rq.wq) {
-		kfree(srq);
 		ret = ERR_PTR(-ENOMEM);
-		goto bail;
+		goto bail_srq;
 	}
 
 	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See ipath_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		struct ipath_mmap_info *ip;
+		__u64 offset = (__u64) srq->rq.wq;
+		int err;
+
+		err = ib_copy_to_udata(udata, &offset, sizeof(offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_wq;
+		}
+
+		/* Allocate info for ipath_mmap(). */
+		ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+		if (!ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wq;
+		}
+		srq->ip = ip;
+		ip->context = ibpd->uobject->context;
+		ip->obj = srq->rq.wq;
+		kref_init(&ip->ref);
+		ip->mmap_cnt = 0;
+		ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+				      srq->rq.size * sz);
+		spin_lock_irq(&dev->pending_lock);
+		ip->next = dev->pending_mmaps;
+		dev->pending_mmaps = ip;
+		spin_unlock_irq(&dev->pending_lock);
+	} else
+		srq->ip = NULL;
+
+	/*
 	 * ib_create_srq() will initialize srq->ibsrq.
 	 */
 	spin_lock_init(&srq->rq.lock);
-	srq->rq.head = 0;
-	srq->rq.tail = 0;
+	srq->rq.wq->head = 0;
+	srq->rq.wq->tail = 0;
 	srq->rq.max_sge = srq_init_attr->attr.max_sge;
 	srq->limit = srq_init_attr->attr.srq_limit;
 
-	ret = &srq->ibsrq;
-
 	dev->n_srqs_allocated++;
 
-bail:
+	ret = &srq->ibsrq;
+	goto done;
+
+bail_wq:
+	vfree(srq->rq.wq);
+
+bail_srq:
+	kfree(srq);
+
+done:
 	return ret;
 }
 
@@ -188,83 +203,130 @@
  * @ibsrq: the SRQ to modify
  * @attr: the new attributes of the SRQ
  * @attr_mask: indicates which attributes to modify
+ * @udata: user data for ipathverbs.so
  */
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask)
+		     enum ib_srq_attr_mask attr_mask,
+		     struct ib_udata *udata)
 {
 	struct ipath_srq *srq = to_isrq(ibsrq);
-	unsigned long flags;
-	int ret;
-
-	if (attr_mask & IB_SRQ_MAX_WR)
-		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
-		    (attr->max_sge > srq->rq.max_sge)) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-	if (attr_mask & IB_SRQ_LIMIT)
-		if (attr->srq_limit >= srq->rq.size) {
-			ret = -EINVAL;
-			goto bail;
-		}
+	int ret = 0;
 
 	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct ipath_rwqe *wq, *p;
-		u32 sz, size, n;
+		struct ipath_rwq *owq;
+		struct ipath_rwq *wq;
+		struct ipath_rwqe *p;
+		u32 sz, size, n, head, tail;
+
+		/* Check that the requested sizes are below the limits. */
+		if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
+		    ((attr_mask & IB_SRQ_LIMIT) ?
+		     attr->srq_limit : srq->limit) > attr->max_wr) {
+			ret = -EINVAL;
+			goto bail;
+		}
 
 		sz = sizeof(struct ipath_rwqe) +
-			attr->max_sge * sizeof(struct ipath_sge);
+			srq->rq.max_sge * sizeof(struct ib_sge);
 		size = attr->max_wr + 1;
-		wq = vmalloc(size * sz);
+		wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
 		if (!wq) {
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		if (srq->rq.head < srq->rq.tail)
-			n = srq->rq.size + srq->rq.head - srq->rq.tail;
+		/*
+		 * Return the address of the RWQ as the offset to mmap.
+		 * See ipath_mmap() for details.
+		 */
+		if (udata && udata->inlen >= sizeof(__u64)) {
+			__u64 offset_addr;
+			__u64 offset = (__u64) wq;
+
+			ret = ib_copy_from_udata(&offset_addr, udata,
+						 sizeof(offset_addr));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+			udata->outbuf = (void __user *) offset_addr;
+			ret = ib_copy_to_udata(udata, &offset,
+					       sizeof(offset));
+			if (ret) {
+				vfree(wq);
+				goto bail;
+			}
+		}
+
+		spin_lock_irq(&srq->rq.lock);
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		owq = srq->rq.wq;
+		head = owq->head;
+		if (head >= srq->rq.size)
+			head = 0;
+		tail = owq->tail;
+		if (tail >= srq->rq.size)
+			tail = 0;
+		n = head;
+		if (n < tail)
+			n += srq->rq.size - tail;
 		else
-			n = srq->rq.head - srq->rq.tail;
-		if (size <= n || size <= srq->limit) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
+			n -= tail;
+		if (size <= n) {
+			spin_unlock_irq(&srq->rq.lock);
 			vfree(wq);
 			ret = -EINVAL;
 			goto bail;
 		}
 		n = 0;
-		p = wq;
-		while (srq->rq.tail != srq->rq.head) {
+		p = wq->wq;
+		while (tail != head) {
 			struct ipath_rwqe *wqe;
 			int i;
 
-			wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
+			wqe = get_rwqe_ptr(&srq->rq, tail);
 			p->wr_id = wqe->wr_id;
-			p->length = wqe->length;
 			p->num_sge = wqe->num_sge;
 			for (i = 0; i < wqe->num_sge; i++)
 				p->sg_list[i] = wqe->sg_list[i];
 			n++;
 			p = (struct ipath_rwqe *)((char *) p + sz);
-			if (++srq->rq.tail >= srq->rq.size)
-				srq->rq.tail = 0;
+			if (++tail >= srq->rq.size)
+				tail = 0;
 		}
-		vfree(srq->rq.wq);
 		srq->rq.wq = wq;
 		srq->rq.size = size;
-		srq->rq.head = n;
-		srq->rq.tail = 0;
-		srq->rq.max_sge = attr->max_sge;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
+		wq->head = n;
+		wq->tail = 0;
+		if (attr_mask & IB_SRQ_LIMIT)
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
 
-	if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		srq->limit = attr->srq_limit;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
+		vfree(owq);
+
+		if (srq->ip) {
+			struct ipath_mmap_info *ip = srq->ip;
+			struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+
+			ip->obj = wq;
+			ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
+					      size * sz);
+			spin_lock_irq(&dev->pending_lock);
+			ip->next = dev->pending_mmaps;
+			dev->pending_mmaps = ip;
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	} else if (attr_mask & IB_SRQ_LIMIT) {
+		spin_lock_irq(&srq->rq.lock);
+		if (attr->srq_limit >= srq->rq.size)
+			ret = -EINVAL;
+		else
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
 	}
-	ret = 0;
 
 bail:
 	return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 70351b7..30a8259 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -271,33 +271,6 @@
 		}
 	}
 
-	if (dd->ipath_nosma_bufs) {
-		dd->ipath_nosma_secs += 5;
-		if (dd->ipath_nosma_secs >= 30) {
-			ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
-				   "cancelling pending sends\n",
-				   dd->ipath_nosma_secs);
-			/*
-			 * issue an abort as well, in case we have a packet
-			 * stuck in launch fifo.  This could corrupt an
-			 * outgoing user packet in the worst case,
-			 * but this is a pretty catastrophic, anyway.
-			 */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-					 INFINIPATH_S_ABORT);
-			ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
-					     dd->ipath_piobcnt2k +
-					     dd->ipath_piobcnt4k -
-					     dd->ipath_lastport_piobuf);
-			/* start again, if necessary */
-			dd->ipath_nosma_secs = 0;
-		} else
-			ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
-				   "after %u seconds\n",
-				   dd->ipath_nosma_bufs,
-				   dd->ipath_nosma_secs);
-	}
-
 done:
 	mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index b98821d..e299148 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -35,7 +35,6 @@
 #include <linux/pci.h>
 
 #include "ipath_kernel.h"
-#include "ipath_layer.h"
 #include "ipath_common.h"
 
 /**
@@ -76,7 +75,7 @@
 static ssize_t show_version(struct device_driver *dev, char *buf)
 {
 	/* The string printed here is already newline-terminated. */
-	return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version);
+	return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
 }
 
 static ssize_t show_num_units(struct device_driver *dev, char *buf)
@@ -108,8 +107,8 @@
 	"Initted",
 	"Disabled",
 	"Admin_Disabled",
-	"OIB_SMA",
-	"SMA",
+	"", /* This used to be the old "OIB_SMA" status. */
+	"", /* This used to be the old "SMA" status. */
 	"Present",
 	"IB_link_up",
 	"IB_configured",
@@ -227,7 +226,6 @@
 	unit = dd->ipath_unit;
 
 	dd->ipath_mlid = mlid;
-	ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST);
 
 	goto bail;
 invalid:
@@ -467,7 +465,7 @@
 	if (ret < 0)
 		goto invalid;
 
-	r = ipath_layer_set_linkstate(dd, state);
+	r = ipath_set_linkstate(dd, state);
 	if (r < 0) {
 		ret = r;
 		goto bail;
@@ -502,7 +500,7 @@
 	if (ret < 0)
 		goto invalid;
 
-	r = ipath_layer_set_mtu(dd, mtu);
+	r = ipath_set_mtu(dd, mtu);
 	if (r < 0)
 		ret = r;
 
@@ -563,6 +561,33 @@
 	return ret;
 }
 
+/*
+ * store_rx_pol_inv - store handler for the rx_pol_inv device attribute
+ * @dev: device whose driver data is the ipath_devdata to update
+ * @attr: unused
+ * @buf: text handed to ipath_parse_ushort()
+ * @count: unused
+ *
+ * Returns the ipath_parse_ushort() result, or the negative value from
+ * ipath_set_rx_pol_inv() when applying the parsed setting fails.
+ */
+static ssize_t store_rx_pol_inv(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+	u16 val;
+	int status, ret = ipath_parse_ushort(buf, &val);
+
+	if (ret >= 0) {
+		status = ipath_set_rx_pol_inv(dd, val);
+		return status < 0 ? status : ret;
+	}
+	ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
+	return ret;
+}
+
+
 static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
 static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
 
@@ -589,6 +614,7 @@
 static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
+static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
 
 static struct attribute *dev_attributes[] = {
 	&dev_attr_guid.attr,
@@ -603,6 +629,7 @@
 	&dev_attr_boardversion.attr,
 	&dev_attr_unit.attr,
 	&dev_attr_enabled.attr,
+	&dev_attr_rx_pol_inv.attr,
 	NULL
 };
 
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index c33abea..0fd3cde 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -32,7 +32,7 @@
  */
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_UC_##x
@@ -261,8 +261,7 @@
 		 * size to 56 bytes so the last 4 bytes of
 		 * the BTH header (PSN) is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			psn = be32_to_cpu(((__be32 *) data)[0]);
 			data += sizeof(__be32);
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 3466129..6991d1d 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -34,7 +34,54 @@
 #include <rdma/ib_smi.h>
 
 #include "ipath_verbs.h"
-#include "ipath_common.h"
+#include "ipath_kernel.h"
+
+/*
+ * Run each non-empty SGE of wqe through ipath_lkey_ok() into ss, summing the
+ * lengths in *lengthp.  Returns 1, or 0 after queuing an IB_WC_LOC_PROT_ERR.
+ */
+static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+		    u32 *lengthp, struct ipath_sge_state *ss)
+{
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	int user = to_ipd(qp->ibqp.pd)->user;
+	int idx, kept = 0;
+	struct ib_wc wc;
+
+	*lengthp = 0;
+	for (idx = 0; idx < wqe->num_sge; idx++) {
+		if (wqe->sg_list[idx].length == 0)
+			continue;
+		/* Check LKEY */
+		if ((user && wqe->sg_list[idx].lkey == 0) ||
+		    !ipath_lkey_ok(&dev->lk_table,
+				   kept ? &ss->sg_list[kept - 1] : &ss->sge,
+				   &wqe->sg_list[idx], IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		*lengthp += wqe->sg_list[idx].length;
+		kept++;
+	}
+	ss->num_sge = kept;
+	return 1;
+
+bad_lkey:
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.wr_id = wqe->wr_id;
+	wc.qp_num = qp->ibqp.qp_num;
+	wc.vendor_err = 0;
+	wc.byte_len = 0;
+	wc.imm_data = 0;
+	wc.src_qp = 0;
+	wc.wc_flags = 0;
+	wc.pkey_index = 0;
+	wc.slid = 0;
+	wc.sl = 0;
+	wc.dlid_path_bits = 0;
+	wc.port_num = 0;
+	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);	/* solicited */
+	return 0;
+}
 
 /**
  * ipath_ud_loopback - handle send on loopback QPs
@@ -46,6 +93,8 @@
  *
  * This is called from ipath_post_ud_send() to forward a WQE addressed
  * to the same HCA.
+ * Note that the receive interrupt handler may be calling ipath_ud_rcv()
+ * while this is being called.
  */
 static void ipath_ud_loopback(struct ipath_qp *sqp,
 			      struct ipath_sge_state *ss,
@@ -60,7 +109,11 @@
 	struct ipath_srq *srq;
 	struct ipath_sge_state rsge;
 	struct ipath_sge *sge;
+	struct ipath_rwq *wq;
 	struct ipath_rwqe *wqe;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	u32 rlen;
 
 	qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
 	if (!qp)
@@ -94,6 +147,13 @@
 		wc->imm_data = 0;
 	}
 
+	if (wr->num_sge > 1) {
+		rsge.sg_list = kmalloc((wr->num_sge - 1) *
+					sizeof(struct ipath_sge),
+				       GFP_ATOMIC);
+	} else
+		rsge.sg_list = NULL;
+
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 * Note that it is safe to drop the lock after changing rq->tail
@@ -101,37 +161,52 @@
 	 */
 	if (qp->ibqp.srq) {
 		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
 		srq = NULL;
+		handler = NULL;
 		rq = &qp->r_rq;
 	}
+
 	spin_lock_irqsave(&rq->lock, flags);
-	if (rq->tail == rq->head) {
-		spin_unlock_irqrestore(&rq->lock, flags);
-		dev->n_pkt_drops++;
-		goto done;
+	wq = rq->wq;
+	tail = wq->tail;
+	while (1) {
+		if (unlikely(tail == wq->head)) {
+			spin_unlock_irqrestore(&rq->lock, flags);
+			dev->n_pkt_drops++;
+			goto bail_sge;
+		}
+		wqe = get_rwqe_ptr(rq, tail);
+		if (++tail >= rq->size)
+			tail = 0;
+		if (init_sge(qp, wqe, &rlen, &rsge))
+			break;
+		wq->tail = tail;
 	}
 	/* Silently drop packets which are too big. */
-	wqe = get_rwqe_ptr(rq, rq->tail);
-	if (wc->byte_len > wqe->length) {
+	if (wc->byte_len > rlen) {
 		spin_unlock_irqrestore(&rq->lock, flags);
 		dev->n_pkt_drops++;
-		goto done;
+		goto bail_sge;
 	}
+	wq->tail = tail;
 	wc->wr_id = wqe->wr_id;
-	rsge.sge = wqe->sg_list[0];
-	rsge.sg_list = wqe->sg_list + 1;
-	rsge.num_sge = wqe->num_sge;
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq && srq->ibsrq.event_handler) {
+	if (handler) {
 		u32 n;
 
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
+		/*
+		 * validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
 		else
-			n = rq->head - rq->tail;
+			n -= tail;
 		if (n < srq->limit) {
 			struct ib_event ev;
 
@@ -140,12 +215,12 @@
 			ev.device = qp->ibqp.device;
 			ev.element.srq = qp->ibqp.srq;
 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
+			handler(&ev, srq->ibsrq.srq_context);
 		} else
 			spin_unlock_irqrestore(&rq->lock, flags);
 	} else
 		spin_unlock_irqrestore(&rq->lock, flags);
+
 	ah_attr = &to_iah(wr->wr.ud.ah)->attr;
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
@@ -186,7 +261,7 @@
 	wc->src_qp = sqp->ibqp.qp_num;
 	/* XXX do we know which pkey matched? Only needed for GSI. */
 	wc->pkey_index = 0;
-	wc->slid = ipath_layer_get_lid(dev->dd) |
+	wc->slid = dev->dd->ipath_lid |
 		(ah_attr->src_path_bits &
 		 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
 	wc->sl = ah_attr->sl;
@@ -196,6 +271,8 @@
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
 		       wr->send_flags & IB_SEND_SOLICITED);
 
+bail_sge:
+	kfree(rsge.sg_list);
 done:
 	if (atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
@@ -276,7 +353,7 @@
 		ss.num_sge++;
 	}
 	/* Check for invalid packet size. */
-	if (len > ipath_layer_get_ibmtu(dev->dd)) {
+	if (len > dev->dd->ipath_ibmtu) {
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -298,7 +375,7 @@
 		dev->n_unicast_xmit++;
 		lid = ah_attr->dlid &
 			~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
-		if (unlikely(lid == ipath_layer_get_lid(dev->dd))) {
+		if (unlikely(lid == dev->dd->ipath_lid)) {
 			/*
 			 * Pass in an uninitialized ib_wc to save stack
 			 * space.
@@ -327,7 +404,7 @@
 		qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
 			dev->gid_prefix;
 		qp->s_hdr.u.l.grh.sgid.global.interface_id =
-			ipath_layer_get_guid(dev->dd);
+			dev->dd->ipath_guid;
 		qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
 		/*
 		 * Don't worry about sending to locally attached multicast
@@ -357,7 +434,7 @@
 	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
 	qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);	/* DEST LID */
 	qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
-	lid = ipath_layer_get_lid(dev->dd);
+	lid = dev->dd->ipath_lid;
 	if (lid) {
 		lid |= ah_attr->src_path_bits &
 			((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
@@ -368,7 +445,7 @@
 		bth0 |= 1 << 23;
 	bth0 |= extra_bytes << 20;
 	bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
-		ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
+		ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	/*
 	 * Use the multicast QP if the destination LID is a multicast LID.
@@ -433,13 +510,9 @@
 	int opcode;
 	u32 hdrsize;
 	u32 pad;
-	unsigned long flags;
 	struct ib_wc wc;
 	u32 qkey;
 	u32 src_qp;
-	struct ipath_rq *rq;
-	struct ipath_srq *srq;
-	struct ipath_rwqe *wqe;
 	u16 dlid;
 	int header_in_data;
 
@@ -458,8 +531,7 @@
 		 * the eager header buffer size to 56 bytes so the last 12
 		 * bytes of the IB header is in the data buffer.
 		 */
-		header_in_data =
-			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
+		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
 		if (header_in_data) {
 			qkey = be32_to_cpu(((__be32 *) data)[1]);
 			src_qp = be32_to_cpu(((__be32 *) data)[2]);
@@ -547,19 +619,10 @@
 
 	/*
 	 * Get the next work request entry to find where to put the data.
-	 * Note that it is safe to drop the lock after changing rq->tail
-	 * since ipath_post_receive() won't fill the empty slot.
 	 */
-	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
-		rq = &srq->rq;
-	} else {
-		srq = NULL;
-		rq = &qp->r_rq;
-	}
-	spin_lock_irqsave(&rq->lock, flags);
-	if (rq->tail == rq->head) {
-		spin_unlock_irqrestore(&rq->lock, flags);
+	if (qp->r_reuse_sge)
+		qp->r_reuse_sge = 0;
+	else if (!ipath_get_rwqe(qp, 0)) {
 		/*
 		 * Count VL15 packets dropped due to no receive buffer.
 		 * Otherwise, count them as buffer overruns since usually,
@@ -573,39 +636,11 @@
 		goto bail;
 	}
 	/* Silently drop packets which are too big. */
-	wqe = get_rwqe_ptr(rq, rq->tail);
-	if (wc.byte_len > wqe->length) {
-		spin_unlock_irqrestore(&rq->lock, flags);
+	if (wc.byte_len > qp->r_len) {
+		qp->r_reuse_sge = 1;
 		dev->n_pkt_drops++;
 		goto bail;
 	}
-	wc.wr_id = wqe->wr_id;
-	qp->r_sge.sge = wqe->sg_list[0];
-	qp->r_sge.sg_list = wqe->sg_list + 1;
-	qp->r_sge.num_sge = wqe->num_sge;
-	if (++rq->tail >= rq->size)
-		rq->tail = 0;
-	if (srq && srq->ibsrq.event_handler) {
-		u32 n;
-
-		if (rq->head < rq->tail)
-			n = rq->size + rq->head - rq->tail;
-		else
-			n = rq->head - rq->tail;
-		if (n < srq->limit) {
-			struct ib_event ev;
-
-			srq->limit = 0;
-			spin_unlock_irqrestore(&rq->lock, flags);
-			ev.device = qp->ibqp.device;
-			ev.element.srq = qp->ibqp.srq;
-			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
-			srq->ibsrq.event_handler(&ev,
-						 srq->ibsrq.srq_context);
-		} else
-			spin_unlock_irqrestore(&rq->lock, flags);
-	} else
-		spin_unlock_irqrestore(&rq->lock, flags);
 	if (has_grh) {
 		ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
 			       sizeof(struct ib_grh));
@@ -614,6 +649,7 @@
 		ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
 	ipath_copy_sge(&qp->r_sge, data,
 		       wc.byte_len - sizeof(struct ib_grh));
+	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
 	wc.opcode = IB_WC_RECV;
 	wc.vendor_err = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index d70a9b6..b8381c5 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -33,15 +33,13 @@
 
 #include <rdma/ib_mad.h>
 #include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
 #include <linux/utsname.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
-/* Not static, because we don't want the compiler removing it */
-const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
-
 static unsigned int ib_ipath_qp_table_size = 251;
 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
@@ -52,10 +50,6 @@
 MODULE_PARM_DESC(lkey_table_size,
 		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
 
-unsigned int ib_ipath_debug;	/* debug mask */
-module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(debug, "Verbs debug mask");
-
 static unsigned int ib_ipath_max_pds = 0xFFFF;
 module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_pds,
@@ -79,6 +73,10 @@
 		   S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
 
+unsigned int ib_ipath_max_qps = 16384;	/* non-static; users not shown here */
+module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
 unsigned int ib_ipath_max_sges = 0x60;
 module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
@@ -109,9 +107,9 @@
 		   uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@pathscale.com>");
-MODULE_DESCRIPTION("QLogic InfiniPath driver");
+static unsigned int ib_ipath_disable_sma;	/* nonzero: skip SMI/GSI rcv */
+module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 
 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = 0,
@@ -125,6 +123,16 @@
 	[IB_QPS_ERR] = 0,
 };
 
+struct ipath_ucontext {
+	struct ib_ucontext ibucontext;	/* core ucontext embedded here */
+};
+/* Map an embedded ib_ucontext back to its enclosing ipath_ucontext. */
+static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
+						  *ibucontext)
+{
+	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
+}
+
 /*
  * Translate ib_wr_opcode into ib_wc_opcode.
  */
@@ -277,11 +285,12 @@
 			      struct ib_recv_wr **bad_wr)
 {
 	struct ipath_qp *qp = to_iqp(ibqp);
+	struct ipath_rwq *wq = qp->r_rq.wq;
 	unsigned long flags;
 	int ret;
 
 	/* Check that state is OK to post receive. */
-	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
+	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
 		*bad_wr = wr;
 		ret = -EINVAL;
 		goto bail;
@@ -290,59 +299,31 @@
 	for (; wr; wr = wr->next) {
 		struct ipath_rwqe *wqe;
 		u32 next;
-		int i, j;
+		int i;
 
-		if (wr->num_sge > qp->r_rq.max_sge) {
+		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
 		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = qp->r_rq.head + 1;
+		next = wq->head + 1;
 		if (next >= qp->r_rq.size)
 			next = 0;
-		if (next == qp->r_rq.tail) {
+		if (next == wq->tail) {
 			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 			*bad_wr = wr;
 			ret = -ENOMEM;
 			goto bail;
 		}
 
-		wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
+		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
 		wqe->wr_id = wr->wr_id;
-		wqe->sg_list[0].mr = NULL;
-		wqe->sg_list[0].vaddr = NULL;
-		wqe->sg_list[0].length = 0;
-		wqe->sg_list[0].sge_length = 0;
-		wqe->length = 0;
-		for (i = 0, j = 0; i < wr->num_sge; i++) {
-			/* Check LKEY */
-			if (to_ipd(qp->ibqp.pd)->user &&
-			    wr->sg_list[i].lkey == 0) {
-				spin_unlock_irqrestore(&qp->r_rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			if (wr->sg_list[i].length == 0)
-				continue;
-			if (!ipath_lkey_ok(
-				    &to_idev(qp->ibqp.device)->lk_table,
-				    &wqe->sg_list[j], &wr->sg_list[i],
-				    IB_ACCESS_LOCAL_WRITE)) {
-				spin_unlock_irqrestore(&qp->r_rq.lock,
-						       flags);
-				*bad_wr = wr;
-				ret = -EINVAL;
-				goto bail;
-			}
-			wqe->length += wr->sg_list[i].length;
-			j++;
-		}
-		wqe->num_sge = j;
-		qp->r_rq.head = next;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		wq->head = next;
 		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 	}
 	ret = 0;
@@ -377,6 +358,9 @@
 	switch (qp->ibqp.qp_type) {
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:
+		if (ib_ipath_disable_sma)
+			break;
+		/* FALLTHROUGH */
 	case IB_QPT_UD:
 		ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
 		break;
@@ -395,7 +379,7 @@
 }
 
 /**
- * ipath_ib_rcv - process and incoming packet
+ * ipath_ib_rcv - process an incoming packet
  * @arg: the device pointer
  * @rhdr: the header of the packet
  * @data: the packet data
@@ -404,9 +388,9 @@
  * This is called from ipath_kreceive() to process an incoming packet at
  * interrupt level. Tlen is the length of the header + data + CRC in bytes.
  */
-static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
+void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
+		  u32 tlen)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_ib_header *hdr = rhdr;
 	struct ipath_other_headers *ohdr;
 	struct ipath_qp *qp;
@@ -427,7 +411,7 @@
 	lid = be16_to_cpu(hdr->lrh[1]);
 	if (lid < IPATH_MULTICAST_LID_BASE) {
 		lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
-		if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
+		if (unlikely(lid != dev->dd->ipath_lid)) {
 			dev->rcv_errors++;
 			goto bail;
 		}
@@ -495,9 +479,8 @@
  * This is called from ipath_do_rcv_timer() at interrupt level to check for
  * QPs which need retransmits and to collect performance numbers.
  */
-static void ipath_ib_timer(void *arg)
+void ipath_ib_timer(struct ipath_ibdev *dev)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_qp *resend = NULL;
 	struct list_head *last;
 	struct ipath_qp *qp;
@@ -539,19 +522,19 @@
 	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
 	    --dev->pma_sample_start == 0) {
 		dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
-		ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
-					      &dev->ipath_rword,
-					      &dev->ipath_spkts,
-					      &dev->ipath_rpkts,
-					      &dev->ipath_xmit_wait);
+		ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
+					&dev->ipath_rword,
+					&dev->ipath_spkts,
+					&dev->ipath_rpkts,
+					&dev->ipath_xmit_wait);
 	}
 	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 		if (dev->pma_sample_interval == 0) {
 			u64 ta, tb, tc, td, te;
 
 			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
-			ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
-						      &tc, &td, &te);
+			ipath_snapshot_counters(dev->dd, &ta, &tb,
+						&tc, &td, &te);
 
 			dev->ipath_sword = ta - dev->ipath_sword;
 			dev->ipath_rword = tb - dev->ipath_rword;
@@ -581,6 +564,362 @@
 	}
 }
 
+static void update_sge(struct ipath_sge_state *ss, u32 length)
+{
+	struct ipath_sge *sge = &ss->sge;
+	/* Advance the current SGE of 'ss' past 'length' bytes just consumed. */
+	sge->vaddr += length;
+	sge->length -= length;
+	sge->sge_length -= length;
+	if (sge->sge_length == 0) {	/* the whole SGE has been consumed */
+		if (--ss->num_sge)	/* another SGE remains: make it current */
+			*sge = *ss->sg_list++;
+	} else if (sge->length == 0 && sge->mr != NULL) {	/* segment used up, SGE is not */
+		if (++sge->n >= IPATH_SEGSZ) {	/* ran off the end of this segs[] array */
+			if (++sge->m >= sge->mr->mapsz)
+				return;	/* no map[] entry left; vaddr/length are not reloaded */
+			sge->n = 0;
+		}
+		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+	}
+}
+
+#ifdef __LITTLE_ENDIAN /* little-endian flavour of the shift helpers used by copy_io() */
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+	return data >> shift;	/* drop the low 'shift' bits; the rest move down */
+}
+/* Move 'data' up by 'shift' bits; the vacated low bits are zero. */
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+	return data << shift;
+}
+/* Keep the low 'n' bytes of 'data', placed 'off' bytes up; all else is zero. */
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);	/* discard everything above the low n bytes */
+	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);	/* settle them at byte offset 'off' */
+	return data;
+}
+#else /* big-endian: same helpers with the shift directions mirrored */
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+	return data << shift;	/* drop the high 'shift' bits; the rest move up */
+}
+/* Move 'data' down by 'shift' bits; the vacated high bits are zero. */
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+	return data >> shift;
+}
+/* Keep the high 'n' bytes of 'data', placed 'off' bytes below the top; all else is zero. */
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);	/* discard everything below the high n bytes */
+	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);	/* leave 'off' empty bytes above them */
+	return data;
+}
+#endif /* __LITTLE_ENDIAN */
+
+static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
+		    u32 length)
+{
+	u32 extra = 0;	/* bytes already gathered in 'data' but not yet written */
+	u32 data = 0;	/* partial dword being assembled */
+	u32 last;	/* final dword; written only after the flush at the end */
+	/* Copy 'length' bytes described by 'ss' to 'piobuf' as 32-bit words. */
+	while (1) {
+		u32 len = ss->sge.length;
+		u32 off;
+		/* len becomes min(bytes left in segment, in packet, in this SGE). */
+		BUG_ON(len == 0);
+		if (len > length)
+			len = length;
+		if (len > ss->sge.sge_length)
+			len = ss->sge.sge_length;
+		/* If the source address is not aligned, try to align it. */
+		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+		if (off) {
+			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+					    ~(sizeof(u32) - 1));
+			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+			u32 y;
+
+			y = sizeof(u32) - off;	/* bytes up to the next dword boundary */
+			if (len > y)
+				len = y;
+			if (len + extra >= sizeof(u32)) {	/* 'data' becomes a full dword */
+				data |= set_upper_bits(v, extra *
+						       BITS_PER_BYTE);
+				len = sizeof(u32) - extra;	/* bytes of v actually used */
+				if (len == length) {	/* end of packet: hold the dword back */
+					last = data;
+					break;
+				}
+				__raw_writel(data, piobuf);
+				piobuf++;
+				extra = 0;
+				data = 0;
+			} else {
+				/* Clear unused upper bytes */
+				data |= clear_upper_bytes(v, len, extra);
+				if (len == length) {	/* end of packet: partial dword is the last one */
+					last = data;
+					break;
+				}
+				extra += len;
+			}
+		} else if (extra) {
+			/* Source address is aligned, but 'extra' bytes are pending in 'data'. */
+			u32 *addr = (u32 *) ss->sge.vaddr;
+			int shift = extra * BITS_PER_BYTE;
+			int ushift = 32 - shift;
+			u32 l = len;
+
+			while (l >= sizeof(u32)) {
+				u32 v = *addr;
+
+				data |= set_upper_bits(v, shift);
+				__raw_writel(data, piobuf);
+				data = get_upper_bits(v, ushift);	/* carry the part of v not yet written */
+				piobuf++;
+				addr++;
+				l -= sizeof(u32);
+			}
+			/*
+			 * We still have 'extra' number of bytes leftover.
+			 */
+			if (l) {
+				u32 v = *addr;
+
+				if (l + extra >= sizeof(u32)) {
+					data |= set_upper_bits(v, shift);
+					len -= l + extra - sizeof(u32);	/* do not count bytes of v that did not fit */
+					if (len == length) {
+						last = data;
+						break;
+					}
+					__raw_writel(data, piobuf);
+					piobuf++;
+					extra = 0;
+					data = 0;
+				} else {
+					/* Clear unused upper bytes */
+					data |= clear_upper_bytes(v, l,
+								  extra);
+					if (len == length) {
+						last = data;
+						break;
+					}
+					extra += l;
+				}
+			} else if (len == length) {
+				last = data;
+				break;
+			}
+		} else if (len == length) {
+			u32 w;
+
+			/*
+			 * Need to round up for the last dword in the
+			 * packet.
+			 */
+			w = (len + 3) >> 2;
+			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);	/* all but the last dword */
+			piobuf += w - 1;
+			last = ((u32 *) ss->sge.vaddr)[w - 1];
+			break;
+		} else {
+			u32 w = len >> 2;	/* whole dwords in this chunk */
+
+			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
+			piobuf += w;
+			/* Stash the trailing bytes (len % 4), if any, for the next pass. */
+			extra = len & (sizeof(u32) - 1);
+			if (extra) {
+				u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+				/* Clear unused upper bytes */
+				data = clear_upper_bytes(v, extra, 0);
+			}
+		}
+		update_sge(ss, len);
+		length -= len;
+	}
+	/* Update address before sending packet; every break leaves len == length. */
+	update_sge(ss, length);
+	/* everything before the trigger word must be flushed first */
+	ipath_flush_wc();
+	__raw_writel(last, piobuf);
+	/* be sure trigger word is written */
+	ipath_flush_wc();
+}
+
+/**
+ * ipath_verbs_send - copy one packet (header + payload) into a PIO buffer
+ * @dd: the infinipath device
+ * @hdrwords: the number of 32-bit words in the header
+ * @hdr: the packet header
+ * @len: the length of the payload described by @ss, in bytes
+ * @ss: the SGE state to send from
+ */
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+		     u32 *hdr, u32 len, struct ipath_sge_state *ss)
+{
+	u32 __iomem *piobuf;
+	u32 plen;
+	int ret;
+	/* Returns 0 on success, -EINVAL if too long, -EBUSY if no PIO buffer. */
+	/* +1 is for the qword padding of pbc; plen is counted in dwords */
+	plen = hdrwords + ((len + 3) >> 2) + 1;
+	if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
+		ipath_dbg("packet len 0x%x too long, failing\n", plen);
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	/* Get a PIO buffer to use. */
+	piobuf = ipath_getpiobuf(dd, NULL);
+	if (unlikely(piobuf == NULL)) {
+		ret = -EBUSY;
+		goto bail;
+	}
+
+	/*
+	 * Write len to control qword, no flags.
+	 * We have to flush after the PBC for correctness on some cpus
+	 * or WC buffer can be written out of order.
+	 */
+	writeq(plen, piobuf);
+	ipath_flush_wc();
+	piobuf += 2;	/* step over the 64-bit control word just written */
+	if (len == 0) {
+		/*
+		 * If there is just the header portion, must flush before
+		 * writing last word of header for correctness, and after
+		 * the last header word (trigger word).
+		 */
+		__iowrite32_copy(piobuf, hdr, hdrwords - 1);
+		ipath_flush_wc();
+		__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+		ipath_flush_wc();
+		ret = 0;
+		goto bail;
+	}
+
+	__iowrite32_copy(piobuf, hdr, hdrwords);
+	piobuf += hdrwords;
+
+	/* The common case is aligned and contained in one segment. */
+	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
+		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+		u32 w;
+		u32 *addr = (u32 *) ss->sge.vaddr;	/* saved: update_sge() below moves ss->sge.vaddr */
+
+		/* Update address before sending packet. */
+		update_sge(ss, len);
+		/* Need to round up for the last dword in the packet. */
+		w = (len + 3) >> 2;
+		__iowrite32_copy(piobuf, addr, w - 1);
+		/* everything before the trigger word must be flushed first */
+		ipath_flush_wc();
+		__raw_writel(addr[w - 1], piobuf + w - 1);
+		/* be sure trigger word is written */
+		ipath_flush_wc();
+		ret = 0;
+		goto bail;
+	}
+	copy_io(piobuf, ss, len);	/* general case: several SGEs/segments or unaligned source */
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+			    u64 *rwords, u64 *spkts, u64 *rpkts,
+			    u64 *xmit_wait)
+{
+	int ret;
+	/* Fills all five outputs and returns 0, or returns -EINVAL and writes none. */
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+		ret = -EINVAL;
+		goto bail;
+	}
+	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);	/* words sent */
+	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);	/* words received */
+	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);	/* packets sent */
+	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);	/* packets received */
+	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);	/* taken from the send-stall counter */
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * ipath_get_counters - get various chip counters
+ * @dd: the infinipath device
+ * @cntrs: counters are placed here (left untouched on failure)
+ *
+ * Fetch the counters needed by recv_pma_get_portcounters(); returns 0 or -EINVAL.
+ */
+int ipath_get_counters(struct ipath_devdata *dd,
+		       struct ipath_verbs_counters *cntrs)
+{
+	int ret;
+
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
+		ret = -EINVAL;
+		goto bail;
+	}
+	cntrs->symbol_error_counter =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+	cntrs->link_error_recovery_counter =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+	/*
+	 * The link downed counter counts when the other side downs the
+	 * connection.  NOTE(review): this comment used to promise adding our
+	 * own link-integrity downs, but only the chip counter is stored here.
+	 */
+	cntrs->link_downed_counter =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
+	cntrs->port_rcv_errors =	/* sum of nine receive-error counters */
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
+	cntrs->port_rcv_remphys_errors =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
+	cntrs->port_xmit_discards =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
+	cntrs->port_xmit_data =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+	cntrs->port_rcv_data =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+	cntrs->port_xmit_packets =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+	cntrs->port_rcv_packets =
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+	cntrs->local_link_integrity_errors = dd->ipath_lli_errors;	/* read from dd, not snapped from the chip */
+	cntrs->excessive_buffer_overrun_errors = 0; /* XXX: always reported as 0 */
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
 /**
  * ipath_ib_piobufavail - callback when a PIO buffer is available
  * @arg: the device pointer
@@ -591,9 +930,8 @@
  * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
  * return zero).
  */
-static int ipath_ib_piobufavail(void *arg)
+int ipath_ib_piobufavail(struct ipath_ibdev *dev)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_qp *qp;
 	unsigned long flags;
 
@@ -624,14 +962,14 @@
 		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
 		IB_DEVICE_SYS_IMAGE_GUID;
 	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id = ipath_layer_get_vendorid(dev->dd);
-	props->vendor_part_id = ipath_layer_get_deviceid(dev->dd);
-	props->hw_ver = ipath_layer_get_pcirev(dev->dd);
+	props->vendor_id = dev->dd->ipath_vendorid;
+	props->vendor_part_id = dev->dd->ipath_deviceid;
+	props->hw_ver = dev->dd->ipath_pcirev;
 
 	props->sys_image_guid = dev->sys_image_guid;
 
 	props->max_mr_size = ~0ull;
-	props->max_qp = dev->qp_table.max;
+	props->max_qp = ib_ipath_max_qps;
 	props->max_qp_wr = ib_ipath_max_qp_wrs;
 	props->max_sge = ib_ipath_max_sges;
 	props->max_cq = ib_ipath_max_cqs;
@@ -647,7 +985,7 @@
 	props->max_srq_sge = ib_ipath_max_srq_sges;
 	/* props->local_ca_ack_delay */
 	props->atomic_cap = IB_ATOMIC_HCA;
-	props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
+	props->max_pkeys = ipath_get_npkeys(dev->dd);
 	props->max_mcast_grp = ib_ipath_max_mcast_grps;
 	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
 	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
@@ -672,12 +1010,17 @@
 	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
 };
 
+u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
+{
+	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);	/* raw value; ipath_query_port() derives bad_pkey_cntr from it */
+}
+
 static int ipath_query_port(struct ib_device *ibdev,
 			    u8 port, struct ib_port_attr *props)
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	enum ib_mtu mtu;
-	u16 lid = ipath_layer_get_lid(dev->dd);
+	u16 lid = dev->dd->ipath_lid;
 	u64 ibcstat;
 
 	memset(props, 0, sizeof(*props));
@@ -685,16 +1028,16 @@
 	props->lmc = dev->mkeyprot_resv_lmc & 7;
 	props->sm_lid = dev->sm_lid;
 	props->sm_sl = dev->sm_sl;
-	ibcstat = ipath_layer_get_lastibcstat(dev->dd);
+	ibcstat = dev->dd->ipath_lastibcstat;
 	props->state = ((ibcstat >> 4) & 0x3) + 1;
 	/* See phys_state_show() */
 	props->phys_state = ipath_cvt_physportstate[
-		ipath_layer_get_lastibcstat(dev->dd) & 0xf];
+		dev->dd->ipath_lastibcstat & 0xf];
 	props->port_cap_flags = dev->port_cap_flags;
 	props->gid_tbl_len = 1;
 	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
-	props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
+	props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
+	props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
 		dev->z_pkey_violations;
 	props->qkey_viol_cntr = dev->qkey_violations;
 	props->active_width = IB_WIDTH_4X;
@@ -704,7 +1047,7 @@
 	props->init_type_reply = 0;
 
 	props->max_mtu = IB_MTU_4096;
-	switch (ipath_layer_get_ibmtu(dev->dd)) {
+	switch (dev->dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
 		break;
@@ -763,7 +1106,7 @@
 	dev->port_cap_flags |= props->set_port_cap_mask;
 	dev->port_cap_flags &= ~props->clr_port_cap_mask;
 	if (port_modify_mask & IB_PORT_SHUTDOWN)
-		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
+		ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
 	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
 		dev->qkey_violations = 0;
 	return 0;
@@ -780,7 +1123,7 @@
 		goto bail;
 	}
 	gid->global.subnet_prefix = dev->gid_prefix;
-	gid->global.interface_id = ipath_layer_get_guid(dev->dd);
+	gid->global.interface_id = dev->dd->ipath_guid;
 
 	ret = 0;
 
@@ -803,18 +1146,22 @@
 	 * we allow allocations of more than we report for this value.
 	 */
 
-	if (dev->n_pds_allocated == ib_ipath_max_pds) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
 	pd = kmalloc(sizeof *pd, GFP_KERNEL);
 	if (!pd) {
 		ret = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
 
+	spin_lock(&dev->n_pds_lock);
+	if (dev->n_pds_allocated == ib_ipath_max_pds) {
+		spin_unlock(&dev->n_pds_lock);
+		kfree(pd);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
 	dev->n_pds_allocated++;
+	spin_unlock(&dev->n_pds_lock);
 
 	/* ib_alloc_pd() will initialize pd->ibpd. */
 	pd->user = udata != NULL;
@@ -830,7 +1177,9 @@
 	struct ipath_pd *pd = to_ipd(ibpd);
 	struct ipath_ibdev *dev = to_idev(ibpd->device);
 
+	spin_lock(&dev->n_pds_lock);
 	dev->n_pds_allocated--;
+	spin_unlock(&dev->n_pds_lock);
 
 	kfree(pd);
 
@@ -851,11 +1200,6 @@
 	struct ib_ah *ret;
 	struct ipath_ibdev *dev = to_idev(pd->device);
 
-	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
 	/* A multicast address requires a GRH (see ch. 8.4.1). */
 	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
 	    ah_attr->dlid != IPATH_PERMISSIVE_LID &&
@@ -881,7 +1225,16 @@
 		goto bail;
 	}
 
+	spin_lock(&dev->n_ahs_lock);
+	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
+		spin_unlock(&dev->n_ahs_lock);
+		kfree(ah);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
 	dev->n_ahs_allocated++;
+	spin_unlock(&dev->n_ahs_lock);
 
 	/* ib_create_ah() will initialize ah->ibah. */
 	ah->attr = *ah_attr;
@@ -903,7 +1256,9 @@
 	struct ipath_ibdev *dev = to_idev(ibah->device);
 	struct ipath_ah *ah = to_iah(ibah);
 
+	spin_lock(&dev->n_ahs_lock);
 	dev->n_ahs_allocated--;
+	spin_unlock(&dev->n_ahs_lock);
 
 	kfree(ah);
 
@@ -919,25 +1274,50 @@
 	return 0;
 }
 
+/**
+ * ipath_get_npkeys - return the number of entries in the port 0 PKEY table
+ * @dd: the infinipath device
+ */
+unsigned ipath_get_npkeys(struct ipath_devdata *dd)
+{
+	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);	/* element count of the port_pkeys[] array */
+}
+
+/**
+ * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * @dd: the infinipath device
+ * @index: the PKEY index; an out-of-range index yields 0
+ */
+unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
+{
+	unsigned ret;
+
+	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
+		ret = 0;	/* beyond the table: report 0 instead of reading past it */
+	else
+		ret = dd->ipath_pd[0]->port_pkeys[index];
+
+	return ret;
+}
+
 static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 			    u16 *pkey)
 {
 	struct ipath_ibdev *dev = to_idev(ibdev);
 	int ret;
 
-	if (index >= ipath_layer_get_npkeys(dev->dd)) {
+	if (index >= ipath_get_npkeys(dev->dd)) {
 		ret = -EINVAL;
 		goto bail;
 	}
 
-	*pkey = ipath_layer_get_pkey(dev->dd, index);
+	*pkey = ipath_get_pkey(dev->dd, index);
 	ret = 0;
 
 bail:
 	return ret;
 }
 
-
 /**
  * ipath_alloc_ucontext - allocate a ucontest
  * @ibdev: the infiniband device
@@ -970,26 +1350,91 @@
 
 static int ipath_verbs_register_sysfs(struct ib_device *dev);
 
+static void __verbs_timer(unsigned long arg)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *) arg;
+	/* Timer callback; 'arg' is the ipath_devdata stored by enable_timer(). */
+	/*
+	 * If port 0 receive packet interrupts are not available, or
+	 * can be missed, poll the receive queue
+	 */
+	if (dd->ipath_flags & IPATH_POLL_RX_INTR)
+		ipath_kreceive(dd);
+	/* NOTE(review): verbs_dev is assigned after enable_timer() runs -- confirm no race. */
+	/* Handle verbs layer timeouts. */
+	ipath_ib_timer(dd->verbs_dev);
+
+	mod_timer(&dd->verbs_timer, jiffies + 1);	/* re-arm: run again one jiffy from now */
+}
+
+static int enable_timer(struct ipath_devdata *dd)
+{
+	/*
+	 * Early chips had a design flaw where the chip and kernel idea
+	 * of the tail register don't always agree, and therefore we won't
+	 * get an interrupt on the next packet received.
+	 * If the board supports per packet receive interrupts, use it.
+	 * Otherwise, the timer function periodically checks for packets
+	 * to cover this case.
+	 * Either way, the timer is needed for verbs layer related
+	 * processing.
+	 */
+	if (dd->ipath_flags & IPATH_GPIO_INTR) {
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
+				 0x2074076542310ULL);	/* magic value; its meaning is not visible here */
+		/* Enable GPIO bit 2 interrupt */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+				 (u64) (1 << 2));
+	}
+	/* Arm the per-device timer; __verbs_timer() then re-arms itself each time. */
+	init_timer(&dd->verbs_timer);
+	dd->verbs_timer.function = __verbs_timer;
+	dd->verbs_timer.data = (unsigned long)dd;
+	dd->verbs_timer.expires = jiffies + 1;
+	add_timer(&dd->verbs_timer);
+
+	return 0;	/* always 0 */
+}
+
+static int disable_timer(struct ipath_devdata *dd)
+{
+	/* Disable GPIO bit 2 interrupt (writes 0, i.e. clears the whole GPIO mask) */
+	if (dd->ipath_flags & IPATH_GPIO_INTR)
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
+	/* Stop the self re-arming verbs timer started by enable_timer(). */
+	del_timer_sync(&dd->verbs_timer);
+
+	return 0;	/* always 0 */
+}
+
 /**
  * ipath_register_ib_device - register our device with the infiniband core
- * @unit: the device number to register
  * @dd: the device data structure
  * Return the allocated ipath_ibdev pointer or NULL on error.
  */
-static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
+int ipath_register_ib_device(struct ipath_devdata *dd)
 {
-	struct ipath_layer_counters cntrs;
+	struct ipath_verbs_counters cntrs;
 	struct ipath_ibdev *idev;
 	struct ib_device *dev;
 	int ret;
 
 	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
-	if (idev == NULL)
+	if (idev == NULL) {
+		ret = -ENOMEM;
 		goto bail;
+	}
 
 	dev = &idev->ibdev;
 
 	/* Only need to initialize non-zero fields. */
+	spin_lock_init(&idev->n_pds_lock);
+	spin_lock_init(&idev->n_ahs_lock);
+	spin_lock_init(&idev->n_cqs_lock);
+	spin_lock_init(&idev->n_qps_lock);
+	spin_lock_init(&idev->n_srqs_lock);
+	spin_lock_init(&idev->n_mcast_grps_lock);
+
 	spin_lock_init(&idev->qp_table.lock);
 	spin_lock_init(&idev->lk_table.lock);
 	idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
@@ -1030,7 +1475,7 @@
 	idev->link_width_enabled = 3;	/* 1x or 4x */
 
 	/* Snapshot current HW counters to "clear" them. */
-	ipath_layer_get_counters(dd, &cntrs);
+	ipath_get_counters(dd, &cntrs);
 	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
 	idev->z_link_error_recovery_counter =
 		cntrs.link_error_recovery_counter;
@@ -1054,14 +1499,14 @@
 	 * device types in the system, we can't be sure this is unique.
 	 */
 	if (!sys_image_guid)
-		sys_image_guid = ipath_layer_get_guid(dd);
+		sys_image_guid = dd->ipath_guid;
 	idev->sys_image_guid = sys_image_guid;
-	idev->ib_unit = unit;
+	idev->ib_unit = dd->ipath_unit;
 	idev->dd = dd;
 
 	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
 	dev->owner = THIS_MODULE;
-	dev->node_guid = ipath_layer_get_guid(dd);
+	dev->node_guid = dd->ipath_guid;
 	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
 	dev->uverbs_cmd_mask =
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
@@ -1093,9 +1538,9 @@
 		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	dev->node_type = IB_NODE_CA;
+	dev->node_type = RDMA_NODE_IB_CA;
 	dev->phys_port_cnt = 1;
-	dev->dma_device = ipath_layer_get_device(dd);
+	dev->dma_device = &dd->pcidev->dev;
 	dev->class_dev.dev = dev->dma_device;
 	dev->query_device = ipath_query_device;
 	dev->modify_device = ipath_modify_device;
@@ -1137,9 +1582,10 @@
 	dev->attach_mcast = ipath_multicast_attach;
 	dev->detach_mcast = ipath_multicast_detach;
 	dev->process_mad = ipath_process_mad;
+	dev->mmap = ipath_mmap;
 
 	snprintf(dev->node_desc, sizeof(dev->node_desc),
-		 IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
+		 IPATH_IDSTR " %s", system_utsname.nodename);
 
 	ret = ib_register_device(dev);
 	if (ret)
@@ -1148,7 +1594,7 @@
 	if (ipath_verbs_register_sysfs(dev))
 		goto err_class;
 
-	ipath_layer_enable_timer(dd);
+	enable_timer(dd);
 
 	goto bail;
 
@@ -1160,37 +1606,32 @@
 	kfree(idev->qp_table.table);
 err_qp:
 	ib_dealloc_device(dev);
-	_VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
-		     unit, -ret);
+	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
 	idev = NULL;
 
 bail:
-	return idev;
+	dd->verbs_dev = idev;
+	return ret;
 }
 
-static void ipath_unregister_ib_device(void *arg)
+void ipath_unregister_ib_device(struct ipath_ibdev *dev)
 {
-	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ib_device *ibdev = &dev->ibdev;
 
-	ipath_layer_disable_timer(dev->dd);
+	disable_timer(dev->dd);
 
 	ib_unregister_device(ibdev);
 
 	if (!list_empty(&dev->pending[0]) ||
 	    !list_empty(&dev->pending[1]) ||
 	    !list_empty(&dev->pending[2]))
-		_VERBS_ERROR("ipath%d pending list not empty!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "pending list not empty!\n");
 	if (!list_empty(&dev->piowait))
-		_VERBS_ERROR("ipath%d piowait list not empty!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "piowait list not empty!\n");
 	if (!list_empty(&dev->rnrwait))
-		_VERBS_ERROR("ipath%d rnrwait list not empty!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
 	if (!ipath_mcast_tree_empty())
-		_VERBS_ERROR("ipath%d multicast table memory leak!\n",
-			     dev->ib_unit);
+		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
 	/*
 	 * Note that ipath_unregister_ib_device() can be called before all
 	 * the QPs are destroyed!
@@ -1201,25 +1642,12 @@
 	ib_dealloc_device(ibdev);
 }
 
-static int __init ipath_verbs_init(void)
-{
-	return ipath_verbs_register(ipath_register_ib_device,
-				    ipath_unregister_ib_device,
-				    ipath_ib_piobufavail, ipath_ib_rcv,
-				    ipath_ib_timer);
-}
-
-static void __exit ipath_verbs_cleanup(void)
-{
-	ipath_verbs_unregister();
-}
-
 static ssize_t show_rev(struct class_device *cdev, char *buf)
 {
 	struct ipath_ibdev *dev =
 		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 
-	return sprintf(buf, "%x\n", ipath_layer_get_pcirev(dev->dd));
+	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
 }
 
 static ssize_t show_hca(struct class_device *cdev, char *buf)
@@ -1228,7 +1656,7 @@
 		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 	int ret;
 
-	ret = ipath_layer_get_boardname(dev->dd, buf, 128);
+	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
 	if (ret < 0)
 		goto bail;
 	strcat(buf, "\n");
@@ -1305,6 +1733,3 @@
 bail:
 	return ret;
 }
-
-module_init(ipath_verbs_init);
-module_exit(ipath_verbs_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 2df6847..09bbb3f 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -38,10 +38,10 @@
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kref.h>
 #include <rdma/ib_pack.h>
 
 #include "ipath_layer.h"
-#include "verbs_debug.h"
 
 #define QPN_MAX                 (1 << 24)
 #define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
@@ -50,7 +50,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IPATH_UVERBS_ABI_VERSION       1
+#define IPATH_UVERBS_ABI_VERSION       2
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
@@ -152,19 +152,6 @@
 	int n_attached;
 };
 
-/* Memory region */
-struct ipath_mr {
-	struct ib_mr ibmr;
-	struct ipath_mregion mr;	/* must be last */
-};
-
-/* Fast memory region */
-struct ipath_fmr {
-	struct ib_fmr ibfmr;
-	u8 page_shift;
-	struct ipath_mregion mr;	/* must be last */
-};
-
 /* Protection domain */
 struct ipath_pd {
 	struct ib_pd ibpd;
@@ -178,58 +165,89 @@
 };
 
 /*
- * Quick description of our CQ/QP locking scheme:
- *
- * We have one global lock that protects dev->cq/qp_table.  Each
- * struct ipath_cq/qp also has its own lock.  An individual qp lock
- * may be taken inside of an individual cq lock.  Both cqs attached to
- * a qp may be locked, with the send cq locked first.  No other
- * nesting should be done.
- *
- * Each struct ipath_cq/qp also has an atomic_t ref count.  The
- * pointer from the cq/qp_table to the struct counts as one reference.
- * This reference also is good for access through the consumer API, so
- * modifying the CQ/QP etc doesn't need to take another reference.
- * Access because of a completion being polled does need a reference.
- *
- * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
- * destroy function to sleep on.
- *
- * This means that access from the consumer API requires nothing but
- * taking the struct's lock.
- *
- * Access because of a completion event should go as follows:
- * - lock cq/qp_table and look up struct
- * - increment ref count in struct
- * - drop cq/qp_table lock
- * - lock struct, do your thing, and unlock struct
- * - decrement ref count; if zero, wake up waiters
- *
- * To destroy a CQ/QP, we can do the following:
- * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
- * - decrement ref count
- * - wait_event until ref count is zero
- *
- * It is the consumer's responsibilty to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
- * QP is destroyed.  Also, the consumer must make sure that calls to
- * qp_modify are serialized.
- *
- * Possible optimizations (wait for profile data to see if/where we
- * have locks bouncing between CPUs):
- * - split cq/qp table lock into n separate (cache-aligned) locks,
- *   indexed (say) by the page in the table
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
  */
+struct ipath_mmap_info {
+	struct ipath_mmap_info *next;	/* presumably chains entries on ipath_ibdev.pending_mmaps -- confirm */
+	struct ib_ucontext *context;
+	void *obj;	/* presumably the memory to be mapped -- confirm */
+	struct kref ref;
+	unsigned size;	/* presumably the size of 'obj' in bytes -- confirm */
+	unsigned mmap_cnt;
+};
 
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct ipath_cq_wc {
+	u32 head;		/* index of next entry to fill */
+	u32 tail;		/* index of next ib_poll_cq() entry */
+	struct ib_wc queue[1];	/* placeholder length; really ibcq.cqe + 1 entries */
+};
+
+/*
+ * The completion queue structure.
+ */
 struct ipath_cq {
 	struct ib_cq ibcq;
 	struct tasklet_struct comptask;
 	spinlock_t lock;
 	u8 notify;
 	u8 triggered;
-	u32 head;		/* new records added to the head */
-	u32 tail;		/* poll_cq() reads from here. */
-	struct ib_wc *queue;	/* this is actually ibcq.cqe + 1 */
+	struct ipath_cq_wc *queue;
+	struct ipath_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct ipath_seg {
+	void *vaddr;	/* start address of the segment */
+	size_t length;	/* segment length in bytes */
+};
+
+/* The number of ipath_segs that fit in a page. */
+#define IPATH_SEGSZ     (PAGE_SIZE / sizeof (struct ipath_seg))
+/* At most one page of segments; ipath_mregion.map[] points at these. */
+struct ipath_segarray {
+	struct ipath_seg segs[IPATH_SEGSZ];
+};
+
+struct ipath_mregion {
+	u64 user_base;		/* User's address for this region */
+	u64 iova;		/* IB start address of this region */
+	size_t length;		/* presumably the region length in bytes -- confirm */
+	u32 lkey;
+	u32 offset;		/* offset (bytes) to start of region */
+	int access_flags;
+	u32 max_segs;		/* number of ipath_segs in all the arrays */
+	u32 mapsz;		/* size of the map array */
+	struct ipath_segarray *map[0];	/* the segments: mapsz pointers, so this must stay last */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct ipath_sge {
+	struct ipath_mregion *mr;	/* region being walked; update_sge() tolerates NULL */
+	void *vaddr;		/* current pointer into the segment */
+	u32 sge_length;		/* length of the SGE */
+	u32 length;		/* remaining length of the segment */
+	u16 m;			/* current index: mr->map[m] */
+	u16 n;			/* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct ipath_mr {
+	struct ib_mr ibmr;
+	struct ipath_mregion mr;	/* must be last: it ends in the variable-length map[] */
+};
 
 /*
@@ -248,32 +266,50 @@
 
 /*
  * Receive work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->r_max_sge.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
  */
 struct ipath_rwqe {
 	u64 wr_id;
-	u32 length;		/* total length of data in sg_list */
 	u8 num_sge;
-	struct ipath_sge sg_list[0];
+	struct ib_sge sg_list[0];
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
+	u32 head;		/* new work requests posted to the head */
+	u32 tail;		/* receives pull requests from here. */
+	struct ipath_rwqe wq[0];	/* variable-size entries: never index directly */
+};
 
 struct ipath_rq {
+	struct ipath_rwq *wq;
 	spinlock_t lock;
-	u32 head;		/* new work requests posted to the head */
-	u32 tail;		/* receives pull requests from here. */
 	u32 size;		/* size of RWQE array */
 	u8 max_sge;
-	struct ipath_rwqe *wq;	/* RWQE array */
 };
 
 struct ipath_srq {
 	struct ib_srq ibsrq;
 	struct ipath_rq rq;
+	struct ipath_mmap_info *ip;
 	/* send signal when number of RWQEs < limit */
 	u32 limit;
 };
 
+struct ipath_sge_state {
+	struct ipath_sge *sg_list;      /* next SGE to be used if any */
+	struct ipath_sge sge;   /* progress state for the current SGE */
+	u8 num_sge;		/* SGEs not yet fully consumed, counting the current one */
+};
+
 /*
  * Variables prefixed with s_ are for the requester (sender).
  * Variables prefixed with r_ are for the responder (receiver).
@@ -293,6 +329,7 @@
 	atomic_t refcount;
 	wait_queue_head_t wait;
 	struct tasklet_struct s_task;
+	struct ipath_mmap_info *ip;
 	struct ipath_sge_state *s_cur_sge;
 	struct ipath_sge_state s_sge;	/* current send request data */
 	/* current RDMA read send data */
@@ -334,6 +371,7 @@
 	u8 s_retry;		/* requester retry counter */
 	u8 s_rnr_retry;		/* requester RNR retry counter */
 	u8 s_pkey_index;	/* PKEY index to use */
+	u8 timeout;		/* Timeout for this QP */
 	enum ib_mtu path_mtu;
 	u32 remote_qpn;
 	u32 qkey;		/* QKEY for this QP (for UD or RD) */
@@ -345,7 +383,8 @@
 	u32 s_ssn;		/* SSN of tail entry */
 	u32 s_lsn;		/* limit sequence number (credit) */
 	struct ipath_swqe *s_wq;	/* send work queue */
-	struct ipath_rq r_rq;	/* receive work queue */
+	struct ipath_rq r_rq;		/* receive work queue */
+	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
 
 /*
@@ -369,15 +408,15 @@
 
 /*
  * Since struct ipath_rwqe is not a fixed size, we can't simply index into
- * struct ipath_rq.wq.  This function does the array index computation.
+ * struct ipath_rwq.wq.  This function does the array index computation.
  */
 static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
 					      unsigned n)
 {
 	return (struct ipath_rwqe *)
-		((char *) rq->wq +
+		((char *) rq->wq->wq +
 		 (sizeof(struct ipath_rwqe) +
-		  rq->max_sge * sizeof(struct ipath_sge)) * n);
+		  rq->max_sge * sizeof(struct ib_sge)) * n);
 }
 
 /*
@@ -417,6 +456,7 @@
 	struct ib_device ibdev;
 	struct list_head dev_list;
 	struct ipath_devdata *dd;
+	struct ipath_mmap_info *pending_mmaps;
 	int ib_unit;		/* This is the device number */
 	u16 sm_lid;		/* in host order */
 	u8 sm_sl;
@@ -435,11 +475,20 @@
 	__be64 sys_image_guid;	/* in network order */
 	__be64 gid_prefix;	/* in network order */
 	__be64 mkey;
+
 	u32 n_pds_allocated;	/* number of PDs allocated for device */
+	spinlock_t n_pds_lock;
 	u32 n_ahs_allocated;	/* number of AHs allocated for device */
+	spinlock_t n_ahs_lock;
 	u32 n_cqs_allocated;	/* number of CQs allocated for device */
+	spinlock_t n_cqs_lock;
+	u32 n_qps_allocated;	/* number of QPs allocated for device */
+	spinlock_t n_qps_lock;
 	u32 n_srqs_allocated;	/* number of SRQs allocated for device */
+	spinlock_t n_srqs_lock;
 	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+	spinlock_t n_mcast_grps_lock;
+
 	u64 ipath_sword;	/* total dwords sent (sample result) */
 	u64 ipath_rword;	/* total dwords received (sample result) */
 	u64 ipath_spkts;	/* total packets sent (sample result) */
@@ -494,8 +543,19 @@
 	struct ipath_opcode_stats opstats[128];
 };
 
-struct ipath_ucontext {
-	struct ib_ucontext ibucontext;
+struct ipath_verbs_counters {
+	u64 symbol_error_counter;
+	u64 link_error_recovery_counter;
+	u64 link_downed_counter;
+	u64 port_rcv_errors;
+	u64 port_rcv_remphys_errors;
+	u64 port_xmit_discards;
+	u64 port_xmit_data;
+	u64 port_rcv_data;
+	u64 port_xmit_packets;
+	u64 port_rcv_packets;
+	u32 local_link_integrity_errors;
+	u32 excessive_buffer_overrun_errors;
 };
 
 static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
@@ -503,11 +563,6 @@
 	return container_of(ibmr, struct ipath_mr, ibmr);
 }
 
-static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct ipath_fmr, ibfmr);
-}
-
 static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
 {
 	return container_of(ibpd, struct ipath_pd, ibpd);
@@ -545,12 +600,6 @@
 		      struct ib_grh *in_grh,
 		      struct ib_mad *in_mad, struct ib_mad *out_mad);
 
-static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
-{
-	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
-}
-
 /*
  * Compare the lower 24 bits of the two values.
  * Returns an integer <, ==, or > than zero.
@@ -562,6 +611,13 @@
 
 struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
 
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+			    u64 *rwords, u64 *spkts, u64 *rpkts,
+			    u64 *xmit_wait);
+
+int ipath_get_counters(struct ipath_devdata *dd,
+		       struct ipath_verbs_counters *cntrs);
+
 int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 
 int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
@@ -579,7 +635,7 @@
 int ipath_destroy_qp(struct ib_qp *ibqp);
 
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		    int attr_mask);
+		    int attr_mask, struct ib_udata *udata);
 
 int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		   int attr_mask, struct ib_qp_init_attr *init_attr);
@@ -592,6 +648,9 @@
 
 void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
 
+int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
+		     u32 *hdr, u32 len, struct ipath_sge_state *ss);
+
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
 
 int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
@@ -638,7 +697,8 @@
 				struct ib_udata *udata);
 
 int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask);
+		     enum ib_srq_attr_mask attr_mask,
+		     struct ib_udata *udata);
 
 int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
@@ -680,6 +740,10 @@
 
 int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
 
+void ipath_release_mmap_info(struct kref *ref);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
 void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
 
 void ipath_insert_rnr_queue(struct ipath_qp *qp);
@@ -700,6 +764,22 @@
 int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
 		      u32 pmtu, u32 *bth0p, u32 *bth2p);
 
+int ipath_register_ib_device(struct ipath_devdata *);
+
+void ipath_unregister_ib_device(struct ipath_ibdev *);
+
+void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
+
+int ipath_ib_piobufavail(struct ipath_ibdev *);
+
+void ipath_ib_timer(struct ipath_ibdev *);
+
+unsigned ipath_get_npkeys(struct ipath_devdata *);
+
+u32 ipath_get_cr_errpkey(struct ipath_devdata *);
+
+unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
+
 extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
 
 extern const u8 ipath_cvt_physportstate[];
@@ -714,6 +794,8 @@
 
 extern unsigned int ib_ipath_max_qp_wrs;
 
+extern unsigned int ib_ipath_max_qps;
+
 extern unsigned int ib_ipath_max_sges;
 
 extern unsigned int ib_ipath_max_mcast_grps;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index ee0e1d9..085e28b 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -207,12 +207,17 @@
 		goto bail;
 	}
 
+	spin_lock(&dev->n_mcast_grps_lock);
 	if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
+		spin_unlock(&dev->n_mcast_grps_lock);
 		ret = ENOMEM;
 		goto bail;
 	}
 
 	dev->n_mcast_grps_allocated++;
+	spin_unlock(&dev->n_mcast_grps_lock);
+
+	mcast->n_attached++;
 
 	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
 
@@ -343,7 +348,9 @@
 		atomic_dec(&mcast->refcount);
 		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
 		ipath_mcast_free(mcast);
+		spin_lock(&dev->n_mcast_grps_lock);
 		dev->n_mcast_grps_allocated--;
+		spin_unlock(&dev->n_mcast_grps_lock);
 	}
 
 	ret = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
new file mode 100644
index 0000000..036fde6
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only.  Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "ipath_kernel.h"
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
+ *
+ * PowerPC systems (at least those in the 970 processor family)
+ * write partially filled store buffers in address order, but will write
+ * completely filled store buffers in "random" order, and therefore must
+ * have serialization for correctness with current InfiniPath chips.
+ * Always returns 1.
+ */
+int ipath_unordered_wc(void)
+{
+	return 1;
+}
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h
deleted file mode 100644
index 6186676..0000000
--- a/drivers/infiniband/hw/ipath/verbs_debug.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _VERBS_DEBUG_H
-#define _VERBS_DEBUG_H
-
-/*
- * This file contains tracing code for the ib_ipath kernel module.
- */
-#ifndef _VERBS_DEBUGGING	/* tracing enabled or not */
-#define _VERBS_DEBUGGING 1
-#endif
-
-extern unsigned ib_ipath_debug;
-
-#define _VERBS_ERROR(fmt,...) \
-	do { \
-		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_UNIT_ERROR(unit,fmt,...) \
-	do { \
-		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
-	} while(0)
-
-#if _VERBS_DEBUGGING
-
-/*
- * Mask values for debugging.  The scheme allows us to compile out any
- * of the debug tracing stuff, and if compiled in, to enable or
- * disable dynamically.
- * This can be set at modprobe time also:
- *      modprobe ib_path ib_ipath_debug=3
- */
-
-#define __VERBS_INFO        0x1	/* generic low verbosity stuff */
-#define __VERBS_DBG         0x2	/* generic debug */
-#define __VERBS_VDBG        0x4	/* verbose debug */
-#define __VERBS_SMADBG      0x8000	/* sma packet debug */
-
-#define _VERBS_INFO(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
-			printk(KERN_INFO "%s: " fmt,"ib_ipath", \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_DBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_VDBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#define _VERBS_SMADBG(fmt,...) \
-	do { \
-		if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
-			printk(KERN_DEBUG "%s: " fmt, __func__, \
-			       ##__VA_ARGS__); \
-	} while(0)
-
-#else /* ! _VERBS_DEBUGGING */
-
-#define _VERBS_INFO(fmt,...)
-#define _VERBS_DBG(fmt,...)
-#define _VERBS_VDBG(fmt,...)
-#define _VERBS_SMADBG(fmt,...)
-
-#endif /* _VERBS_DEBUGGING */
-
-#endif /* _VERBS_DEBUG_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index e215041..6959945 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -90,7 +90,7 @@
 	case MTHCA_RATE_TAVOR_1X:     return IB_RATE_2_5_GBPS;
 	case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
 	case MTHCA_RATE_TAVOR_4X:     return IB_RATE_10_GBPS;
-	default:		      return port_rate;
+	default:		      return mult_to_ib_rate(port_rate);
 	}
 }
 
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index c3bec74..cd044ea 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -34,6 +34,7 @@
 
 #include <linux/jiffies.h>
 #include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #include "mthca_dev.h"
 
@@ -48,9 +49,56 @@
 
 static DEFINE_SPINLOCK(catas_lock);
 
+static LIST_HEAD(catas_list);
+static struct workqueue_struct *catas_wq;
+static struct work_struct catas_work;
+
+static int catas_reset_disable;
+module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
+MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
+
+/*
+ * Work handler: restart every HCA queued on catas_list.
+ *
+ * The pending entries are moved to a private list under catas_lock, then
+ * each device is restarted with mthca_device_mutex held.
+ */
+static void catas_reset(void *work_ptr)
+{
+	struct mthca_dev *dev, *tmpdev;
+	LIST_HEAD(tlist);
+	int ret;
+
+	mutex_lock(&mthca_device_mutex);
+
+	spin_lock_irq(&catas_lock);
+	list_splice_init(&catas_list, &tlist);
+	spin_unlock_irq(&catas_lock);
+
+	list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+		struct pci_dev *pdev = dev->pdev;
+
+		ret = __mthca_restart_one(pdev);
+		/*
+		 * 'dev' belongs to the instance that was just removed, so
+		 * report through pdev and the newly attached device data.
+		 */
+		if (ret) {
+			dev_err(&pdev->dev, "Reset failed (%d)\n", ret);
+		} else {
+			struct mthca_dev *d = pci_get_drvdata(pdev);
+
+			mthca_dbg(d, "Reset succeeded\n");
+		}
+	}
+
+	mutex_unlock(&mthca_device_mutex);
+}
+
 static void handle_catas(struct mthca_dev *dev)
 {
 	struct ib_event event;
+	unsigned long flags;
 	const char *type;
 	int i;
 
@@ -82,6 +115,14 @@
 	for (i = 0; i < dev->catas_err.size; ++i)
 		mthca_err(dev, "  buf[%02x]: %08x\n",
 			  i, swab32(readl(dev->catas_err.map + i)));
+
+	if (catas_reset_disable)
+		return;
+
+	spin_lock_irqsave(&catas_lock, flags);
+	list_add(&dev->catas_err.list, &catas_list);
+	queue_work(catas_wq, &catas_work);
+	spin_unlock_irqrestore(&catas_lock, flags);
 }
 
 static void poll_catas(unsigned long dev_ptr)
@@ -135,6 +176,7 @@
 	dev->catas_err.timer.data     = (unsigned long) dev;
 	dev->catas_err.timer.function = poll_catas;
 	dev->catas_err.timer.expires  = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+	INIT_LIST_HEAD(&dev->catas_err.list);
 	add_timer(&dev->catas_err.timer);
 }
 
@@ -153,4 +195,24 @@
 				    dev->catas_err.addr),
 				   dev->catas_err.size * 4);
 	}
+
+	spin_lock_irq(&catas_lock);
+	list_del(&dev->catas_err.list);
+	spin_unlock_irq(&catas_lock);
+}
+
+int __init mthca_catas_init(void)
+{
+	INIT_WORK(&catas_work, catas_reset, NULL);
+
+	catas_wq = create_singlethread_workqueue("mthca_catas");
+	if (!catas_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mthca_catas_cleanup(void)
+{
+	destroy_workqueue(catas_wq);
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index deabc14..99a94d7 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -34,7 +34,7 @@
  * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <linux/sched.h>
+#include <linux/completion.h>
 #include <linux/pci.h>
 #include <linux/errno.h>
 #include <asm/io.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 3e27a08..e393681 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -544,11 +544,11 @@
 		wq = &(*cur_qp)->rq;
 		wqe = be32_to_cpu(cqe->wqe);
 		wqe_index = wqe >> wq->wqe_shift;
-               /*
-		* WQE addr == base - 1 might be reported in receive completion
-		* with error instead of (rq size - 1) by Sinai FW 1.0.800 and
-		* Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
-		*/
+		/*
+		 * WQE addr == base - 1 might be reported in receive completion
+		 * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
+		 * Arbel FW 5.1.400.  This bug should be fixed in later FW revs.
+		 */
 		if (unlikely(wqe_index < 0))
 			wqe_index = wq->max - 1;
 		entry->wr_id = (*cur_qp)->wrid[wqe_index];
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index f8160b8..fe5cecf 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -45,6 +45,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/timer.h>
 #include <linux/mutex.h>
+#include <linux/list.h>
 
 #include <asm/semaphore.h>
 
@@ -283,8 +284,11 @@
 	unsigned long		stop;
 	u32			size;
 	struct timer_list	timer;
+	struct list_head	list;
 };
 
+extern struct mutex mthca_device_mutex;
+
 struct mthca_dev {
 	struct ib_device  ib_dev;
 	struct pci_dev   *pdev;
@@ -450,6 +454,9 @@
 
 void mthca_start_catas_poll(struct mthca_dev *dev);
 void mthca_stop_catas_poll(struct mthca_dev *dev);
+int __mthca_restart_one(struct pci_dev *pdev);
+int mthca_catas_init(void);
+void mthca_catas_cleanup(void);
 
 int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
 void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
@@ -506,7 +513,7 @@
 		    struct ib_srq_attr *attr, struct mthca_srq *srq);
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask);
+		     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int mthca_max_srq_sge(struct mthca_dev *dev);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
@@ -521,7 +528,8 @@
 		    enum ib_event_type event_type);
 int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
 		   struct ib_qp_init_attr *qp_init_attr);
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		    struct ib_udata *udata);
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			  struct ib_send_wr **bad_wr);
 int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index d9bc030..45e106f 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -119,7 +119,7 @@
 
 			mthca_update_rate(to_mdev(ibdev), port_num);
 			update_sm_ah(to_mdev(ibdev), port_num,
-				     be16_to_cpu(pinfo->lid),
+				     be16_to_cpu(pinfo->sm_lid),
 				     pinfo->neighbormtu_mastersmsl & 0xf);
 
 			event.device           = ibdev;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 7b82c19..47ea021 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -80,6 +80,8 @@
 module_param(tune_pci, int, 0444);
 MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
 
+struct mutex mthca_device_mutex;
+
 static const char mthca_version[] __devinitdata =
 	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -978,28 +980,15 @@
 					MTHCA_FLAG_SINAI_OPT }
 };
 
-static int __devinit mthca_init_one(struct pci_dev *pdev,
-				    const struct pci_device_id *id)
+static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 {
-	static int mthca_version_printed = 0;
 	int ddr_hidden = 0;
 	int err;
 	struct mthca_dev *mdev;
 
-	if (!mthca_version_printed) {
-		printk(KERN_INFO "%s", mthca_version);
-		++mthca_version_printed;
-	}
-
 	printk(KERN_INFO PFX "Initializing %s\n",
 	       pci_name(pdev));
 
-	if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
-		printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
-		       pci_name(pdev), id->driver_data);
-		return -ENODEV;
-	}
-
 	err = pci_enable_device(pdev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot enable PCI device, "
@@ -1065,7 +1054,7 @@
 
 	mdev->pdev = pdev;
 
-	mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
+	mdev->mthca_flags = mthca_hca_table[hca_type].flags;
 	if (ddr_hidden)
 		mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
 
@@ -1099,13 +1088,13 @@
 	if (err)
 		goto err_cmd;
 
-	if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
+	if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
 		mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
 			   (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
 			   (int) (mdev->fw_ver & 0xffff),
-			   (int) (mthca_hca_table[id->driver_data].latest_fw >> 32),
-			   (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff,
-			   (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff));
+			   (int) (mthca_hca_table[hca_type].latest_fw >> 32),
+			   (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff,
+			   (int) (mthca_hca_table[hca_type].latest_fw & 0xffff));
 		mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
 	}
 
@@ -1122,6 +1111,7 @@
 		goto err_unregister;
 
 	pci_set_drvdata(pdev, mdev);
+	mdev->hca_type = hca_type;
 
 	return 0;
 
@@ -1166,7 +1156,7 @@
 	return err;
 }
 
-static void __devexit mthca_remove_one(struct pci_dev *pdev)
+static void __mthca_remove_one(struct pci_dev *pdev)
 {
 	struct mthca_dev *mdev = pci_get_drvdata(pdev);
 	u8 status;
@@ -1211,6 +1201,64 @@
 	}
 }
 
+/*
+ * Tear down and re-initialize the HCA behind @pdev.
+ *
+ * Returns -ENODEV if no device data is attached to @pdev, otherwise
+ * the result of __mthca_init_one().
+ */
+int __mthca_restart_one(struct pci_dev *pdev)
+{
+	struct mthca_dev *mdev;
+	int hca_type;
+
+	mdev = pci_get_drvdata(pdev);
+	if (!mdev)
+		return -ENODEV;
+
+	/*
+	 * Save the HCA type first: mdev belongs to the instance being
+	 * removed and must not be dereferenced after __mthca_remove_one().
+	 */
+	hca_type = mdev->hca_type;
+	__mthca_remove_one(pdev);
+	return __mthca_init_one(pdev, hca_type);
+}
+
+/* Validate the table index, then run __mthca_init_one() under the mutex. */
+static int __devinit mthca_init_one(struct pci_dev *pdev,
+			     const struct pci_device_id *id)
+{
+	static int mthca_version_printed = 0;
+	int ret;
+
+	mutex_lock(&mthca_device_mutex);
+
+	if (!mthca_version_printed) {
+		printk(KERN_INFO "%s", mthca_version);
+		++mthca_version_printed;
+	}
+
+	if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
+		printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
+		       pci_name(pdev), id->driver_data);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	ret = __mthca_init_one(pdev, id->driver_data);
+out:
+	mutex_unlock(&mthca_device_mutex);
+	return ret;
+}
+
+static void __devexit mthca_remove_one(struct pci_dev *pdev)
+{
+	mutex_lock(&mthca_device_mutex);
+	__mthca_remove_one(pdev);
+	mutex_unlock(&mthca_device_mutex);
+}
+
 static struct pci_device_id mthca_pci_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
 	  .driver_data = TAVOR },
@@ -1248,13 +1283,24 @@
 {
 	int ret;
 
+	mutex_init(&mthca_device_mutex);
+	ret = mthca_catas_init();
+	if (ret)
+		return ret;
+
 	ret = pci_register_driver(&mthca_driver);
-	return ret < 0 ? ret : 0;
+	if (ret < 0) {
+		mthca_catas_cleanup();
+		return ret;
+	}
+
+	return 0;
 }
 
 static void __exit mthca_cleanup(void)
 {
 	pci_unregister_driver(&mthca_driver);
+	mthca_catas_cleanup();
 }
 
 module_init(mthca_init);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 265b1d1..981fe2e 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1288,7 +1288,7 @@
 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
-	dev->ib_dev.node_type            = IB_NODE_CA;
+	dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 	dev->ib_dev.dma_device           = &dev->pdev->dev;
 	dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 2e8f6f3..5e5c58b 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -408,7 +408,7 @@
 	ib_ah_attr->sl       	  = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
 	ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
 	ib_ah_attr->static_rate   = mthca_rate_to_ib(dev,
-						     path->static_rate & 0x7,
+						     path->static_rate & 0xf,
 						     ib_ah_attr->port_num);
 	ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
 	if (ib_ah_attr->ah_flags) {
@@ -472,10 +472,14 @@
 	if (qp->transport == RC || qp->transport == UC) {
 		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
 		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+		qp_attr->alt_pkey_index =
+			be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+		qp_attr->alt_port_num 	= qp_attr->alt_ah_attr.port_num;
 	}
 
-	qp_attr->pkey_index     = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
-	qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+	qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
+	qp_attr->port_num   =
+		(be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;
 
 	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
 	qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
@@ -486,11 +490,9 @@
 		1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
 	qp_attr->min_rnr_timer 	    =
 		(be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
-	qp_attr->port_num 	    = qp_attr->ah_attr.port_num;
 	qp_attr->timeout 	    = context->pri_path.ackto >> 3;
 	qp_attr->retry_cnt 	    = (be32_to_cpu(context->params1) >> 16) & 0x7;
 	qp_attr->rnr_retry 	    = context->pri_path.rnr_retry >> 5;
-	qp_attr->alt_port_num 	    = qp_attr->alt_ah_attr.port_num;
 	qp_attr->alt_timeout 	    = context->alt_path.ackto >> 3;
 	qp_init_attr->cap 	    = qp_attr->cap;
 
@@ -527,7 +529,8 @@
 	return 0;
 }
 
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		    struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -842,11 +845,10 @@
 	 * entries and reinitialize the QP.
 	 */
 	if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
-		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
+		mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
 			       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
 		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
-				       qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+			mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
 
 		mthca_wq_reset(&qp->sq);
 		qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index b60a9d7..0f316c8 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -358,7 +358,7 @@
 }
 
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		     enum ib_srq_attr_mask attr_mask)
+		     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
index 8e92198..8b72848 100644
--- a/drivers/infiniband/hw/mthca/mthca_uar.c
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -60,7 +60,7 @@
 	ret = mthca_alloc_init(&dev->uar_table.alloc,
 			       dev->limits.num_uars,
 			       dev->limits.num_uars - 1,
-			       dev->limits.reserved_uars);
+			       dev->limits.reserved_uars + 1);
 	if (ret)
 		return ret;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 474aa21..0b8a79d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -336,6 +336,8 @@
 extern int ipoib_sendq_size;
 extern int ipoib_recvq_size;
 
+extern struct ib_sa_client ipoib_sa_client;
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 extern int ipoib_debug_level;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5033666..f426a69 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -169,117 +169,129 @@
 	return 0;
 }
 
-static void ipoib_ib_handle_wc(struct net_device *dev,
-			       struct ib_wc *wc)
+static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
+	struct sk_buff *skb;
+	dma_addr_t addr;
+
+	ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n",
+		       wr_id, wc->opcode, wc->status);
+
+	if (unlikely(wr_id >= ipoib_recvq_size)) {
+		ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_recvq_size);
+		return;
+	}
+
+	skb  = priv->rx_ring[wr_id].skb;
+	addr = priv->rx_ring[wr_id].mapping;
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			ipoib_warn(priv, "failed recv event "
+				   "(status=%d, wrid=%d vend_err %x)\n",
+				   wc->status, wr_id, wc->vendor_err);
+		dma_unmap_single(priv->ca->dma_device, addr,
+				 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+		dev_kfree_skb_any(skb);
+		priv->rx_ring[wr_id].skb = NULL;
+		return;
+	}
+
+	/*
+	 * If we can't allocate a new RX buffer, dump
+	 * this packet and reuse the old buffer.
+	 */
+	if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+		++priv->stats.rx_dropped;
+		goto repost;
+	}
+
+	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+		       wc->byte_len, wc->slid);
+
+	dma_unmap_single(priv->ca->dma_device, addr,
+			 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+
+	skb_put(skb, wc->byte_len);
+	skb_pull(skb, IB_GRH_BYTES);
+
+	if (wc->slid != priv->local_lid ||
+	    wc->src_qp != priv->qp->qp_num) {
+		skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+		skb->mac.raw = skb->data;
+		skb_pull(skb, IPOIB_ENCAP_LEN);
+
+		dev->last_rx = jiffies;
+		++priv->stats.rx_packets;
+		priv->stats.rx_bytes += skb->len;
+
+		skb->dev = dev;
+		/* XXX get correct PACKET_ type here */
+		skb->pkt_type = PACKET_HOST;
+		netif_rx_ni(skb);
+	} else {
+		ipoib_dbg_data(priv, "dropping loopback packet\n");
+		dev_kfree_skb_any(skb);
+	}
+
+repost:
+	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
+		ipoib_warn(priv, "ipoib_ib_post_receive failed "
+			   "for buf %d\n", wr_id);
+}
+
+static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	unsigned int wr_id = wc->wr_id;
+	struct ipoib_tx_buf *tx_req;
+	unsigned long flags;
 
-	ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
+	ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n",
 		       wr_id, wc->opcode, wc->status);
 
-	if (wr_id & IPOIB_OP_RECV) {
-		wr_id &= ~IPOIB_OP_RECV;
-
-		if (wr_id < ipoib_recvq_size) {
-			struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
-			dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
-
-			if (unlikely(wc->status != IB_WC_SUCCESS)) {
-				if (wc->status != IB_WC_WR_FLUSH_ERR)
-					ipoib_warn(priv, "failed recv event "
-						   "(status=%d, wrid=%d vend_err %x)\n",
-						   wc->status, wr_id, wc->vendor_err);
-				dma_unmap_single(priv->ca->dma_device, addr,
-						 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
-				dev_kfree_skb_any(skb);
-				priv->rx_ring[wr_id].skb = NULL;
-				return;
-			}
-
-			/*
-			 * If we can't allocate a new RX buffer, dump
-			 * this packet and reuse the old buffer.
-			 */
-			if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
-				++priv->stats.rx_dropped;
-				goto repost;
-			}
-
-			ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
-				       wc->byte_len, wc->slid);
-
-			dma_unmap_single(priv->ca->dma_device, addr,
-					 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
-
-			skb_put(skb, wc->byte_len);
-			skb_pull(skb, IB_GRH_BYTES);
-
-			if (wc->slid != priv->local_lid ||
-			    wc->src_qp != priv->qp->qp_num) {
-				skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-				skb->mac.raw = skb->data;
-				skb_pull(skb, IPOIB_ENCAP_LEN);
-
-				dev->last_rx = jiffies;
-				++priv->stats.rx_packets;
-				priv->stats.rx_bytes += skb->len;
-
-				skb->dev = dev;
-				/* XXX get correct PACKET_ type here */
-				skb->pkt_type = PACKET_HOST;
-				netif_rx_ni(skb);
-			} else {
-				ipoib_dbg_data(priv, "dropping loopback packet\n");
-				dev_kfree_skb_any(skb);
-			}
-
-		repost:
-			if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
-				ipoib_warn(priv, "ipoib_ib_post_receive failed "
-					   "for buf %d\n", wr_id);
-		} else
-			ipoib_warn(priv, "completion event with wrid %d\n",
-				   wr_id);
-
-	} else {
-		struct ipoib_tx_buf *tx_req;
-		unsigned long flags;
-
-		if (wr_id >= ipoib_sendq_size) {
-			ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
-				   wr_id, ipoib_sendq_size);
-			return;
-		}
-
-		ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
-
-		tx_req = &priv->tx_ring[wr_id];
-
-		dma_unmap_single(priv->ca->dma_device,
-				 pci_unmap_addr(tx_req, mapping),
-				 tx_req->skb->len,
-				 DMA_TO_DEVICE);
-
-		++priv->stats.tx_packets;
-		priv->stats.tx_bytes += tx_req->skb->len;
-
-		dev_kfree_skb_any(tx_req->skb);
-
-		spin_lock_irqsave(&priv->tx_lock, flags);
-		++priv->tx_tail;
-		if (netif_queue_stopped(dev) &&
-		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
-		    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
-			netif_wake_queue(dev);
-		spin_unlock_irqrestore(&priv->tx_lock, flags);
-
-		if (wc->status != IB_WC_SUCCESS &&
-		    wc->status != IB_WC_WR_FLUSH_ERR)
-			ipoib_warn(priv, "failed send event "
-				   "(status=%d, wrid=%d vend_err %x)\n",
-				   wc->status, wr_id, wc->vendor_err);
+	if (unlikely(wr_id >= ipoib_sendq_size)) {
+		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_sendq_size);
+		return;
 	}
+
+	tx_req = &priv->tx_ring[wr_id];
+
+	dma_unmap_single(priv->ca->dma_device,
+			 pci_unmap_addr(tx_req, mapping),
+			 tx_req->skb->len,
+			 DMA_TO_DEVICE);
+
+	++priv->stats.tx_packets;
+	priv->stats.tx_bytes += tx_req->skb->len;
+
+	dev_kfree_skb_any(tx_req->skb);
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
+	++priv->tx_tail;
+	if (netif_queue_stopped(dev) &&
+	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
+	    priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
+		netif_wake_queue(dev);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	if (wc->status != IB_WC_SUCCESS &&
+	    wc->status != IB_WC_WR_FLUSH_ERR)
+		ipoib_warn(priv, "failed send event "
+			   "(status=%d, wrid=%d vend_err %x)\n",
+			   wc->status, wr_id, wc->vendor_err);
+}
+
+static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
+{
+	if (wc->wr_id & IPOIB_OP_RECV)
+		ipoib_ib_handle_rx_wc(dev, wc);
+	else
+		ipoib_ib_handle_tx_wc(dev, wc);
 }
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
@@ -320,7 +332,7 @@
 	struct ipoib_tx_buf *tx_req;
 	dma_addr_t addr;
 
-	if (skb->len > dev->mtu + INFINIBAND_ALEN) {
+	if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
 			   skb->len, dev->mtu + INFINIBAND_ALEN);
 		++priv->stats.tx_dropped;
@@ -619,8 +631,10 @@
 	 * The device could have been brought down between the start and when
 	 * we get here, don't bring it back up if it's not configured up
 	 */
-	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
 		ipoib_ib_dev_up(dev);
+		ipoib_mcast_restart_task(dev);
+	}
 
 	mutex_lock(&priv->vlan_mutex);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cf71d2a..1eaf00e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -40,7 +40,6 @@
 
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */
@@ -82,6 +81,8 @@
 
 struct workqueue_struct *ipoib_workqueue;
 
+struct ib_sa_client ipoib_sa_client;
+
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
 
@@ -336,7 +337,8 @@
 	struct ipoib_path *path, *tp;
 	LIST_HEAD(remove_list);
 
-	spin_lock_irq(&priv->lock);
+	spin_lock_irq(&priv->tx_lock);
+	spin_lock(&priv->lock);
 
 	list_splice(&priv->path_list, &remove_list);
 	INIT_LIST_HEAD(&priv->path_list);
@@ -347,12 +349,15 @@
 	list_for_each_entry_safe(path, tp, &remove_list, list) {
 		if (path->query)
 			ib_sa_cancel_query(path->query_id, path->query);
-		spin_unlock_irq(&priv->lock);
+		spin_unlock(&priv->lock);
+		spin_unlock_irq(&priv->tx_lock);
 		wait_for_completion(&path->done);
 		path_free(dev, path);
-		spin_lock_irq(&priv->lock);
+		spin_lock_irq(&priv->tx_lock);
+		spin_lock(&priv->lock);
 	}
-	spin_unlock_irq(&priv->lock);
+	spin_unlock(&priv->lock);
+	spin_unlock_irq(&priv->tx_lock);
 }
 
 static void path_rec_completion(int status,
@@ -459,7 +464,7 @@
 	init_completion(&path->done);
 
 	path->query_id =
-		ib_sa_path_rec_get(priv->ca, priv->port,
+		ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
 				   &path->pathrec,
 				   IB_SA_PATH_REC_DGID		|
 				   IB_SA_PATH_REC_SGID		|
@@ -615,7 +620,7 @@
 	struct ipoib_neigh *neigh;
 	unsigned long flags;
 
-	if (!spin_trylock_irqsave(&priv->tx_lock, flags))
+	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
 		return NETDEV_TX_LOCKED;
 
 	/*
@@ -628,7 +633,7 @@
 		return NETDEV_TX_BUSY;
 	}
 
-	if (skb->dst && skb->dst->neighbour) {
+	if (likely(skb->dst && skb->dst->neighbour)) {
 		if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
 			ipoib_path_lookup(skb, dev);
 			goto out;
@@ -1107,13 +1112,16 @@
 	struct ipoib_dev_priv *priv;
 	int s, e, p;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
 	if (!dev_list)
 		return;
 
 	INIT_LIST_HEAD(dev_list);
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
@@ -1137,6 +1145,9 @@
 	struct ipoib_dev_priv *priv, *tmp;
 	struct list_head *dev_list;
 
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	dev_list = ib_get_client_data(device, &ipoib_client);
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
@@ -1181,13 +1192,16 @@
 		goto err_fs;
 	}
 
+	ib_sa_register_client(&ipoib_sa_client);
+
 	ret = ib_register_client(&ipoib_client);
 	if (ret)
-		goto err_wq;
+		goto err_sa;
 
 	return 0;
 
-err_wq:
+err_sa:
+	ib_sa_unregister_client(&ipoib_sa_client);
 	destroy_workqueue(ipoib_workqueue);
 
 err_fs:
@@ -1199,6 +1213,7 @@
 static void __exit ipoib_cleanup_module(void)
 {
 	ib_unregister_client(&ipoib_client);
+	ib_sa_unregister_client(&ipoib_sa_client);
 	ipoib_unregister_debugfs();
 	destroy_workqueue(ipoib_workqueue);
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ec356ce..3faa182 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -361,7 +361,7 @@
 
 	init_completion(&mcast->done);
 
-	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
+	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec,
 				     IB_SA_MCMEMBER_REC_MGID		|
 				     IB_SA_MCMEMBER_REC_PORT_GID	|
 				     IB_SA_MCMEMBER_REC_PKEY		|
@@ -472,22 +472,32 @@
 
 	if (create) {
 		comp_mask |=
-			IB_SA_MCMEMBER_REC_QKEY		|
-			IB_SA_MCMEMBER_REC_SL		|
-			IB_SA_MCMEMBER_REC_FLOW_LABEL	|
-			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+			IB_SA_MCMEMBER_REC_QKEY			|
+			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
+			IB_SA_MCMEMBER_REC_MTU			|
+			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
+			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
+			IB_SA_MCMEMBER_REC_RATE			|
+			IB_SA_MCMEMBER_REC_SL			|
+			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
+			IB_SA_MCMEMBER_REC_HOP_LIMIT;
 
 		rec.qkey	  = priv->broadcast->mcmember.qkey;
+		rec.mtu_selector  = IB_SA_EQ;
+		rec.mtu		  = priv->broadcast->mcmember.mtu;
+		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+		rec.rate_selector = IB_SA_EQ;
+		rec.rate	  = priv->broadcast->mcmember.rate;
 		rec.sl		  = priv->broadcast->mcmember.sl;
 		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
-		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
 	}
 
 	init_completion(&mcast->done);
 
-	ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
-				     mcast->backoff * 1000, GFP_ATOMIC,
-				     ipoib_mcast_join_complete,
+	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port,
+				     &rec, comp_mask, mcast->backoff * 1000,
+				     GFP_ATOMIC, ipoib_mcast_join_complete,
 				     mcast, &mcast->query);
 
 	if (ret < 0) {
@@ -528,7 +538,7 @@
 			priv->local_rate = attr.active_speed *
 				ib_width_enum_to_int(attr.active_width);
 		} else
-			ipoib_warn(priv, "ib_query_port failed\n");
+		ipoib_warn(priv, "ib_query_port failed\n");
 	}
 
 	if (!priv->broadcast) {
@@ -681,7 +691,7 @@
 	 * Just make one shot at leaving and don't wait for a reply;
 	 * if we fail, too bad.
 	 */
-	ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec,
+	ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
 					IB_SA_MCMEMBER_REC_MGID		|
 					IB_SA_MCMEMBER_REC_PORT_GID	|
 					IB_SA_MCMEMBER_REC_PKEY		|
@@ -795,7 +805,7 @@
 	}
 
 	if (priv->broadcast) {
- 		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
+		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
 		list_add_tail(&priv->broadcast->list, &remove_list);
 		priv->broadcast = NULL;
 	}
diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig
index fead87d..365a1b5 100644
--- a/drivers/infiniband/ulp/iser/Kconfig
+++ b/drivers/infiniband/ulp/iser/Kconfig
@@ -1,6 +1,6 @@
 config INFINIBAND_ISER
 	tristate "ISCSI RDMA Protocol"
-	depends on INFINIBAND && SCSI
+	depends on INFINIBAND && SCSI && INET
 	select SCSI_ISCSI_ATTRS
 	---help---
 	  Support for the ISCSI RDMA Protocol over InfiniBand.  This
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 1437d7e..2a14fe2 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -141,18 +141,11 @@
 
 	if (sc->sc_data_direction == DMA_TO_DEVICE) {
 		BUG_ON(ctask->total_length == 0);
-		/* bytes to be sent via RDMA operations */
-		iser_ctask->rdma_data_count = ctask->total_length -
-					 ctask->imm_count -
-					 ctask->unsol_count;
 
-		debug_scsi("cmd [itt %x total %d imm %d unsol_data %d "
-			   "rdma_data %d]\n",
+		debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n",
 			   ctask->itt, ctask->total_length, ctask->imm_count,
-			   ctask->unsol_count, iser_ctask->rdma_data_count);
-	} else
-		/* bytes to be sent via RDMA operations */
-		iser_ctask->rdma_data_count = ctask->total_length;
+			   ctask->unsol_count);
+	}
 
 	iser_ctask_rdma_init(iser_ctask);
 }
@@ -196,13 +189,10 @@
 {
 	struct iscsi_data  hdr;
 	int error = 0;
-	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
 
 	/* Send data-out PDUs while there's still unsolicited data to send */
 	while (ctask->unsol_count > 0) {
-		iscsi_prep_unsolicit_data_pdu(ctask, &hdr,
-					      iser_ctask->rdma_data_count);
-
+		iscsi_prep_unsolicit_data_pdu(ctask, &hdr);
 		debug_scsi("Sending data-out: itt 0x%x, data count %d\n",
 			   hdr.itt, ctask->data_count);
 
@@ -555,6 +545,7 @@
 	.queuecommand           = iscsi_queuecommand,
 	.can_queue		= ISCSI_XMIT_CMDS_MAX - 1,
 	.sg_tablesize           = ISCSI_ISER_SG_TABLESIZE,
+	.max_sectors		= 1024,
 	.cmd_per_lun            = ISCSI_MAX_CMD_PER_LUN,
 	.eh_abort_handler       = iscsi_eh_abort,
 	.eh_host_reset_handler	= iscsi_eh_host_reset,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 3350ba6..2cf9ae0 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -82,8 +82,12 @@
 		       __func__ , ## arg);		\
 	} while (0)
 
+#define SHIFT_4K	12
+#define SIZE_4K	(1UL << SHIFT_4K)
+#define MASK_4K	(~(SIZE_4K-1))
+
 					/* support upto 512KB in one RDMA */
-#define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> PAGE_SHIFT)
+#define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> SHIFT_4K)
 #define ISCSI_ISER_MAX_LUN		256
 #define ISCSI_ISER_MAX_CMD_LEN		16
 
@@ -171,6 +175,7 @@
 	u64  va;
 	u64  len;
 	void *mem_h;
+	int  is_fmr;
 };
 
 struct iser_regd_buf {
@@ -257,7 +262,6 @@
 struct iscsi_iser_cmd_task {
 	struct iser_desc             desc;
 	struct iscsi_iser_conn	     *iser_conn;
-	int			     rdma_data_count;/* RDMA bytes           */
 	enum iser_task_status 	     status;
 	int                          command_sent;  /* set if command  sent  */
 	int                          dir[ISER_DIRS_NUM];      /* set if dir use*/
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 31950a5..d0b03f4 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -42,6 +42,7 @@
 #include "iscsi_iser.h"
 
 #define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
+
 /**
  * Decrements the reference count for the
  * registered buffer & releases it
@@ -55,7 +56,7 @@
 	if ((atomic_read(&regd_buf->ref_count) == 0) ||
 	    atomic_dec_and_test(&regd_buf->ref_count)) {
 		/* if we used the dma mr, unreg is just NOP */
-		if (regd_buf->reg.rkey != 0)
+		if (regd_buf->reg.is_fmr)
 			iser_unreg_mem(&regd_buf->reg);
 
 		if (regd_buf->dma_addr) {
@@ -90,9 +91,9 @@
 	BUG_ON(dma_mapping_error(dma_addr));
 
 	regd_buf->reg.lkey = device->mr->lkey;
-	regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */
 	regd_buf->reg.len  = regd_buf->data_size;
 	regd_buf->reg.va   = dma_addr;
+	regd_buf->reg.is_fmr = 0;
 
 	regd_buf->dma_addr  = dma_addr;
 	regd_buf->direction = direction;
@@ -239,7 +240,7 @@
 	int i;
 
 	/* compute the offset of first element */
-	page_vec->offset = (u64) sg[0].offset;
+	page_vec->offset = (u64) sg[0].offset & ~MASK_4K;
 
 	for (i = 0; i < data->dma_nents; i++) {
 		total_sz += sg_dma_len(&sg[i]);
@@ -247,21 +248,30 @@
 		first_addr = sg_dma_address(&sg[i]);
 		last_addr  = first_addr + sg_dma_len(&sg[i]);
 
-		start_aligned = !(first_addr & ~PAGE_MASK);
-		end_aligned   = !(last_addr  & ~PAGE_MASK);
+		start_aligned = !(first_addr & ~MASK_4K);
+		end_aligned   = !(last_addr  & ~MASK_4K);
 
 		/* continue to collect page fragments till aligned or SG ends */
 		while (!end_aligned && (i + 1 < data->dma_nents)) {
 			i++;
 			total_sz += sg_dma_len(&sg[i]);
 			last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]);
-			end_aligned = !(last_addr  & ~PAGE_MASK);
+			end_aligned = !(last_addr  & ~MASK_4K);
 		}
 
-		first_addr = first_addr & PAGE_MASK;
+		/* handle the 1st page in the 1st DMA element */
+		if (cur_page == 0) {
+			page = first_addr & MASK_4K;
+			page_vec->pages[cur_page] = page;
+			cur_page++;
+			page += SIZE_4K;
+		} else
+			page = first_addr;
 
-		for (page = first_addr; page < last_addr; page += PAGE_SIZE)
-			page_vec->pages[cur_page++] = page;
+		for (; page < last_addr; page += SIZE_4K) {
+			page_vec->pages[cur_page] = page;
+			cur_page++;
+		}
 
 	}
 	page_vec->data_size = total_sz;
@@ -269,8 +279,7 @@
 	return cur_page;
 }
 
-#define MASK_4K			((1UL << 12) - 1) /* 0xFFF */
-#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & MASK_4K) == 0)
+#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
 
 /**
  * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -320,9 +329,9 @@
 	struct scatterlist *sg = (struct scatterlist *)data->buf;
 	int i;
 
-	for (i = 0; i < data->size; i++)
+	for (i = 0; i < data->dma_nents; i++)
 		iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
-			 "off:%d sz:%d dma_len:%d\n",
+			 "off:0x%x sz:0x%x dma_len:0x%x\n",
 			 i, (unsigned long)sg_dma_address(&sg[i]),
 			 sg[i].page, sg[i].offset,
 			 sg[i].length,sg_dma_len(&sg[i]));
@@ -352,7 +361,7 @@
 
 	page_vec->length = page_vec_len;
 
-	if (page_vec_len * PAGE_SIZE < page_vec->data_size) {
+	if (page_vec_len * SIZE_4K < page_vec->data_size) {
 		iser_err("page_vec too short to hold this SG\n");
 		iser_data_buf_dump(data);
 		iser_dump_page_vec(page_vec);
@@ -370,15 +379,18 @@
 		      enum   iser_data_dir        cmd_dir)
 {
 	struct iser_conn     *ib_conn = iser_ctask->iser_conn->ib_conn;
+	struct iser_device   *device = ib_conn->device;
 	struct iser_data_buf *mem = &iser_ctask->data[cmd_dir];
 	struct iser_regd_buf *regd_buf;
 	int aligned_len;
 	int err;
+	int i;
+	struct scatterlist *sg;
 
 	regd_buf = &iser_ctask->rdma_regd[cmd_dir];
 
 	aligned_len = iser_data_buf_aligned_len(mem);
-	if (aligned_len != mem->size) {
+	if (aligned_len != mem->dma_nents) {
 		iser_err("rdma alignment violation %d/%d aligned\n",
 			 aligned_len, mem->size);
 		iser_data_buf_dump(mem);
@@ -389,10 +401,38 @@
 		mem = &iser_ctask->data_copy[cmd_dir];
 	}
 
-	iser_page_vec_build(mem, ib_conn->page_vec);
-	err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
-	if (err)
-		return err;
+	/* if there is a single dma entry, FMR is not needed */
+	if (mem->dma_nents == 1) {
+		sg = (struct scatterlist *)mem->buf;
+
+		regd_buf->reg.lkey = device->mr->lkey;
+		regd_buf->reg.rkey = device->mr->rkey;
+		regd_buf->reg.len  = sg_dma_len(&sg[0]);
+		regd_buf->reg.va   = sg_dma_address(&sg[0]);
+		regd_buf->reg.is_fmr = 0;
+
+		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
+			 "va: 0x%08lX sz: %ld]\n",
+			 (unsigned int)regd_buf->reg.lkey,
+			 (unsigned int)regd_buf->reg.rkey,
+			 (unsigned long)regd_buf->reg.va,
+			 (unsigned long)regd_buf->reg.len);
+	} else { /* use FMR for multiple dma entries */
+		iser_page_vec_build(mem, ib_conn->page_vec);
+		err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
+		if (err) {
+			iser_data_buf_dump(mem);
+			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents,
+				 ntoh24(iser_ctask->desc.iscsi_header.dlength));
+			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
+				 ib_conn->page_vec->data_size, ib_conn->page_vec->length,
+				 ib_conn->page_vec->offset);
+			for (i=0 ; i<ib_conn->page_vec->length ; i++)
+				iser_err("page_vec[%d] = 0x%llx\n", i,
+					 (unsigned long long) ib_conn->page_vec->pages[i]);
+			return err;
+		}
+	}
 
 	/* take a reference on this regd buf such that it will not be released *
 	 * (eg in send dto completion) before we get the scsi response         */
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 72febf1..ecdca7f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -88,8 +88,9 @@
 		     iser_cq_tasklet_fn,
 		     (unsigned long)device);
 
-	device->mr = ib_get_dma_mr(device->pd,
-				   IB_ACCESS_LOCAL_WRITE);
+	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
+				   IB_ACCESS_REMOTE_WRITE |
+				   IB_ACCESS_REMOTE_READ);
 	if (IS_ERR(device->mr))
 		goto dma_mr_err;
 
@@ -150,7 +151,7 @@
 	}
 	ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
 
-	params.page_shift        = PAGE_SHIFT;
+	params.page_shift        = SHIFT_4K;
 	/* when the first/last SG element are not start/end *
 	 * page aligned, the map whould be of N+1 pages     */
 	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
@@ -604,8 +605,9 @@
 
 	mem_reg->lkey  = mem->fmr->lkey;
 	mem_reg->rkey  = mem->fmr->rkey;
-	mem_reg->len   = page_vec->length * PAGE_SIZE;
+	mem_reg->len   = page_vec->length * SIZE_4K;
 	mem_reg->va    = io_addr;
+	mem_reg->is_fmr = 1;
 	mem_reg->mem_h = (void *)mem;
 
 	mem_reg->va   += page_vec->offset;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index fd8344c..44b9e5b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -96,6 +96,8 @@
 	.remove = srp_remove_one
 };
 
+static struct ib_sa_client srp_sa_client;
+
 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
 {
 	return (struct srp_target_port *) host->hostdata;
@@ -267,7 +269,8 @@
 
 	init_completion(&target->done);
 
-	target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev,
+	target->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
+						   target->srp_host->dev->dev,
 						   target->srp_host->port,
 						   &target->path,
 						   IB_SA_PATH_REC_DGID		|
@@ -330,7 +333,7 @@
 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
 					      SRP_BUF_FORMAT_INDIRECT);
 	/*
-	 * In the published SRP specification (draft rev. 16a), the 
+	 * In the published SRP specification (draft rev. 16a), the
 	 * port identifier format is 8 bytes of ID extension followed
 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
 	 * opposite order, so that the GUID comes first.
@@ -1449,12 +1452,28 @@
 	return sprintf(buf, "%d\n", target->zero_req_lim);
 }
 
-static CLASS_DEVICE_ATTR(id_ext,	S_IRUGO, show_id_ext,		NULL);
-static CLASS_DEVICE_ATTR(ioc_guid,	S_IRUGO, show_ioc_guid,		NULL);
-static CLASS_DEVICE_ATTR(service_id,	S_IRUGO, show_service_id,	NULL);
-static CLASS_DEVICE_ATTR(pkey,		S_IRUGO, show_pkey,		NULL);
-static CLASS_DEVICE_ATTR(dgid,		S_IRUGO, show_dgid,		NULL);
-static CLASS_DEVICE_ATTR(zero_req_lim,	S_IRUGO, show_zero_req_lim,	NULL);
+static ssize_t show_local_ib_port(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	return sprintf(buf, "%d\n", target->srp_host->port);
+}
+
+static ssize_t show_local_ib_device(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	return sprintf(buf, "%s\n", target->srp_host->dev->dev->name);
+}
+
+static CLASS_DEVICE_ATTR(id_ext,	  S_IRUGO, show_id_ext,		 NULL);
+static CLASS_DEVICE_ATTR(ioc_guid,	  S_IRUGO, show_ioc_guid,	 NULL);
+static CLASS_DEVICE_ATTR(service_id,	  S_IRUGO, show_service_id,	 NULL);
+static CLASS_DEVICE_ATTR(pkey,		  S_IRUGO, show_pkey,		 NULL);
+static CLASS_DEVICE_ATTR(dgid,		  S_IRUGO, show_dgid,		 NULL);
+static CLASS_DEVICE_ATTR(zero_req_lim,	  S_IRUGO, show_zero_req_lim,	 NULL);
+static CLASS_DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,	 NULL);
+static CLASS_DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
 
 static struct class_device_attribute *srp_host_attrs[] = {
 	&class_device_attr_id_ext,
@@ -1463,6 +1482,8 @@
 	&class_device_attr_pkey,
 	&class_device_attr_dgid,
 	&class_device_attr_zero_req_lim,
+	&class_device_attr_local_ib_port,
+	&class_device_attr_local_ib_device,
 	NULL
 };
 
@@ -1881,7 +1902,7 @@
 	if (IS_ERR(srp_dev->fmr_pool))
 		srp_dev->fmr_pool = NULL;
 
-	if (device->node_type == IB_NODE_SWITCH) {
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
@@ -1980,9 +2001,12 @@
 		return ret;
 	}
 
+	ib_sa_register_client(&srp_sa_client);
+
 	ret = ib_register_client(&srp_client);
 	if (ret) {
 		printk(KERN_ERR PFX "couldn't register IB client\n");
+		ib_sa_unregister_client(&srp_sa_client);
 		class_unregister(&srp_class);
 		return ret;
 	}
@@ -1993,6 +2017,7 @@
 static void __exit srp_cleanup_module(void)
 {
 	ib_unregister_client(&srp_client);
+	ib_sa_unregister_client(&srp_sa_client);
 	class_unregister(&srp_class);
 }
 
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index c69d23b..efd51e0 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -45,8 +45,8 @@
 #include <linux/pmu.h>
 
 #include <asm/machdep.h>
-#include <asm/backlight.h>
 #ifdef CONFIG_PPC_PMAC
+#include <asm/backlight.h>
 #include <asm/pmac_feature.h>
 #endif
 
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 82657bc..d562160 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -139,7 +139,9 @@
 {
 	struct macio_dev * macio_dev;
 	struct of_device * of;
-	char *scratch, *compat, *compat2;
+	char *scratch;
+	const char *compat, *compat2;
+
 	int i = 0;
 	int length, cplen, cplen2, seen = 0;
 
@@ -173,7 +175,7 @@
          * it's not really legal to split it out with commas. We split it
          * up using a number of environment variables instead. */
 
-	compat = (char *) get_property(of->node, "compatible", &cplen);
+	compat = get_property(of->node, "compatible", &cplen);
 	compat2 = compat;
 	cplen2= cplen;
 	while (compat && cplen > 0) {
@@ -454,7 +456,7 @@
 					       struct resource *parent_res)
 {
 	struct macio_dev *dev;
-	u32 *reg;
+	const u32 *reg;
 	
 	if (np == NULL)
 		return NULL;
@@ -489,7 +491,7 @@
 #endif
 			MAX_NODE_NAME_SIZE, np->name);
 	} else {
-		reg = (u32 *)get_property(np, "reg", NULL);
+		reg = get_property(np, "reg", NULL);
 		sprintf(dev->ofdev.dev.bus_id, "%1d.%08x:%.*s",
 			chip->lbus.index,
 			reg ? *reg : 0, MAX_NODE_NAME_SIZE, np->name);
diff --git a/drivers/macintosh/macio_sysfs.c b/drivers/macintosh/macio_sysfs.c
index cae24a1..8566bdf 100644
--- a/drivers/macintosh/macio_sysfs.c
+++ b/drivers/macintosh/macio_sysfs.c
@@ -16,12 +16,12 @@
 compatible_show (struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct of_device *of;
-	char *compat;
+	const char *compat;
 	int cplen;
 	int length = 0;
 
 	of = &to_macio_device (dev)->ofdev;
-	compat = (char *) get_property(of->node, "compatible", &cplen);
+	compat = get_property(of->node, "compatible", &cplen);
 	if (!compat) {
 		*buf = '\0';
 		return 0;
@@ -42,12 +42,12 @@
 			      char *buf)
 {
 	struct of_device *of;
-	char *compat;
+	const char *compat;
 	int cplen;
 	int length;
 
 	of = &to_macio_device (dev)->ofdev;
-	compat = (char *) get_property (of->node, "compatible", &cplen);
+	compat = get_property(of->node, "compatible", &cplen);
 	if (!compat) compat = "", cplen = 1;
 	length = sprintf (buf, "of:N%sT%s", of->node->name, of->node->type);
 	buf += length;
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 00ef468..090e40f 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -454,7 +454,7 @@
 int __init smu_init (void)
 {
 	struct device_node *np;
-	u32 *data;
+	const u32 *data;
 
         np = of_find_node_by_type(NULL, "smu");
         if (np == NULL)
@@ -490,7 +490,7 @@
 		printk(KERN_ERR "SMU: Can't find doorbell GPIO !\n");
 		goto fail;
 	}
-	data = (u32 *)get_property(smu->db_node, "reg", NULL);
+	data = get_property(smu->db_node, "reg", NULL);
 	if (data == NULL) {
 		of_node_put(smu->db_node);
 		smu->db_node = NULL;
@@ -511,7 +511,7 @@
 		smu->msg_node = of_find_node_by_name(NULL, "smu-interrupt");
 		if (smu->msg_node == NULL)
 			break;
-		data = (u32 *)get_property(smu->msg_node, "reg", NULL);
+		data = get_property(smu->msg_node, "reg", NULL);
 		if (data == NULL) {
 			of_node_put(smu->msg_node);
 			smu->msg_node = NULL;
@@ -982,11 +982,11 @@
 /* Note: Only allowed to return error code in pointers (using ERR_PTR)
  * when interruptible is 1
  */
-struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size,
-						int interruptible)
+const struct smu_sdbp_header *__smu_get_sdb_partition(int id,
+		unsigned int *size, int interruptible)
 {
 	char pname[32];
-	struct smu_sdbp_header *part;
+	const struct smu_sdbp_header *part;
 
 	if (!smu)
 		return NULL;
@@ -1003,8 +1003,7 @@
 	} else
 		mutex_lock(&smu_part_access);
 
-	part = (struct smu_sdbp_header *)get_property(smu->of_node,
-						      pname, size);
+	part = get_property(smu->of_node, pname, size);
 	if (part == NULL) {
 		DPRINTK("trying to extract from SMU ...\n");
 		part = smu_create_sdb_partition(id);
@@ -1015,7 +1014,7 @@
 	return part;
 }
 
-struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size)
+const struct smu_sdbp_header *smu_get_sdb_partition(int id, unsigned int *size)
 {
 	return __smu_get_sdb_partition(id, size, 0);
 }
@@ -1094,7 +1093,7 @@
 		pp->mode = smu_file_events;
 		return 0;
 	} else if (hdr.cmdtype == SMU_CMDTYPE_GET_PARTITION) {
-		struct smu_sdbp_header *part;
+		const struct smu_sdbp_header *part;
 		part = __smu_get_sdb_partition(hdr.cmd, NULL, 1);
 		if (part == NULL)
 			return -EINVAL;
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index 7f86478..a0f30d0 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -47,7 +47,7 @@
 
 static u8 default_limits_local[3] = {70, 50, 70};    /* local, sensor1, sensor2 */
 static u8 default_limits_chip[3] = {80, 65, 80};    /* local, sensor1, sensor2 */
-static char *sensor_location[3] = {NULL, NULL, NULL};
+static const char *sensor_location[3] = {NULL, NULL, NULL};
 
 static int limit_adjust = 0;
 static int fan_speed = -1;
@@ -553,7 +553,7 @@
 thermostat_init(void)
 {
 	struct device_node* np;
-	u32 *prop;
+	const u32 *prop;
 	int i = 0, offset = 0;
 	
 	np = of_find_node_by_name(NULL, "fan");
@@ -566,13 +566,13 @@
 	else
 		return -ENODEV;
 
-	prop = (u32 *)get_property(np, "hwsensor-params-version", NULL);
+	prop = get_property(np, "hwsensor-params-version", NULL);
 	printk(KERN_INFO "adt746x: version %d (%ssupported)\n", *prop,
 			 (*prop == 1)?"":"un");
 	if (*prop != 1)
 		return -ENODEV;
 
-	prop = (u32 *)get_property(np, "reg", NULL);
+	prop = get_property(np, "reg", NULL);
 	if (!prop)
 		return -ENODEV;
 
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index 20bf672..d00c0c3 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -660,7 +660,7 @@
 {
 	struct device_node *np;
 	char nodename[64];
-	u8 *data;
+	const u8 *data;
 	int len;
 
 	/* prom.c routine for finding a node by path is a bit brain dead
@@ -673,7 +673,7 @@
 		printk(KERN_ERR "therm_pm72: Failed to retrieve cpuid node from device-tree\n");
 		return -ENODEV;
 	}
-	data = (u8 *)get_property(np, "cpuid", &len);
+	data = get_property(np, "cpuid", &len);
 	if (data == NULL) {
 		printk(KERN_ERR "therm_pm72: Failed to retrieve cpuid property from device-tree\n");
 		of_node_put(np);
@@ -1336,7 +1336,7 @@
 	 */
 	u3 = of_find_node_by_path("/u3@0,f8000000");
 	if (u3 != NULL) {
-		u32 *vers = (u32 *)get_property(u3, "device-rev", NULL);
+		const u32 *vers = get_property(u3, "device-rev", NULL);
 		if (vers)
 			if (((*vers) & 0x3f) < 0x34)
 				u3h = 0;
@@ -2111,8 +2111,8 @@
 
 	while ((np = of_get_next_child(fcu_node, np)) != NULL) {
 		int type = -1;
-		char *loc;
-		u32 *reg;
+		const char *loc;
+		const u32 *reg;
 
 		DBG(" control: %s, type: %s\n", np->name, np->type);
 
@@ -2128,8 +2128,8 @@
 			continue;
 
 		/* Lookup for a matching location */
-		loc = (char *)get_property(np, "location", NULL);
-		reg = (u32 *)get_property(np, "reg", NULL);
+		loc = get_property(np, "location", NULL);
+		reg = get_property(np, "reg", NULL);
 		if (loc == NULL || reg == NULL)
 			continue;
 		DBG(" matching location: %s, reg: 0x%08x\n", loc, *reg);
diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c
index c7d1c29..738faab 100644
--- a/drivers/macintosh/therm_windtunnel.c
+++ b/drivers/macintosh/therm_windtunnel.c
@@ -484,14 +484,14 @@
 static int __init
 g4fan_init( void )
 {
-	struct apple_thermal_info *info;
+	const struct apple_thermal_info *info;
 	struct device_node *np;
 
 	init_MUTEX( &x.lock );
 
 	if( !(np=of_find_node_by_name(NULL, "power-mgt")) )
 		return -ENODEV;
-	info = (struct apple_thermal_info*)get_property(np, "thermal-info", NULL);
+	info = get_property(np, "thermal-info", NULL);
 	of_node_put(np);
 
 	if( !info || !machine_is_compatible("PowerMac3,6") )
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c
index 69d5452..7512d1c 100644
--- a/drivers/macintosh/via-cuda.c
+++ b/drivers/macintosh/via-cuda.c
@@ -123,7 +123,7 @@
 {
     struct adb_request req;
     phys_addr_t taddr;
-    u32 *reg;
+    const u32 *reg;
     int err;
 
     if (vias != 0)
@@ -132,7 +132,7 @@
     if (vias == 0)
 	return 0;
 
-    reg = (u32 *)get_property(vias, "reg", NULL);
+    reg = get_property(vias, "reg", NULL);
     if (reg == NULL) {
 	    printk(KERN_ERR "via-cuda: No \"reg\" property !\n");
 	    goto fail;
diff --git a/drivers/macintosh/via-pmu-led.c b/drivers/macintosh/via-pmu-led.c
index 5189d54..179af10 100644
--- a/drivers/macintosh/via-pmu-led.c
+++ b/drivers/macintosh/via-pmu-led.c
@@ -120,7 +120,7 @@
 	dt = of_find_node_by_path("/");
 	if (dt == NULL)
 		return -ENODEV;
-	model = (const char *)get_property(dt, "model", NULL);
+	model = get_property(dt, "model", NULL);
 	if (model == NULL)
 		return -ENODEV;
 	if (strncmp(model, "PowerBook", strlen("PowerBook")) != 0 &&
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 14610a6..dda0398 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -280,7 +280,7 @@
 int __init find_via_pmu(void)
 {
 	u64 taddr;
-	u32 *reg;
+	const u32 *reg;
 
 	if (via != 0)
 		return 1;
@@ -288,7 +288,7 @@
 	if (vias == NULL)
 		return 0;
 
-	reg = (u32 *)get_property(vias, "reg", NULL);
+	reg = get_property(vias, "reg", NULL);
 	if (reg == NULL) {
 		printk(KERN_ERR "via-pmu: No \"reg\" property !\n");
 		goto fail;
@@ -330,7 +330,7 @@
 		
 		gpiop = of_find_node_by_name(NULL, "gpio");
 		if (gpiop) {
-			reg = (u32 *)get_property(gpiop, "reg", NULL);
+			reg = get_property(gpiop, "reg", NULL);
 			if (reg)
 				gaddr = of_translate_address(gpiop, reg);
 			if (gaddr != OF_BAD_ADDR)
@@ -479,9 +479,9 @@
 		pmu_batteries[1].flags |= PMU_BATT_TYPE_SMART;
 	} else {
 		struct device_node* prim = find_devices("power-mgt");
-		u32 *prim_info = NULL;
+		const u32 *prim_info = NULL;
 		if (prim)
-			prim_info = (u32 *)get_property(prim, "prim-info", NULL);
+			prim_info = get_property(prim, "prim-info", NULL);
 		if (prim_info) {
 			/* Other stuffs here yet unknown */
 			pmu_battery_count = (prim_info[6] >> 16) & 0xff;
diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c
index f1df6ef..2ff546e 100644
--- a/drivers/macintosh/windfarm_pm81.c
+++ b/drivers/macintosh/windfarm_pm81.c
@@ -396,7 +396,7 @@
 static void wf_smu_create_cpu_fans(void)
 {
 	struct wf_cpu_pid_param pid_param;
-	struct smu_sdbp_header *hdr;
+	const struct smu_sdbp_header *hdr;
 	struct smu_sdbp_cpupiddata *piddata;
 	struct smu_sdbp_fvt *fvt;
 	s32 tmax, tdelta, maxpow, powadj;
@@ -702,7 +702,7 @@
 
 static int wf_init_pm(void)
 {
-	struct smu_sdbp_header *hdr;
+	const struct smu_sdbp_header *hdr;
 
 	hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL);
 	if (hdr != 0) {
diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c
index 0d6372e..59e9ffe 100644
--- a/drivers/macintosh/windfarm_pm91.c
+++ b/drivers/macintosh/windfarm_pm91.c
@@ -144,7 +144,7 @@
 static void wf_smu_create_cpu_fans(void)
 {
 	struct wf_cpu_pid_param pid_param;
-	struct smu_sdbp_header *hdr;
+	const struct smu_sdbp_header *hdr;
 	struct smu_sdbp_cpupiddata *piddata;
 	struct smu_sdbp_fvt *fvt;
 	s32 tmax, tdelta, maxpow, powadj;
diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c
index a9e88ed..bff1f37 100644
--- a/drivers/macintosh/windfarm_smu_controls.c
+++ b/drivers/macintosh/windfarm_smu_controls.c
@@ -159,14 +159,15 @@
 					      int pwm_fan)
 {
 	struct smu_fan_control *fct;
-	s32 *v; u32 *reg;
-	char *l;
+	const s32 *v;
+	const u32 *reg;
+	const char *l;
 
 	fct = kmalloc(sizeof(struct smu_fan_control), GFP_KERNEL);
 	if (fct == NULL)
 		return NULL;
 	fct->ctrl.ops = &smu_fan_ops;
-	l = (char *)get_property(node, "location", NULL);
+	l = get_property(node, "location", NULL);
 	if (l == NULL)
 		goto fail;
 
@@ -223,17 +224,17 @@
 		goto fail;
 
 	/* Get min & max values*/
-	v = (s32 *)get_property(node, "min-value", NULL);
+	v = get_property(node, "min-value", NULL);
 	if (v == NULL)
 		goto fail;
 	fct->min = *v;
-	v = (s32 *)get_property(node, "max-value", NULL);
+	v = get_property(node, "max-value", NULL);
 	if (v == NULL)
 		goto fail;
 	fct->max = *v;
 
 	/* Get "reg" value */
-	reg = (u32 *)get_property(node, "reg", NULL);
+	reg = get_property(node, "reg", NULL);
 	if (reg == NULL)
 		goto fail;
 	fct->reg = *reg;
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index e295a07..aceb61d 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -233,15 +233,15 @@
 {
 	struct wf_sat *sat;
 	struct wf_sat_sensor *sens;
-	u32 *reg;
-	char *loc, *type;
+	const u32 *reg;
+	const char *loc, *type;
 	u8 addr, chip, core;
 	struct device_node *child;
 	int shift, cpu, index;
 	char *name;
 	int vsens[2], isens[2];
 
-	reg = (u32 *) get_property(dev, "reg", NULL);
+	reg = get_property(dev, "reg", NULL);
 	if (reg == NULL)
 		return;
 	addr = *reg;
@@ -268,7 +268,7 @@
 	isens[0] = isens[1] = -1;
 	child = NULL;
 	while ((child = of_get_next_child(dev, child)) != NULL) {
-		reg = (u32 *) get_property(child, "reg", NULL);
+		reg = get_property(child, "reg", NULL);
 		type = get_property(child, "device_type", NULL);
 		loc = get_property(child, "location", NULL);
 		if (reg == NULL || loc == NULL)
diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c
index bed25dc..defe992 100644
--- a/drivers/macintosh/windfarm_smu_sensors.c
+++ b/drivers/macintosh/windfarm_smu_sensors.c
@@ -198,14 +198,14 @@
 static struct smu_ad_sensor *smu_ads_create(struct device_node *node)
 {
 	struct smu_ad_sensor *ads;
-	char *c, *l;
-	u32 *v;
+	const char *c, *l;
+	const u32 *v;
 
 	ads = kmalloc(sizeof(struct smu_ad_sensor), GFP_KERNEL);
 	if (ads == NULL)
 		return NULL;
-	c = (char *)get_property(node, "device_type", NULL);
-	l = (char *)get_property(node, "location", NULL);
+	c = get_property(node, "device_type", NULL);
+	l = get_property(node, "location", NULL);
 	if (c == NULL || l == NULL)
 		goto fail;
 
@@ -255,7 +255,7 @@
 	} else
 		goto fail;
 
-	v = (u32 *)get_property(node, "reg", NULL);
+	v = get_property(node, "reg", NULL);
 	if (v == NULL)
 		goto fail;
 	ads->reg = *v;
@@ -382,7 +382,7 @@
 
 static void smu_fetch_param_partitions(void)
 {
-	struct smu_sdbp_header *hdr;
+	const struct smu_sdbp_header *hdr;
 
 	/* Get CPU voltage/current/power calibration data */
 	hdr = smu_get_sdb_partition(SMU_SDB_CPUVCP_ID, NULL);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 6022ed1..bdbd349 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -5,6 +5,7 @@
  * This file is released under the GPL.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -78,11 +79,13 @@
 	 */
 	struct crypt_iv_operations *iv_gen_ops;
 	char *iv_mode;
-	void *iv_gen_private;
+	struct crypto_cipher *iv_gen_private;
 	sector_t iv_offset;
 	unsigned int iv_size;
 
-	struct crypto_tfm *tfm;
+	char cipher[CRYPTO_MAX_ALG_NAME];
+	char chainmode[CRYPTO_MAX_ALG_NAME];
+	struct crypto_blkcipher *tfm;
 	unsigned int key_size;
 	u8 key[0];
 };
@@ -96,12 +99,12 @@
 /*
  * Different IV generation algorithms:
  *
- * plain: the initial vector is the 32-bit low-endian version of the sector
+ * plain: the initial vector is the 32-bit little-endian version of the sector
  *        number, padded with zeros if neccessary.
  *
- * ess_iv: "encrypted sector|salt initial vector", the sector number is
- *         encrypted with the bulk cipher using a salt as key. The salt
- *         should be derived from the bulk cipher's key via hashing.
+ * essiv: "encrypted sector|salt initial vector", the sector number is
+ *        encrypted with the bulk cipher using a salt as key. The salt
+ *        should be derived from the bulk cipher's key via hashing.
  *
  * plumb: unimplemented, see:
  * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454
@@ -118,11 +121,13 @@
 static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 	                      const char *opts)
 {
-	struct crypto_tfm *essiv_tfm;
-	struct crypto_tfm *hash_tfm;
+	struct crypto_cipher *essiv_tfm;
+	struct crypto_hash *hash_tfm;
+	struct hash_desc desc;
 	struct scatterlist sg;
 	unsigned int saltsize;
 	u8 *salt;
+	int err;
 
 	if (opts == NULL) {
 		ti->error = "Digest algorithm missing for ESSIV mode";
@@ -130,76 +135,70 @@
 	}
 
 	/* Hash the cipher key with the given hash algorithm */
-	hash_tfm = crypto_alloc_tfm(opts, CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (hash_tfm == NULL) {
+	hash_tfm = crypto_alloc_hash(opts, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hash_tfm)) {
 		ti->error = "Error initializing ESSIV hash";
-		return -EINVAL;
+		return PTR_ERR(hash_tfm);
 	}
 
-	if (crypto_tfm_alg_type(hash_tfm) != CRYPTO_ALG_TYPE_DIGEST) {
-		ti->error = "Expected digest algorithm for ESSIV hash";
-		crypto_free_tfm(hash_tfm);
-		return -EINVAL;
-	}
-
-	saltsize = crypto_tfm_alg_digestsize(hash_tfm);
+	saltsize = crypto_hash_digestsize(hash_tfm);
 	salt = kmalloc(saltsize, GFP_KERNEL);
 	if (salt == NULL) {
 		ti->error = "Error kmallocing salt storage in ESSIV";
-		crypto_free_tfm(hash_tfm);
+		crypto_free_hash(hash_tfm);
 		return -ENOMEM;
 	}
 
 	sg_set_buf(&sg, cc->key, cc->key_size);
-	crypto_digest_digest(hash_tfm, &sg, 1, salt);
-	crypto_free_tfm(hash_tfm);
+	desc.tfm = hash_tfm;
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = crypto_hash_digest(&desc, &sg, cc->key_size, salt);
+	crypto_free_hash(hash_tfm);
+	if (err) {
+		ti->error = "Error calculating hash in ESSIV";
+		kfree(salt);	/* allocated above; do not leak it on failure */
+		return err;
+	}
 
 	/* Setup the essiv_tfm with the given salt */
-	essiv_tfm = crypto_alloc_tfm(crypto_tfm_alg_name(cc->tfm),
-	                             CRYPTO_TFM_MODE_ECB |
-	                             CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (essiv_tfm == NULL) {
+	essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(essiv_tfm)) {
 		ti->error = "Error allocating crypto tfm for ESSIV";
 		kfree(salt);
-		return -EINVAL;
+		return PTR_ERR(essiv_tfm);
 	}
-	if (crypto_tfm_alg_blocksize(essiv_tfm)
-	    != crypto_tfm_alg_ivsize(cc->tfm)) {
+	if (crypto_cipher_blocksize(essiv_tfm) !=
+	    crypto_blkcipher_ivsize(cc->tfm)) {
 		ti->error = "Block size of ESSIV cipher does "
 			        "not match IV size of block cipher";
-		crypto_free_tfm(essiv_tfm);
+		crypto_free_cipher(essiv_tfm);
 		kfree(salt);
 		return -EINVAL;
 	}
-	if (crypto_cipher_setkey(essiv_tfm, salt, saltsize) < 0) {
+	err = crypto_cipher_setkey(essiv_tfm, salt, saltsize);
+	if (err) {
 		ti->error = "Failed to set key for ESSIV cipher";
-		crypto_free_tfm(essiv_tfm);
+		crypto_free_cipher(essiv_tfm);
 		kfree(salt);
-		return -EINVAL;
+		return err;
 	}
 	kfree(salt);
 
-	cc->iv_gen_private = (void *)essiv_tfm;
+	cc->iv_gen_private = essiv_tfm;
 	return 0;
 }
 
 static void crypt_iv_essiv_dtr(struct crypt_config *cc)
 {
-	crypto_free_tfm((struct crypto_tfm *)cc->iv_gen_private);
+	crypto_free_cipher(cc->iv_gen_private);
 	cc->iv_gen_private = NULL;
 }
 
 static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
 {
-	struct scatterlist sg;
-
 	memset(iv, 0, cc->iv_size);
 	*(u64 *)iv = cpu_to_le64(sector);
-
-	sg_set_buf(&sg, iv, cc->iv_size);
-	crypto_cipher_encrypt((struct crypto_tfm *)cc->iv_gen_private,
-	                      &sg, &sg, cc->iv_size);
-
+	crypto_cipher_encrypt_one(cc->iv_gen_private, iv, iv);
 	return 0;
 }
 
@@ -220,6 +219,11 @@
                           int write, sector_t sector)
 {
 	u8 iv[cc->iv_size];
+	struct blkcipher_desc desc = {
+		.tfm = cc->tfm,
+		.info = iv,
+		.flags = CRYPTO_TFM_REQ_MAY_SLEEP,
+	};
 	int r;
 
 	if (cc->iv_gen_ops) {
@@ -228,14 +232,14 @@
 			return r;
 
 		if (write)
-			r = crypto_cipher_encrypt_iv(cc->tfm, out, in, length, iv);
+			r = crypto_blkcipher_encrypt_iv(&desc, out, in, length);
 		else
-			r = crypto_cipher_decrypt_iv(cc->tfm, out, in, length, iv);
+			r = crypto_blkcipher_decrypt_iv(&desc, out, in, length);
 	} else {
 		if (write)
-			r = crypto_cipher_encrypt(cc->tfm, out, in, length);
+			r = crypto_blkcipher_encrypt(&desc, out, in, length);
 		else
-			r = crypto_cipher_decrypt(cc->tfm, out, in, length);
+			r = crypto_blkcipher_decrypt(&desc, out, in, length);
 	}
 
 	return r;
@@ -510,13 +514,12 @@
 static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct crypt_config *cc;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
 	char *tmp;
 	char *cipher;
 	char *chainmode;
 	char *ivmode;
 	char *ivopts;
-	unsigned int crypto_flags;
 	unsigned int key_size;
 	unsigned long long tmpll;
 
@@ -556,31 +559,25 @@
 		ivmode = "plain";
 	}
 
-	/* Choose crypto_flags according to chainmode */
-	if (strcmp(chainmode, "cbc") == 0)
-		crypto_flags = CRYPTO_TFM_MODE_CBC;
-	else if (strcmp(chainmode, "ecb") == 0)
-		crypto_flags = CRYPTO_TFM_MODE_ECB;
-	else {
-		ti->error = "Unknown chaining mode";
-		goto bad1;
-	}
-
-	if (crypto_flags != CRYPTO_TFM_MODE_ECB && !ivmode) {
+	if (strcmp(chainmode, "ecb") && !ivmode) {
 		ti->error = "This chaining mode requires an IV mechanism";
 		goto bad1;
 	}
 
-	tfm = crypto_alloc_tfm(cipher, crypto_flags | CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (!tfm) {
+	if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode, 
+		     cipher) >= CRYPTO_MAX_ALG_NAME) {
+		ti->error = "Chain mode + cipher name is too long";
+		goto bad1;
+	}
+
+	tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm)) {
 		ti->error = "Error allocating crypto tfm";
 		goto bad1;
 	}
-	if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER) {
-		ti->error = "Expected cipher algorithm";
-		goto bad2;
-	}
 
+	strcpy(cc->cipher, cipher);
+	strcpy(cc->chainmode, chainmode);
 	cc->tfm = tfm;
 
 	/*
@@ -603,12 +600,12 @@
 	    cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0)
 		goto bad2;
 
-	if (tfm->crt_cipher.cit_decrypt_iv && tfm->crt_cipher.cit_encrypt_iv)
+	cc->iv_size = crypto_blkcipher_ivsize(tfm);
+	if (cc->iv_size)
 		/* at least a 64 bit sector number should fit in our buffer */
-		cc->iv_size = max(crypto_tfm_alg_ivsize(tfm),
+		cc->iv_size = max(cc->iv_size,
 		                  (unsigned int)(sizeof(u64) / sizeof(u8)));
 	else {
-		cc->iv_size = 0;
 		if (cc->iv_gen_ops) {
 			DMWARN("Selected cipher does not support IVs");
 			if (cc->iv_gen_ops->dtr)
@@ -629,7 +626,7 @@
 		goto bad4;
 	}
 
-	if (tfm->crt_cipher.cit_setkey(tfm, cc->key, key_size) < 0) {
+	if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) {
 		ti->error = "Error setting key";
 		goto bad5;
 	}
@@ -675,7 +672,7 @@
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
 bad2:
-	crypto_free_tfm(tfm);
+	crypto_free_blkcipher(tfm);
 bad1:
 	/* Must zero key material before freeing */
 	memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
@@ -693,7 +690,7 @@
 	kfree(cc->iv_mode);
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
-	crypto_free_tfm(cc->tfm);
+	crypto_free_blkcipher(cc->tfm);
 	dm_put_device(ti, cc->dev);
 
 	/* Must zero key material before freeing */
@@ -858,18 +855,9 @@
 		break;
 
 	case STATUSTYPE_TABLE:
-		cipher = crypto_tfm_alg_name(cc->tfm);
+		cipher = cc->cipher;	/* bare name; tfm is "chainmode(cipher)" */
 
-		switch(cc->tfm->crt_cipher.cit_mode) {
-		case CRYPTO_TFM_MODE_CBC:
-			chainmode = "cbc";
-			break;
-		case CRYPTO_TFM_MODE_ECB:
-			chainmode = "ecb";
-			break;
-		default:
-			BUG();
-		}
+		chainmode = cc->chainmode;
 
 		if (cc->iv_mode)
 			DMEMIT("%s-%s-%s ", cipher, chainmode, cc->iv_mode);
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index 85696f3..e57bb03 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -162,7 +162,13 @@
 	.show_starget_port_id = 1,
 	.set_rport_dev_loss_tmo = mptfc_set_rport_loss_tmo,
 	.show_rport_dev_loss_tmo = 1,
-
+	.show_host_supported_speeds = 1,
+	.show_host_maxframe_size = 1,
+	.show_host_speed = 1,
+	.show_host_fabric_name = 1,
+	.show_host_port_type = 1,
+	.show_host_port_state = 1,
+	.show_host_symbolic_name = 1,
 };
 
 static void
@@ -839,33 +845,95 @@
 static void
 mptfc_init_host_attr(MPT_ADAPTER *ioc,int portnum)
 {
-	unsigned class = 0, cos = 0;
+	unsigned	class = 0;
+	unsigned	cos = 0;
+	unsigned	speed;
+	unsigned	port_type;
+	unsigned	port_state;
+	FCPortPage0_t	*pp0;
+	struct Scsi_Host *sh;
+	char		*sn;
 
 	/* don't know what to do as only one scsi (fc) host was allocated */
 	if (portnum != 0)
 		return;
 
-	class = ioc->fc_port_page0[portnum].SupportedServiceClass;
+	pp0 = &ioc->fc_port_page0[portnum];
+	sh = ioc->sh;
+
+	sn = fc_host_symbolic_name(sh);
+	snprintf(sn, FC_SYMBOLIC_NAME_SIZE, "%s %s%08xh",
+	    ioc->prod_name,
+	    MPT_FW_REV_MAGIC_ID_STRING,
+	    ioc->facts.FWVersion.Word);
+
+	fc_host_tgtid_bind_type(sh) = FC_TGTID_BIND_BY_WWPN;
+
+	fc_host_maxframe_size(sh) = pp0->MaxFrameSize;
+
+	fc_host_node_name(sh) =
+	    	(u64)pp0->WWNN.High << 32 | (u64)pp0->WWNN.Low;
+
+	fc_host_port_name(sh) =
+	    	(u64)pp0->WWPN.High << 32 | (u64)pp0->WWPN.Low;
+
+	fc_host_port_id(sh) = pp0->PortIdentifier;
+
+	class = pp0->SupportedServiceClass;
 	if (class & MPI_FCPORTPAGE0_SUPPORT_CLASS_1)
 		cos |= FC_COS_CLASS1;
 	if (class & MPI_FCPORTPAGE0_SUPPORT_CLASS_2)
 		cos |= FC_COS_CLASS2;
 	if (class & MPI_FCPORTPAGE0_SUPPORT_CLASS_3)
 		cos |= FC_COS_CLASS3;
+	fc_host_supported_classes(sh) = cos;
 
-	fc_host_node_name(ioc->sh) =
-	    	(u64)ioc->fc_port_page0[portnum].WWNN.High << 32
-		    | (u64)ioc->fc_port_page0[portnum].WWNN.Low;
+	if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_1GBIT)
+		speed = FC_PORTSPEED_1GBIT;
+	else if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_2GBIT)
+		speed = FC_PORTSPEED_2GBIT;
+	else if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_4GBIT)
+		speed = FC_PORTSPEED_4GBIT;
+	else if (pp0->CurrentSpeed == MPI_FCPORTPAGE0_CURRENT_SPEED_10GBIT)
+		speed = FC_PORTSPEED_10GBIT;
+	else
+		speed = FC_PORTSPEED_UNKNOWN;
+	fc_host_speed(sh) = speed;
 
-	fc_host_port_name(ioc->sh) =
-	    	(u64)ioc->fc_port_page0[portnum].WWPN.High << 32
-		    | (u64)ioc->fc_port_page0[portnum].WWPN.Low;
+	speed = 0;
+	if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_1GBIT_SPEED)
+		speed |= FC_PORTSPEED_1GBIT;
+	if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_2GBIT_SPEED)
+		speed |= FC_PORTSPEED_2GBIT;
+	if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_4GBIT_SPEED)
+		speed |= FC_PORTSPEED_4GBIT;
+	if (pp0->SupportedSpeeds & MPI_FCPORTPAGE0_SUPPORT_10GBIT_SPEED)
+		speed |= FC_PORTSPEED_10GBIT;
+	fc_host_supported_speeds(sh) = speed;
 
-	fc_host_port_id(ioc->sh) = ioc->fc_port_page0[portnum].PortIdentifier;
+	port_state = FC_PORTSTATE_UNKNOWN;
+	if (pp0->PortState == MPI_FCPORTPAGE0_PORTSTATE_ONLINE)
+		port_state = FC_PORTSTATE_ONLINE;
+	else if (pp0->PortState == MPI_FCPORTPAGE0_PORTSTATE_OFFLINE)
+		port_state = FC_PORTSTATE_LINKDOWN;
+	fc_host_port_state(sh) = port_state;
 
-	fc_host_supported_classes(ioc->sh) = cos;
+	port_type = FC_PORTTYPE_UNKNOWN;
+	if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_POINT_TO_POINT)
+		port_type = FC_PORTTYPE_PTP;
+	else if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_PRIVATE_LOOP)
+		port_type = FC_PORTTYPE_LPORT;
+	else if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_PUBLIC_LOOP)
+		port_type = FC_PORTTYPE_NLPORT;
+	else if (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_ATTACH_FABRIC_DIRECT)
+		port_type = FC_PORTTYPE_NPORT;
+	fc_host_port_type(sh) = port_type;
 
-	fc_host_tgtid_bind_type(ioc->sh) = FC_TGTID_BIND_BY_WWPN;
+	/* Fabric WWNN if pp0->Flags reports it valid, else this port's WWNN */
+	fc_host_fabric_name(sh) =
+	    (pp0->Flags & MPI_FCPORTPAGE0_FLAGS_FABRIC_WWN_VALID) ?
+		(u64) pp0->FabricWWNN.High << 32 | (u64) pp0->FabricWWNN.Low :
+		(u64)pp0->WWNN.High << 32 | (u64)pp0->WWNN.Low;
 }
 
 static void
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index f66f220..b752a47 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -852,6 +852,10 @@
 	dma_addr_t dma_handle;
 	int error;
 
+	/* FIXME: only have link errors on local phys */
+	if (!scsi_is_sas_phy_local(phy))
+		return -EINVAL;
+
 	hdr.PageVersion = MPI_SASPHY1_PAGEVERSION;
 	hdr.ExtPageLength = 0;
 	hdr.PageNumber = 1 /* page number 1*/;
@@ -924,6 +928,10 @@
 	unsigned long timeleft;
 	int error = -ERESTARTSYS;
 
+	/* FIXME: fusion doesn't allow non-local phy reset */
+	if (!scsi_is_sas_phy_local(phy))
+		return -EINVAL;
+
 	/* not implemented for expanders */
 	if (phy->identify.target_port_protocols & SAS_PROTOCOL_SMP)
 		return -ENXIO;
@@ -1570,9 +1578,6 @@
 
 	if (!phy_info->phy) {
 
-		if (local)
-			phy->local_attached = 1;
-
 		error = sas_phy_add(phy);
 		if (error) {
 			sas_phy_free(phy);
@@ -1642,14 +1647,18 @@
 
 			for (i = 0; i < port_info->num_phys; i++)
 				if (port_info->phy_info[i].identify.sas_address ==
-				    identify.sas_address)
+				    identify.sas_address) {
+					sas_port_mark_backlink(port);
 					goto out;
+				}
 
 		} else if (scsi_is_sas_rphy(parent)) {
 			struct sas_rphy *parent_rphy = dev_to_rphy(parent);
 			if (identify.sas_address ==
-			    parent_rphy->identify.sas_address)
+			    parent_rphy->identify.sas_address) {
+				sas_port_mark_backlink(port);
 				goto out;
+			}
 		}
 
 		switch (identify.device_type) {
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 1344ad7..a03e862 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -101,7 +101,7 @@
 
 config MTD_CMDLINE_PARTS
 	bool "Command line partition table parsing"
-	depends on MTD_PARTITIONS = "y"
+	depends on MTD_PARTITIONS = "y" && MTD = "y"
 	---help---
 	  Allow generic configuration of the MTD partition tables via the kernel
 	  command line. Multiple flash resources are supported for hardware where
@@ -263,6 +263,14 @@
 
 		http://www.gensw.com/pages/prod/bios/rfd.htm
 
+config SSFDC
+	tristate "NAND SSFDC (SmartMedia) read only translation layer"
+	depends on MTD
+	default n
+	help
+	  This enables read-only access to SmartMedia formatted NAND
+	  flash. You can mount it as a FAT file system.
+
 source "drivers/mtd/chips/Kconfig"
 
 source "drivers/mtd/maps/Kconfig"
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index fc93744..1e36b9a 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -21,6 +21,7 @@
 obj-$(CONFIG_NFTL)		+= nftl.o mtd_blkdevs.o
 obj-$(CONFIG_INFTL)		+= inftl.o mtd_blkdevs.o
 obj-$(CONFIG_RFD_FTL)		+= rfd_ftl.o mtd_blkdevs.o
+obj-$(CONFIG_SSFDC)		+= ssfdc.o mtd_blkdevs.o
 
 nftl-objs		:= nftlcore.o nftlmount.o
 inftl-objs		:= inftlcore.o inftlmount.o
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 39edb82..7ea49a0 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -908,7 +908,7 @@
 
 static int __xipram xip_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
-		unsigned long adr, int *chip_op_time )
+		unsigned long adr, unsigned int chip_op_time )
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
@@ -917,7 +917,7 @@
 	flstate_t oldstate, newstate;
 
        	start = xip_currtime();
-	usec = *chip_op_time * 8;
+	usec = chip_op_time * 8;
 	if (usec == 0)
 		usec = 500000;
 	done = 0;
@@ -1027,8 +1027,8 @@
 #define XIP_INVAL_CACHED_RANGE(map, from, size)  \
 	INVALIDATE_CACHED_RANGE(map, from, size)
 
-#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, p_usec) \
-	xip_wait_for_operation(map, chip, cmd_adr, p_usec)
+#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec) \
+	xip_wait_for_operation(map, chip, cmd_adr, usec)
 
 #else
 
@@ -1040,64 +1040,64 @@
 static int inval_cache_and_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
 		unsigned long cmd_adr, unsigned long inval_adr, int inval_len,
-		int *chip_op_time )
+		unsigned int chip_op_time)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	map_word status, status_OK = CMD(0x80);
-	int z, chip_state = chip->state;
-	unsigned long timeo;
+	int chip_state = chip->state;
+	unsigned int timeo, sleep_time;
 
 	spin_unlock(chip->mutex);
 	if (inval_len)
 		INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len);
-	if (*chip_op_time)
-		cfi_udelay(*chip_op_time);
 	spin_lock(chip->mutex);
 
-	timeo = *chip_op_time * 8 * HZ / 1000000;
-	if (timeo < HZ/2)
-		timeo = HZ/2;
-	timeo += jiffies;
+	/* set our timeout to 8 times the expected delay */
+	timeo = chip_op_time * 8;
+	if (!timeo)
+		timeo = 500000;
+	sleep_time = chip_op_time / 2;
 
-	z = 0;
 	for (;;) {
-		if (chip->state != chip_state) {
-			/* Someone's suspended the operation: sleep */
-			DECLARE_WAITQUEUE(wait, current);
-
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
-			schedule();
-			remove_wait_queue(&chip->wq, &wait);
-			timeo = jiffies + (HZ / 2); /* FIXME */
-			spin_lock(chip->mutex);
-			continue;
-		}
-
 		status = map_read(map, cmd_adr);
 		if (map_word_andequal(map, status, status_OK, status_OK))
 			break;
 
-		/* OK Still waiting */
-		if (time_after(jiffies, timeo)) {
+		if (!timeo) {
 			map_write(map, CMD(0x70), cmd_adr);
 			chip->state = FL_STATUS;
 			return -ETIME;
 		}
 
-		/* Latency issues. Drop the lock, wait a while and retry */
-		z++;
+		/* OK Still waiting. Drop the lock, wait a while and retry. */
 		spin_unlock(chip->mutex);
-		cfi_udelay(1);
+		if (sleep_time >= 1000000/HZ) {
+			/*
+			 * Sleep instead of busy waiting. timeo is unsigned,
+			 * so clamp at zero: wrapping past it would defeat
+			 * the !timeo timeout check above.
+			 */
+			msleep(sleep_time/1000);
+			timeo = (timeo > sleep_time) ? timeo - sleep_time : 0;
+			sleep_time = 1000000/HZ;
+		} else {
+			udelay(1);
+			cond_resched();
+			timeo--;
+		}
 		spin_lock(chip->mutex);
-	}
 
-	if (!z) {
-		if (!--(*chip_op_time))
-			*chip_op_time = 1;
-	} else if (z > 1)
-		++(*chip_op_time);
+		if (chip->state != chip_state) {
+			/* Someone's suspended the operation: sleep */
+			DECLARE_WAITQUEUE(wait, current);
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			add_wait_queue(&chip->wq, &wait);
+			spin_unlock(chip->mutex);
+			schedule();
+			remove_wait_queue(&chip->wq, &wait);
+			spin_lock(chip->mutex);
+		}
+	}
 
 	/* Done and happy. */
  	chip->state = FL_STATUS;
@@ -1107,8 +1107,7 @@
 #endif
 
 #define WAIT_TIMEOUT(map, chip, adr, udelay) \
-	({ int __udelay = (udelay); \
-	   INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, &__udelay); })
+	INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay)
 
 
 static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len)
@@ -1332,7 +1331,7 @@
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
 				   adr, map_bankwidth(map),
-				   &chip->word_write_time);
+				   chip->word_write_time);
 	if (ret) {
 		xip_enable(map, chip, adr);
 		printk(KERN_ERR "%s: word write error (status timeout)\n", map->name);
@@ -1569,7 +1568,7 @@
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr,
 				   adr, len,
-				   &chip->buffer_write_time);
+				   chip->buffer_write_time);
 	if (ret) {
 		map_write(map, CMD(0x70), cmd_adr);
 		chip->state = FL_STATUS;
@@ -1704,7 +1703,7 @@
 
 	ret = INVAL_CACHE_AND_WAIT(map, chip, adr,
 				   adr, len,
-				   &chip->erase_time);
+				   chip->erase_time);
 	if (ret) {
 		map_write(map, CMD(0x70), adr);
 		chip->state = FL_STATUS;
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 9885726..702ae4c 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -45,9 +45,11 @@
 #define MAX_WORD_RETRIES 3
 
 #define MANUFACTURER_AMD	0x0001
+#define MANUFACTURER_ATMEL	0x001F
 #define MANUFACTURER_SST	0x00BF
 #define SST49LF004B	        0x0060
 #define SST49LF008A		0x005a
+#define AT49BV6416		0x00d6
 
 static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
 static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *);
@@ -68,6 +70,9 @@
 static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr);
 #include "fwh_lock.h"
 
+static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, size_t len);
+static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, size_t len);
+
 static struct mtd_chip_driver cfi_amdstd_chipdrv = {
 	.probe		= NULL, /* Not usable directly */
 	.destroy	= cfi_amdstd_destroy,
@@ -161,6 +166,26 @@
 	}
 }
 
+/* Atmel chips don't use the same PRI format as AMD chips */
+static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
+{
+	struct map_info *map = mtd->priv;
+	struct cfi_private *cfi = map->fldrv_priv;
+	struct cfi_pri_amdstd *extp = cfi->cmdset_priv;
+	struct cfi_pri_atmel atmel_pri;
+
+	memcpy(&atmel_pri, extp, sizeof(atmel_pri));
+	memset((char *)extp + 5, 0, sizeof(*extp) - 5);
+
+	if (atmel_pri.Features & 0x02)
+		extp->EraseSuspend = 2;
+
+	if (atmel_pri.BottomBoot)
+		extp->TopBottom = 2;
+	else
+		extp->TopBottom = 3;
+}
+
 static void fixup_use_secsi(struct mtd_info *mtd, void *param)
 {
 	/* Setup for chips with a secsi area */
@@ -179,6 +204,17 @@
 
 }
 
+/*
+ * Some Atmel chips (e.g. the AT49BV6416) power-up with all sectors
+ * locked by default.
+ */
+static void fixup_use_atmel_lock(struct mtd_info *mtd, void *param)
+{
+	mtd->lock = cfi_atmel_lock;
+	mtd->unlock = cfi_atmel_unlock;
+	mtd->flags |= MTD_STUPID_LOCK;
+}
+
 static struct cfi_fixup cfi_fixup_table[] = {
 #ifdef AMD_BOOTLOC_BUG
 	{ CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL },
@@ -192,6 +228,7 @@
 #if !FORCE_WORD_WRITE
 	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL, },
 #endif
+	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
 	{ 0, 0, NULL, NULL }
 };
 static struct cfi_fixup jedec_fixup_table[] = {
@@ -207,6 +244,7 @@
 	 * we know that is the case.
 	 */
 	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_erase_chip, NULL },
+	{ CFI_MFR_ATMEL, AT49BV6416, fixup_use_atmel_lock, NULL },
 	{ 0, 0, NULL, NULL }
 };
 
@@ -1607,6 +1645,80 @@
 	return 0;
 }
 
+static int do_atmel_lock(struct map_info *map, struct flchip *chip,
+			 unsigned long adr, int len, void *thunk)
+{
+	struct cfi_private *cfi = map->fldrv_priv;
+	int ret;
+
+	spin_lock(chip->mutex);
+	ret = get_chip(map, chip, adr + chip->start, FL_LOCKING);
+	if (ret)
+		goto out_unlock;
+	chip->state = FL_LOCKING;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): LOCK 0x%08lx len %d\n",
+	      __func__, adr, len);
+
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	map_write(map, CMD(0x40), chip->start + adr);
+
+	chip->state = FL_READY;
+	put_chip(map, chip, adr + chip->start);
+	ret = 0;
+
+out_unlock:
+	spin_unlock(chip->mutex);
+	return ret;
+}
+
+static int do_atmel_unlock(struct map_info *map, struct flchip *chip,
+			   unsigned long adr, int len, void *thunk)
+{
+	struct cfi_private *cfi = map->fldrv_priv;
+	int ret;
+
+	spin_lock(chip->mutex);
+	ret = get_chip(map, chip, adr + chip->start, FL_UNLOCKING);
+	if (ret)
+		goto out_unlock;
+	chip->state = FL_UNLOCKING;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): UNLOCK 0x%08lx len %d\n",
+	      __func__, adr, len);
+
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	/* Same addressing as get_chip()/put_chip(): adr plus chip->start */
+	map_write(map, CMD(0x70), chip->start + adr);
+	chip->state = FL_READY;
+	put_chip(map, chip, adr + chip->start);
+	ret = 0;
+
+out_unlock:
+	spin_unlock(chip->mutex);
+	return ret;
+}
+
+static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, size_t len)
+{
+	return cfi_varsize_frob(mtd, do_atmel_lock, ofs, len, NULL);
+}
+
+static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, size_t len)
+{
+	return cfi_varsize_frob(mtd, do_atmel_unlock, ofs, len, NULL);
+}
+
 
 static void cfi_amdstd_sync (struct mtd_info *mtd)
 {
diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index 8f39d0a..1154dac 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -111,6 +111,7 @@
 #define MX29LV040C	0x004F
 #define MX29LV160T	0x22C4
 #define MX29LV160B	0x2249
+#define MX29F040	0x00A4
 #define MX29F016	0x00AD
 #define MX29F002T	0x00B0
 #define MX29F004T	0x0045
@@ -1172,6 +1173,19 @@
 		}
 	}, {
 		.mfr_id		= MANUFACTURER_MACRONIX,
+		.dev_id		= MX29F040,
+		.name		= "Macronix MX29F040",
+		.uaddr		= {
+			[0] = MTD_UADDR_0x0555_0x02AA /* x8 */
+		},
+		.DevSize	= SIZE_512KiB,
+		.CmdSet		= P_ID_AMD_STD,
+		.NumEraseRegions= 1,
+		.regions	= {
+			ERASEINFO(0x10000,8),
+		}
+        }, {
+		.mfr_id		= MANUFACTURER_MACRONIX,
 		.dev_id		= MX29F016,
 		.name		= "Macronix MX29F016",
 		.uaddr		= {
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index ede3561..401c6a2 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -18,6 +18,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/buffer_head.h>
 #include <linux/mutex.h>
+#include <linux/mount.h>
 
 #define VERSION "$Revision: 1.30 $"
 
@@ -236,6 +237,8 @@
 	}
 	return 0;
 }
+
+
 static int block2mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 		size_t *retlen, const u_char *buf)
 {
@@ -299,6 +302,19 @@
 
 	/* Get a handle on the device */
 	bdev = open_bdev_excl(devname, O_RDWR, NULL);
+#ifndef MODULE
+	if (IS_ERR(bdev)) {
+
+		/* We might not have rootfs mounted at this point. Try
+		   to resolve the device name by other means. */
+
+		dev_t dev = name_to_dev_t(devname);
+		if (dev != 0) {
+			bdev = open_by_devnum(dev, FMODE_WRITE | FMODE_READ);
+		}
+	}
+#endif
+
 	if (IS_ERR(bdev)) {
 		ERROR("error: cannot open device %s", devname);
 		goto devinit_err;
@@ -393,26 +409,6 @@
 }
 
 
-static int parse_name(char **pname, const char *token, size_t limit)
-{
-	size_t len;
-	char *name;
-
-	len = strlen(token) + 1;
-	if (len > limit)
-		return -ENOSPC;
-
-	name = kmalloc(len, GFP_KERNEL);
-	if (!name)
-		return -ENOMEM;
-
-	strcpy(name, token);
-
-	*pname = name;
-	return 0;
-}
-
-
 static inline void kill_final_newline(char *str)
 {
 	char *newline = strrchr(str, '\n');
@@ -426,9 +422,15 @@
 	return 0;				\
 } while (0)
 
-static int block2mtd_setup(const char *val, struct kernel_param *kp)
+#ifndef MODULE
+static int block2mtd_init_called = 0;
+static __initdata char block2mtd_paramline[80 + 12]; /* 80 for device, 12 for erase size */
+#endif
+
+
+static int block2mtd_setup2(const char *val)
 {
-	char buf[80+12]; /* 80 for device, 12 for erase size */
+	char buf[80 + 12]; /* 80 for device, 12 for erase size */
 	char *str = buf;
 	char *token[2];
 	char *name;
@@ -450,13 +452,9 @@
 	if (!token[0])
 		parse_err("no argument");
 
-	ret = parse_name(&name, token[0], 80);
-	if (ret == -ENOMEM)
-		parse_err("out of memory");
-	if (ret == -ENOSPC)
-		parse_err("name too long");
-	if (ret)
-		return 0;
+	name = token[0];
+	if (strlen(name) + 1 > 80)
+		parse_err("device name too long");
 
 	if (token[1]) {
 		ret = parse_num(&erase_size, token[1]);
@@ -472,13 +470,48 @@
 }
 
 
+static int block2mtd_setup(const char *val, struct kernel_param *kp)
+{
+#ifdef MODULE
+	return block2mtd_setup2(val);
+#else
+	/* If more parameters are later passed in via
+	   /sys/module/block2mtd/parameters/block2mtd
+	   and block2mtd_init() has already been called,
+	   we can parse the argument now. */
+
+	if (block2mtd_init_called)
+		return block2mtd_setup2(val);
+
+	/* During the early boot stage, we only save the parameters
+	   here. We must parse them later: if the param is passed
+	   on the kernel boot command line, block2mtd_setup() is
+	   called so early that it is not possible to resolve
+	   the device (even kmalloc() fails). Defer that work to
+	   block2mtd_setup2(). */
+
+	strlcpy(block2mtd_paramline, val, sizeof(block2mtd_paramline));
+
+	return 0;
+#endif
+}
+
+
 module_param_call(block2mtd, block2mtd_setup, NULL, NULL, 0200);
 MODULE_PARM_DESC(block2mtd, "Device to use. \"block2mtd=<dev>[,<erasesize>]\"");
 
 static int __init block2mtd_init(void)
 {
+	int ret = 0;
 	INFO("version " VERSION);
-	return 0;
+
+#ifndef MODULE
+	if (strlen(block2mtd_paramline))
+		ret = block2mtd_setup2(block2mtd_paramline);
+	block2mtd_init_called = 1;
+#endif
+
+	return ret;
 }
 
 
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index a846614..ef4a731 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -406,13 +406,13 @@
 
 static struct flash_info __devinitdata m25p_data [] = {
 	/* REVISIT: fill in JEDEC ids, for parts that have them */
-	{ "m25p05", 0x05, 0x0000, 32 * 1024, 2 },
-	{ "m25p10", 0x10, 0x0000, 32 * 1024, 4 },
-	{ "m25p20", 0x11, 0x0000, 64 * 1024, 4 },
-	{ "m25p40", 0x12, 0x0000, 64 * 1024, 8 },
+	{ "m25p05", 0x05, 0x2010, 32 * 1024, 2 },
+	{ "m25p10", 0x10, 0x2011, 32 * 1024, 4 },
+	{ "m25p20", 0x11, 0x2012, 64 * 1024, 4 },
+	{ "m25p40", 0x12, 0x2013, 64 * 1024, 8 },
 	{ "m25p80", 0x13, 0x0000, 64 * 1024, 16 },
-	{ "m25p16", 0x14, 0x0000, 64 * 1024, 32 },
-	{ "m25p32", 0x15, 0x0000, 64 * 1024, 64 },
+	{ "m25p16", 0x14, 0x2015, 64 * 1024, 32 },
+	{ "m25p32", 0x15, 0x2016, 64 * 1024, 64 },
 	{ "m25p64", 0x16, 0x2017, 64 * 1024, 128 },
 };
 
diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index 6f9bbf6..354e165 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -4,82 +4,82 @@
  * PMC551 PCI Mezzanine Ram Device
  *
  * Author:
- *       Mark Ferrell <mferrell@mvista.com>
- *       Copyright 1999,2000 Nortel Networks
+ *	Mark Ferrell <mferrell@mvista.com>
+ *	Copyright 1999,2000 Nortel Networks
  *
  * License:
- *	 As part of this driver was derived from the slram.c driver it
- *	 falls under the same license, which is GNU General Public
- *	 License v2
+ *	As part of this driver was derived from the slram.c driver it
+ *	falls under the same license, which is GNU General Public
+ *	License v2
  *
  * Description:
- *	 This driver is intended to support the PMC551 PCI Ram device
- *	 from Ramix Inc.  The PMC551 is a PMC Mezzanine module for
- *	 cPCI embedded systems.  The device contains a single SROM
- *	 that initially programs the V370PDC chipset onboard the
- *	 device, and various banks of DRAM/SDRAM onboard.  This driver
- *	 implements this PCI Ram device as an MTD (Memory Technology
- *	 Device) so that it can be used to hold a file system, or for
- *	 added swap space in embedded systems.  Since the memory on
- *	 this board isn't as fast as main memory we do not try to hook
- *	 it into main memory as that would simply reduce performance
- *	 on the system.  Using it as a block device allows us to use
- *	 it as high speed swap or for a high speed disk device of some
- *	 sort.  Which becomes very useful on diskless systems in the
- *	 embedded market I might add.
+ *	This driver is intended to support the PMC551 PCI Ram device
+ *	from Ramix Inc.  The PMC551 is a PMC Mezzanine module for
+ *	cPCI embedded systems.  The device contains a single SROM
+ *	that initially programs the V370PDC chipset onboard the
+ *	device, and various banks of DRAM/SDRAM onboard.  This driver
+ *	implements this PCI Ram device as an MTD (Memory Technology
+ *	Device) so that it can be used to hold a file system, or for
+ *	added swap space in embedded systems.  Since the memory on
+ *	this board isn't as fast as main memory we do not try to hook
+ *	it into main memory as that would simply reduce performance
+ *	on the system.  Using it as a block device allows us to use
+ *	it as high speed swap or for a high speed disk device of some
+ *	sort.  Which becomes very useful on diskless systems in the
+ *	embedded market I might add.
  *
  * Notes:
- *	 Due to what I assume is more buggy SROM, the 64M PMC551 I
- *	 have available claims that all 4 of it's DRAM banks have 64M
- *	 of ram configured (making a grand total of 256M onboard).
- *	 This is slightly annoying since the BAR0 size reflects the
- *	 aperture size, not the dram size, and the V370PDC supplies no
- *	 other method for memory size discovery.  This problem is
- *	 mostly only relevant when compiled as a module, as the
- *	 unloading of the module with an aperture size smaller then
- *	 the ram will cause the driver to detect the onboard memory
- *	 size to be equal to the aperture size when the module is
- *	 reloaded.  Soooo, to help, the module supports an msize
- *	 option to allow the specification of the onboard memory, and
- *	 an asize option, to allow the specification of the aperture
- *	 size.  The aperture must be equal to or less then the memory
- *	 size, the driver will correct this if you screw it up.  This
- *	 problem is not relevant for compiled in drivers as compiled
- *	 in drivers only init once.
+ *	Due to what I assume is more buggy SROM, the 64M PMC551 I
+ *	have available claims that all 4 of its DRAM banks have 64M
+ *	of ram configured (making a grand total of 256M onboard).
+ *	This is slightly annoying since the BAR0 size reflects the
+ *	aperture size, not the dram size, and the V370PDC supplies no
+ *	other method for memory size discovery.  This problem is
+ *	mostly only relevant when compiled as a module, as the
+ *	unloading of the module with an aperture size smaller than
+ *	the ram will cause the driver to detect the onboard memory
+ *	size to be equal to the aperture size when the module is
+ *	reloaded.  Soooo, to help, the module supports an msize
+ *	option to allow the specification of the onboard memory, and
+ *	an asize option, to allow the specification of the aperture
+ *	size.  The aperture must be equal to or less than the memory
+ *	size, the driver will correct this if you screw it up.  This
+ *	problem is not relevant for compiled in drivers as compiled
+ *	in drivers only init once.
  *
  * Credits:
- *       Saeed Karamooz <saeed@ramix.com> of Ramix INC. for the
- *       initial example code of how to initialize this device and for
- *       help with questions I had concerning operation of the device.
+ *	Saeed Karamooz <saeed@ramix.com> of Ramix INC. for the
+ *	initial example code of how to initialize this device and for
+ *	help with questions I had concerning operation of the device.
  *
- *       Most of the MTD code for this driver was originally written
- *       for the slram.o module in the MTD drivers package which
- *       allows the mapping of system memory into an MTD device.
- *       Since the PMC551 memory module is accessed in the same
- *       fashion as system memory, the slram.c code became a very nice
- *       fit to the needs of this driver.  All we added was PCI
- *       detection/initialization to the driver and automatically figure
- *       out the size via the PCI detection.o, later changes by Corey
- *       Minyard set up the card to utilize a 1M sliding apature.
+ *	Most of the MTD code for this driver was originally written
+ *	for the slram.o module in the MTD drivers package which
+ *	allows the mapping of system memory into an MTD device.
+ *	Since the PMC551 memory module is accessed in the same
+ *	fashion as system memory, the slram.c code became a very nice
+ *	fit to the needs of this driver.  All we added was PCI
+ *	detection/initialization to the driver and automatically figure
+ *	out the size via the PCI detection.o, later changes by Corey
+ *	Minyard set up the card to utilize a 1M sliding aperture.
  *
- *	 Corey Minyard <minyard@nortelnetworks.com>
- *       * Modified driver to utilize a sliding aperture instead of
- *         mapping all memory into kernel space which turned out to
- *         be very wasteful.
- *       * Located a bug in the SROM's initialization sequence that
- *         made the memory unusable, added a fix to code to touch up
- *         the DRAM some.
+ *	Corey Minyard <minyard@nortelnetworks.com>
+ *	* Modified driver to utilize a sliding aperture instead of
+ *	 mapping all memory into kernel space which turned out to
+ *	 be very wasteful.
+ *	* Located a bug in the SROM's initialization sequence that
+ *	 made the memory unusable, added a fix to code to touch up
+ *	 the DRAM some.
  *
  * Bugs/FIXME's:
- *       * MUST fix the init function to not spin on a register
- *       waiting for it to set .. this does not safely handle busted
- *       devices that never reset the register correctly which will
- *       cause the system to hang w/ a reboot being the only chance at
- *       recover. [sort of fixed, could be better]
- *       * Add I2C handling of the SROM so we can read the SROM's information
- *       about the aperture size.  This should always accurately reflect the
- *       onboard memory size.
- *       * Comb the init routine.  It's still a bit cludgy on a few things.
+ *	* MUST fix the init function to not spin on a register
+ *	waiting for it to set .. this does not safely handle busted
+ *	devices that never reset the register correctly which will
+ *	cause the system to hang w/ a reboot being the only chance at
+ *	recover. [sort of fixed, could be better]
+ *	* Add I2C handling of the SROM so we can read the SROM's information
+ *	about the aperture size.  This should always accurately reflect the
+ *	onboard memory size.
+ *	* Comb the init routine.  It's still a bit cludgy on a few things.
  */
 
 #include <linux/kernel.h>
@@ -99,84 +99,83 @@
 #include <asm/system.h>
 #include <linux/pci.h>
 
-#ifndef CONFIG_PCI
-#error Enable PCI in your kernel config
-#endif
-
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/pmc551.h>
 #include <linux/mtd/compatmac.h>
 
 static struct mtd_info *pmc551list;
 
-static int pmc551_erase (struct mtd_info *mtd, struct erase_info *instr)
+static int pmc551_erase(struct mtd_info *mtd, struct erase_info *instr)
 {
-        struct mypriv *priv = mtd->priv;
-        u32 soff_hi, soff_lo; /* start address offset hi/lo */
-        u32 eoff_hi, eoff_lo; /* end address offset hi/lo */
-        unsigned long end;
+	struct mypriv *priv = mtd->priv;
+	u32 soff_hi, soff_lo;	/* start address offset hi/lo */
+	u32 eoff_hi, eoff_lo;	/* end address offset hi/lo */
+	unsigned long end;
 	u_char *ptr;
 	size_t retlen;
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_erase(pos:%ld, len:%ld)\n", (long)instr->addr, (long)instr->len);
+	printk(KERN_DEBUG "pmc551_erase(pos:%ld, len:%ld)\n", (long)instr->addr,
+		(long)instr->len);
 #endif
 
-        end = instr->addr + instr->len - 1;
+	end = instr->addr + instr->len - 1;
 
-        /* Is it past the end? */
-        if ( end > mtd->size ) {
+	/* Is it past the end? */
+	if (end > mtd->size) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_erase() out of bounds (%ld > %ld)\n", (long)end, (long)mtd->size);
+		printk(KERN_DEBUG "pmc551_erase() out of bounds (%ld > %ld)\n",
+			(long)end, (long)mtd->size);
 #endif
-                return -EINVAL;
-        }
+		return -EINVAL;
+	}
 
-        eoff_hi = end & ~(priv->asize - 1);
-        soff_hi = instr->addr & ~(priv->asize - 1);
-        eoff_lo = end & (priv->asize - 1);
-        soff_lo = instr->addr & (priv->asize - 1);
+	eoff_hi = end & ~(priv->asize - 1);
+	soff_hi = instr->addr & ~(priv->asize - 1);
+	eoff_lo = end & (priv->asize - 1);
+	soff_lo = instr->addr & (priv->asize - 1);
 
-	pmc551_point (mtd, instr->addr, instr->len, &retlen, &ptr);
+	pmc551_point(mtd, instr->addr, instr->len, &retlen, &ptr);
 
-        if ( soff_hi == eoff_hi || mtd->size == priv->asize) {
-                /* The whole thing fits within one access, so just one shot
-                   will do it. */
-                memset(ptr, 0xff, instr->len);
-        } else {
-                /* We have to do multiple writes to get all the data
-                   written. */
-                while (soff_hi != eoff_hi) {
+	if (soff_hi == eoff_hi || mtd->size == priv->asize) {
+		/* The whole thing fits within one access, so just one shot
+		   will do it. */
+		memset(ptr, 0xff, instr->len);
+	} else {
+		/* We have to do multiple writes to get all the data
+		   written. */
+		while (soff_hi != eoff_hi) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-			printk( KERN_DEBUG "pmc551_erase() soff_hi: %ld, eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
+			printk(KERN_DEBUG "pmc551_erase() soff_hi: %ld, "
+				"eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
 #endif
-                        memset(ptr, 0xff, priv->asize);
-                        if (soff_hi + priv->asize >= mtd->size) {
-                                goto out;
-                        }
-                        soff_hi += priv->asize;
-			pmc551_point (mtd,(priv->base_map0|soff_hi),
-				      priv->asize, &retlen, &ptr);
-                }
-                memset (ptr, 0xff, eoff_lo);
-        }
+			memset(ptr, 0xff, priv->asize);
+			if (soff_hi + priv->asize >= mtd->size) {
+				goto out;
+			}
+			soff_hi += priv->asize;
+			pmc551_point(mtd, (priv->base_map0 | soff_hi),
+				     priv->asize, &retlen, &ptr);
+		}
+		memset(ptr, 0xff, eoff_lo);
+	}
 
-out:
+      out:
 	instr->state = MTD_ERASE_DONE;
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_erase() done\n");
 #endif
 
-        mtd_erase_callback(instr);
-        return 0;
+	mtd_erase_callback(instr);
+	return 0;
 }
 
-
-static int pmc551_point (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char **mtdbuf)
+static int pmc551_point(struct mtd_info *mtd, loff_t from, size_t len,
+			size_t * retlen, u_char ** mtdbuf)
 {
-        struct mypriv *priv = mtd->priv;
-        u32 soff_hi;
-        u32 soff_lo;
+	struct mypriv *priv = mtd->priv;
+	u32 soff_hi;
+	u32 soff_lo;
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_point(%ld, %ld)\n", (long)from, (long)len);
@@ -184,18 +183,19 @@
 
 	if (from + len > mtd->size) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-		printk(KERN_DEBUG "pmc551_point() out of bounds (%ld > %ld)\n", (long)from+len, (long)mtd->size);
+		printk(KERN_DEBUG "pmc551_point() out of bounds (%ld > %ld)\n",
+			(long)from + len, (long)mtd->size);
 #endif
 		return -EINVAL;
 	}
 
-        soff_hi = from & ~(priv->asize - 1);
-        soff_lo = from & (priv->asize - 1);
+	soff_hi = from & ~(priv->asize - 1);
+	soff_lo = from & (priv->asize - 1);
 
 	/* Cheap hack optimization */
-	if( priv->curr_map0 != from ) {
-        	pci_write_config_dword ( priv->dev, PMC551_PCI_MEM_MAP0,
-                                 	(priv->base_map0 | soff_hi) );
+	if (priv->curr_map0 != from) {
+		pci_write_config_dword(priv->dev, PMC551_PCI_MEM_MAP0,
+					(priv->base_map0 | soff_hi));
 		priv->curr_map0 = soff_hi;
 	}
 
@@ -204,137 +204,144 @@
 	return 0;
 }
 
-
-static void pmc551_unpoint (struct mtd_info *mtd, u_char *addr, loff_t from, size_t len)
+static void pmc551_unpoint(struct mtd_info *mtd, u_char * addr, loff_t from,
+			   size_t len)
 {
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_unpoint()\n");
 #endif
 }
 
-
-static int pmc551_read (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf)
+static int pmc551_read(struct mtd_info *mtd, loff_t from, size_t len,
+			size_t * retlen, u_char * buf)
 {
-        struct mypriv *priv = mtd->priv;
-        u32 soff_hi, soff_lo; /* start address offset hi/lo */
-        u32 eoff_hi, eoff_lo; /* end address offset hi/lo */
-        unsigned long end;
+	struct mypriv *priv = mtd->priv;
+	u32 soff_hi, soff_lo;	/* start address offset hi/lo */
+	u32 eoff_hi, eoff_lo;	/* end address offset hi/lo */
+	unsigned long end;
 	u_char *ptr;
-        u_char *copyto = buf;
+	u_char *copyto = buf;
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_read(pos:%ld, len:%ld) asize: %ld\n", (long)from, (long)len, (long)priv->asize);
+	printk(KERN_DEBUG "pmc551_read(pos:%ld, len:%ld) asize: %ld\n",
+		(long)from, (long)len, (long)priv->asize);
 #endif
 
-        end = from + len - 1;
+	end = from + len - 1;
 
-        /* Is it past the end? */
-        if (end > mtd->size) {
+	/* Is it past the end? */
+	if (end > mtd->size) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_read() out of bounds (%ld > %ld)\n", (long) end, (long)mtd->size);
+		printk(KERN_DEBUG "pmc551_read() out of bounds (%ld > %ld)\n",
+			(long)end, (long)mtd->size);
 #endif
-                return -EINVAL;
-        }
+		return -EINVAL;
+	}
 
-        soff_hi = from & ~(priv->asize - 1);
-        eoff_hi = end & ~(priv->asize - 1);
-        soff_lo = from & (priv->asize - 1);
-        eoff_lo = end & (priv->asize - 1);
+	soff_hi = from & ~(priv->asize - 1);
+	eoff_hi = end & ~(priv->asize - 1);
+	soff_lo = from & (priv->asize - 1);
+	eoff_lo = end & (priv->asize - 1);
 
-	pmc551_point (mtd, from, len, retlen, &ptr);
+	pmc551_point(mtd, from, len, retlen, &ptr);
 
-        if (soff_hi == eoff_hi) {
-                /* The whole thing fits within one access, so just one shot
-                   will do it. */
-                memcpy(copyto, ptr, len);
-                copyto += len;
-        } else {
-                /* We have to do multiple writes to get all the data
-                   written. */
-                while (soff_hi != eoff_hi) {
+	if (soff_hi == eoff_hi) {
+		/* The whole thing fits within one access, so just one shot
+		   will do it. */
+		memcpy(copyto, ptr, len);
+		copyto += len;
+	} else {
+		/* We have to do multiple reads to get all the data
+		   read. */
+		while (soff_hi != eoff_hi) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-			printk( KERN_DEBUG "pmc551_read() soff_hi: %ld, eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
+			printk(KERN_DEBUG "pmc551_read() soff_hi: %ld, "
+				"eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
 #endif
-                        memcpy(copyto, ptr, priv->asize);
-                        copyto += priv->asize;
-                        if (soff_hi + priv->asize >= mtd->size) {
-                                goto out;
-                        }
-                        soff_hi += priv->asize;
-			pmc551_point (mtd, soff_hi, priv->asize, retlen, &ptr);
-                }
-                memcpy(copyto, ptr, eoff_lo);
-                copyto += eoff_lo;
-        }
+			memcpy(copyto, ptr, priv->asize);
+			copyto += priv->asize;
+			if (soff_hi + priv->asize >= mtd->size) {
+				goto out;
+			}
+			soff_hi += priv->asize;
+			pmc551_point(mtd, soff_hi, priv->asize, retlen, &ptr);
+		}
+		memcpy(copyto, ptr, eoff_lo);
+		copyto += eoff_lo;
+	}
 
-out:
+      out:
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_read() done\n");
 #endif
-        *retlen = copyto - buf;
-        return 0;
+	*retlen = copyto - buf;
+	return 0;
 }
 
-static int pmc551_write (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf)
+static int pmc551_write(struct mtd_info *mtd, loff_t to, size_t len,
+			size_t * retlen, const u_char * buf)
 {
-        struct mypriv *priv = mtd->priv;
-        u32 soff_hi, soff_lo; /* start address offset hi/lo */
-        u32 eoff_hi, eoff_lo; /* end address offset hi/lo */
-        unsigned long end;
+	struct mypriv *priv = mtd->priv;
+	u32 soff_hi, soff_lo;	/* start address offset hi/lo */
+	u32 eoff_hi, eoff_lo;	/* end address offset hi/lo */
+	unsigned long end;
 	u_char *ptr;
-        const u_char *copyfrom = buf;
-
+	const u_char *copyfrom = buf;
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_write(pos:%ld, len:%ld) asize:%ld\n", (long)to, (long)len, (long)priv->asize);
+	printk(KERN_DEBUG "pmc551_write(pos:%ld, len:%ld) asize:%ld\n",
+		(long)to, (long)len, (long)priv->asize);
 #endif
 
-        end = to + len - 1;
-        /* Is it past the end?  or did the u32 wrap? */
-        if (end > mtd->size ) {
+	end = to + len - 1;
+	/* Is it past the end?  or did the u32 wrap? */
+	if (end > mtd->size) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-	printk(KERN_DEBUG "pmc551_write() out of bounds (end: %ld, size: %ld, to: %ld)\n", (long) end, (long)mtd->size, (long)to);
+		printk(KERN_DEBUG "pmc551_write() out of bounds (end: %ld, "
+			"size: %ld, to: %ld)\n", (long)end, (long)mtd->size,
+			(long)to);
 #endif
-                return -EINVAL;
-        }
+		return -EINVAL;
+	}
 
-        soff_hi = to & ~(priv->asize - 1);
-        eoff_hi = end & ~(priv->asize - 1);
-        soff_lo = to & (priv->asize - 1);
-        eoff_lo = end & (priv->asize - 1);
+	soff_hi = to & ~(priv->asize - 1);
+	eoff_hi = end & ~(priv->asize - 1);
+	soff_lo = to & (priv->asize - 1);
+	eoff_lo = end & (priv->asize - 1);
 
-	pmc551_point (mtd, to, len, retlen, &ptr);
+	pmc551_point(mtd, to, len, retlen, &ptr);
 
-        if (soff_hi == eoff_hi) {
-                /* The whole thing fits within one access, so just one shot
-                   will do it. */
-                memcpy(ptr, copyfrom, len);
-                copyfrom += len;
-        } else {
-                /* We have to do multiple writes to get all the data
-                   written. */
-                while (soff_hi != eoff_hi) {
+	if (soff_hi == eoff_hi) {
+		/* The whole thing fits within one access, so just one shot
+		   will do it. */
+		memcpy(ptr, copyfrom, len);
+		copyfrom += len;
+	} else {
+		/* We have to do multiple writes to get all the data
+		   written. */
+		while (soff_hi != eoff_hi) {
 #ifdef CONFIG_MTD_PMC551_DEBUG
-			printk( KERN_DEBUG "pmc551_write() soff_hi: %ld, eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
+			printk(KERN_DEBUG "pmc551_write() soff_hi: %ld, "
+				"eoff_hi: %ld\n", (long)soff_hi, (long)eoff_hi);
 #endif
-                	memcpy(ptr, copyfrom, priv->asize);
-                	copyfrom += priv->asize;
-                        if (soff_hi >= mtd->size) {
-                                goto out;
-                        }
-                        soff_hi += priv->asize;
-			pmc551_point (mtd, soff_hi, priv->asize, retlen, &ptr);
-                }
-                memcpy(ptr, copyfrom, eoff_lo);
-                copyfrom += eoff_lo;
-        }
+			memcpy(ptr, copyfrom, priv->asize);
+			copyfrom += priv->asize;
+			if (soff_hi >= mtd->size) {
+				goto out;
+			}
+			soff_hi += priv->asize;
+			pmc551_point(mtd, soff_hi, priv->asize, retlen, &ptr);
+		}
+		memcpy(ptr, copyfrom, eoff_lo);
+		copyfrom += eoff_lo;
+	}
 
-out:
+      out:
 #ifdef CONFIG_MTD_PMC551_DEBUG
 	printk(KERN_DEBUG "pmc551_write() done\n");
 #endif
-        *retlen = copyfrom - buf;
-        return 0;
+	*retlen = copyfrom - buf;
+	return 0;
 }
 
 /*
@@ -349,58 +356,58 @@
  * mechanism
  * returns the size of the memory region found.
  */
-static u32 fixup_pmc551 (struct pci_dev *dev)
+static u32 fixup_pmc551(struct pci_dev *dev)
 {
 #ifdef CONFIG_MTD_PMC551_BUGFIX
-        u32 dram_data;
+	u32 dram_data;
 #endif
-        u32 size, dcmd, cfg, dtmp;
-        u16 cmd, tmp, i;
+	u32 size, dcmd, cfg, dtmp;
+	u16 cmd, tmp, i;
 	u8 bcmd, counter;
 
-        /* Sanity Check */
-        if(!dev) {
-                return -ENODEV;
-        }
+	/* Sanity Check */
+	if (!dev) {
+		return -ENODEV;
+	}
 
 	/*
 	 * Attempt to reset the card
 	 * FIXME: Stop Spinning registers
 	 */
-	counter=0;
+	counter = 0;
 	/* unlock registers */
-	pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, 0xA5 );
+	pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, 0xA5);
 	/* read in old data */
-	pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd );
+	pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd);
 	/* bang the reset line up and down for a few */
-	for(i=0;i<10;i++) {
-		counter=0;
+	for (i = 0; i < 10; i++) {
+		counter = 0;
 		bcmd &= ~0x80;
-		while(counter++ < 100) {
+		while (counter++ < 100) {
 			pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, bcmd);
 		}
-		counter=0;
+		counter = 0;
 		bcmd |= 0x80;
-		while(counter++ < 100) {
+		while (counter++ < 100) {
 			pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, bcmd);
 		}
 	}
-	bcmd |= (0x40|0x20);
+	bcmd |= (0x40 | 0x20);
 	pci_write_config_byte(dev, PMC551_SYS_CTRL_REG, bcmd);
 
-        /*
+	/*
 	 * Take care and turn off the memory on the device while we
 	 * tweak the configurations
 	 */
-        pci_read_config_word(dev, PCI_COMMAND, &cmd);
-        tmp = cmd & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY);
-        pci_write_config_word(dev, PCI_COMMAND, tmp);
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	tmp = cmd & ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+	pci_write_config_word(dev, PCI_COMMAND, tmp);
 
 	/*
 	 * Disable existing aperture before probing memory size
 	 */
 	pci_read_config_dword(dev, PMC551_PCI_MEM_MAP0, &dcmd);
-        dtmp=(dcmd|PMC551_PCI_MEM_MAP_ENABLE|PMC551_PCI_MEM_MAP_REG_EN);
+	dtmp = (dcmd | PMC551_PCI_MEM_MAP_ENABLE | PMC551_PCI_MEM_MAP_REG_EN);
 	pci_write_config_dword(dev, PMC551_PCI_MEM_MAP0, dtmp);
 	/*
 	 * Grab old BAR0 config so that we can figure out memory size
@@ -411,220 +418,230 @@
 	 * then write all 1's to the memory space, read back the result into
 	 * "size", and then write back all the old config.
 	 */
-	pci_read_config_dword( dev, PCI_BASE_ADDRESS_0, &cfg );
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &cfg);
 #ifndef CONFIG_MTD_PMC551_BUGFIX
-	pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, ~0 );
-	pci_read_config_dword( dev, PCI_BASE_ADDRESS_0, &size );
-	size = (size&PCI_BASE_ADDRESS_MEM_MASK);
-	size &= ~(size-1);
-	pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, cfg );
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, ~0);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &size);
+	size = (size & PCI_BASE_ADDRESS_MEM_MASK);
+	size &= ~(size - 1);
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, cfg);
 #else
-        /*
-         * Get the size of the memory by reading all the DRAM size values
-         * and adding them up.
-         *
-         * KLUDGE ALERT: the boards we are using have invalid column and
-         * row mux values.  We fix them here, but this will break other
-         * memory configurations.
-         */
-        pci_read_config_dword(dev, PMC551_DRAM_BLK0, &dram_data);
-        size = PMC551_DRAM_BLK_GET_SIZE(dram_data);
-        dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
-        dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
-        pci_write_config_dword(dev, PMC551_DRAM_BLK0, dram_data);
+	/*
+	 * Get the size of the memory by reading all the DRAM size values
+	 * and adding them up.
+	 *
+	 * KLUDGE ALERT: the boards we are using have invalid column and
+	 * row mux values.  We fix them here, but this will break other
+	 * memory configurations.
+	 */
+	pci_read_config_dword(dev, PMC551_DRAM_BLK0, &dram_data);
+	size = PMC551_DRAM_BLK_GET_SIZE(dram_data);
+	dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
+	dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
+	pci_write_config_dword(dev, PMC551_DRAM_BLK0, dram_data);
 
-        pci_read_config_dword(dev, PMC551_DRAM_BLK1, &dram_data);
-        size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
-        dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
-        dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
-        pci_write_config_dword(dev, PMC551_DRAM_BLK1, dram_data);
+	pci_read_config_dword(dev, PMC551_DRAM_BLK1, &dram_data);
+	size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
+	dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
+	dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
+	pci_write_config_dword(dev, PMC551_DRAM_BLK1, dram_data);
 
-        pci_read_config_dword(dev, PMC551_DRAM_BLK2, &dram_data);
-        size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
-        dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
-        dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
-        pci_write_config_dword(dev, PMC551_DRAM_BLK2, dram_data);
+	pci_read_config_dword(dev, PMC551_DRAM_BLK2, &dram_data);
+	size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
+	dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
+	dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
+	pci_write_config_dword(dev, PMC551_DRAM_BLK2, dram_data);
 
-        pci_read_config_dword(dev, PMC551_DRAM_BLK3, &dram_data);
-        size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
-        dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
-        dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
-        pci_write_config_dword(dev, PMC551_DRAM_BLK3, dram_data);
+	pci_read_config_dword(dev, PMC551_DRAM_BLK3, &dram_data);
+	size += PMC551_DRAM_BLK_GET_SIZE(dram_data);
+	dram_data = PMC551_DRAM_BLK_SET_COL_MUX(dram_data, 0x5);
+	dram_data = PMC551_DRAM_BLK_SET_ROW_MUX(dram_data, 0x9);
+	pci_write_config_dword(dev, PMC551_DRAM_BLK3, dram_data);
 
-        /*
-         * Oops .. something went wrong
-         */
-        if( (size &= PCI_BASE_ADDRESS_MEM_MASK) == 0) {
-                return -ENODEV;
-        }
-#endif /* CONFIG_MTD_PMC551_BUGFIX */
+	/*
+	 * Oops .. something went wrong
+	 */
+	if ((size &= PCI_BASE_ADDRESS_MEM_MASK) == 0) {
+		return -ENODEV;
+	}
+#endif				/* CONFIG_MTD_PMC551_BUGFIX */
 
-	if ((cfg&PCI_BASE_ADDRESS_SPACE) != PCI_BASE_ADDRESS_SPACE_MEMORY) {
-                return -ENODEV;
+	if ((cfg & PCI_BASE_ADDRESS_SPACE) != PCI_BASE_ADDRESS_SPACE_MEMORY) {
+		return -ENODEV;
 	}
 
-        /*
-         * Precharge Dram
-         */
-        pci_write_config_word( dev, PMC551_SDRAM_MA, 0x0400 );
-        pci_write_config_word( dev, PMC551_SDRAM_CMD, 0x00bf );
+	/*
+	 * Precharge Dram
+	 */
+	pci_write_config_word(dev, PMC551_SDRAM_MA, 0x0400);
+	pci_write_config_word(dev, PMC551_SDRAM_CMD, 0x00bf);
 
-        /*
-         * Wait until command has gone through
-         * FIXME: register spinning issue
-         */
-        do {	pci_read_config_word( dev, PMC551_SDRAM_CMD, &cmd );
-		if(counter++ > 100)break;
-        } while ( (PCI_COMMAND_IO) & cmd );
+	/*
+	 * Wait until command has gone through
+	 * FIXME: register spinning issue
+	 */
+	do {
+		pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd);
+		if (counter++ > 100)
+			break;
+	} while ((PCI_COMMAND_IO) & cmd);
 
-        /*
+	/*
 	 * Turn on auto refresh
 	 * The loop is taken directly from Ramix's example code.  I assume that
 	 * this must be held high for some duration of time, but I can find no
 	 * documentation refrencing the reasons why.
-         */
-        for ( i = 1; i<=8 ; i++) {
-                pci_write_config_word (dev, PMC551_SDRAM_CMD, 0x0df);
+	 */
+	for (i = 1; i <= 8; i++) {
+		pci_write_config_word(dev, PMC551_SDRAM_CMD, 0x0df);
 
-                /*
-                 * Make certain command has gone through
-                 * FIXME: register spinning issue
-                 */
-		counter=0;
-                do {	pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd);
-			if(counter++ > 100)break;
-                } while ( (PCI_COMMAND_IO) & cmd );
-        }
+		/*
+		 * Make certain command has gone through
+		 * FIXME: register spinning issue
+		 */
+		counter = 0;
+		do {
+			pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd);
+			if (counter++ > 100)
+				break;
+		} while ((PCI_COMMAND_IO) & cmd);
+	}
 
-        pci_write_config_word ( dev, PMC551_SDRAM_MA, 0x0020);
-        pci_write_config_word ( dev, PMC551_SDRAM_CMD, 0x0ff);
+	pci_write_config_word(dev, PMC551_SDRAM_MA, 0x0020);
+	pci_write_config_word(dev, PMC551_SDRAM_CMD, 0x0ff);
 
-        /*
-         * Wait until command completes
-         * FIXME: register spinning issue
-         */
-	counter=0;
-        do {	pci_read_config_word ( dev, PMC551_SDRAM_CMD, &cmd);
-		if(counter++ > 100)break;
-        } while ( (PCI_COMMAND_IO) & cmd );
+	/*
+	 * Wait until command completes
+	 * FIXME: register spinning issue
+	 */
+	counter = 0;
+	do {
+		pci_read_config_word(dev, PMC551_SDRAM_CMD, &cmd);
+		if (counter++ > 100)
+			break;
+	} while ((PCI_COMMAND_IO) & cmd);
 
-        pci_read_config_dword ( dev, PMC551_DRAM_CFG, &dcmd);
-        dcmd |= 0x02000000;
-        pci_write_config_dword ( dev, PMC551_DRAM_CFG, dcmd);
+	pci_read_config_dword(dev, PMC551_DRAM_CFG, &dcmd);
+	dcmd |= 0x02000000;
+	pci_write_config_dword(dev, PMC551_DRAM_CFG, dcmd);
 
-        /*
-         * Check to make certain fast back-to-back, if not
-         * then set it so
-         */
-        pci_read_config_word( dev, PCI_STATUS, &cmd);
-        if((cmd&PCI_COMMAND_FAST_BACK) == 0) {
-                cmd |= PCI_COMMAND_FAST_BACK;
-                pci_write_config_word( dev, PCI_STATUS, cmd);
-        }
+	/*
+	 * Check to make certain fast back-to-back, if not
+	 * then set it so
+	 */
+	pci_read_config_word(dev, PCI_STATUS, &cmd);
+	if ((cmd & PCI_COMMAND_FAST_BACK) == 0) {
+		cmd |= PCI_COMMAND_FAST_BACK;
+		pci_write_config_word(dev, PCI_STATUS, cmd);
+	}
 
-        /*
-         * Check to make certain the DEVSEL is set correctly, this device
-         * has a tendancy to assert DEVSEL and TRDY when a write is performed
-         * to the memory when memory is read-only
-         */
-        if((cmd&PCI_STATUS_DEVSEL_MASK) != 0x0) {
-                cmd &= ~PCI_STATUS_DEVSEL_MASK;
-                pci_write_config_word( dev, PCI_STATUS, cmd );
-        }
-        /*
-         * Set to be prefetchable and put everything back based on old cfg.
+	/*
+	 * Check to make certain the DEVSEL is set correctly, this device
+	 * has a tendency to assert DEVSEL and TRDY when a write is performed
+	 * to the memory when memory is read-only
+	 */
+	if ((cmd & PCI_STATUS_DEVSEL_MASK) != 0x0) {
+		cmd &= ~PCI_STATUS_DEVSEL_MASK;
+		pci_write_config_word(dev, PCI_STATUS, cmd);
+	}
+	/*
+	 * Set to be prefetchable and put everything back based on old cfg.
 	 * it's possible that the reset of the V370PDC nuked the original
 	 * setup
-         */
+	 */
 	/*
-        cfg |= PCI_BASE_ADDRESS_MEM_PREFETCH;
-	pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, cfg );
-	*/
+	   cfg |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+	   pci_write_config_dword( dev, PCI_BASE_ADDRESS_0, cfg );
+	 */
 
-        /*
-         * Turn PCI memory and I/O bus access back on
-         */
-        pci_write_config_word( dev, PCI_COMMAND,
-                               PCI_COMMAND_MEMORY | PCI_COMMAND_IO );
+	/*
+	 * Turn PCI memory and I/O bus access back on
+	 */
+	pci_write_config_word(dev, PCI_COMMAND,
+			      PCI_COMMAND_MEMORY | PCI_COMMAND_IO);
 #ifdef CONFIG_MTD_PMC551_DEBUG
-        /*
-         * Some screen fun
-         */
-        printk(KERN_DEBUG "pmc551: %d%c (0x%x) of %sprefetchable memory at 0x%llx\n",
-	       (size<1024)?size:(size<1048576)?size>>10:size>>20,
-               (size<1024)?'B':(size<1048576)?'K':'M',
-	       size, ((dcmd&(0x1<<3)) == 0)?"non-":"",
-               (unsigned long long)((dev->resource[0].start)&PCI_BASE_ADDRESS_MEM_MASK));
+	/*
+	 * Some screen fun
+	 */
+	printk(KERN_DEBUG "pmc551: %d%c (0x%x) of %sprefetchable memory at "
+		"0x%llx\n", (size < 1024) ? size : (size < 1048576) ?
+		size >> 10 : size >> 20,
+		(size < 1024) ? 'B' : (size < 1048576) ? 'K' : 'M', size,
+		((dcmd & (0x1 << 3)) == 0) ? "non-" : "",
+		(unsigned long long)pci_resource_start(dev, 0));
 
-        /*
-         * Check to see the state of the memory
-         */
-        pci_read_config_dword( dev, PMC551_DRAM_BLK0, &dcmd );
-        printk(KERN_DEBUG "pmc551: DRAM_BLK0 Flags: %s,%s\n"
-			  "pmc551: DRAM_BLK0 Size: %d at %d\n"
-			  "pmc551: DRAM_BLK0 Row MUX: %d, Col MUX: %d\n",
-               (((0x1<<1)&dcmd) == 0)?"RW":"RO",
-               (((0x1<<0)&dcmd) == 0)?"Off":"On",
-	       PMC551_DRAM_BLK_GET_SIZE(dcmd),
-	       ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) );
+	/*
+	 * Check to see the state of the memory
+	 */
+	pci_read_config_dword(dev, PMC551_DRAM_BLK0, &dcmd);
+	printk(KERN_DEBUG "pmc551: DRAM_BLK0 Flags: %s,%s\n"
+		"pmc551: DRAM_BLK0 Size: %d at %d\n"
+		"pmc551: DRAM_BLK0 Row MUX: %d, Col MUX: %d\n",
+		(((0x1 << 1) & dcmd) == 0) ? "RW" : "RO",
+		(((0x1 << 0) & dcmd) == 0) ? "Off" : "On",
+		PMC551_DRAM_BLK_GET_SIZE(dcmd),
+		((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7),
+		((dcmd >> 9) & 0xF));
 
-        pci_read_config_dword( dev, PMC551_DRAM_BLK1, &dcmd );
-        printk(KERN_DEBUG "pmc551: DRAM_BLK1 Flags: %s,%s\n"
-			  "pmc551: DRAM_BLK1 Size: %d at %d\n"
-			  "pmc551: DRAM_BLK1 Row MUX: %d, Col MUX: %d\n",
-               (((0x1<<1)&dcmd) == 0)?"RW":"RO",
-               (((0x1<<0)&dcmd) == 0)?"Off":"On",
-	       PMC551_DRAM_BLK_GET_SIZE(dcmd),
-	       ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) );
+	pci_read_config_dword(dev, PMC551_DRAM_BLK1, &dcmd);
+	printk(KERN_DEBUG "pmc551: DRAM_BLK1 Flags: %s,%s\n"
+		"pmc551: DRAM_BLK1 Size: %d at %d\n"
+		"pmc551: DRAM_BLK1 Row MUX: %d, Col MUX: %d\n",
+		(((0x1 << 1) & dcmd) == 0) ? "RW" : "RO",
+		(((0x1 << 0) & dcmd) == 0) ? "Off" : "On",
+		PMC551_DRAM_BLK_GET_SIZE(dcmd),
+		((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7),
+		((dcmd >> 9) & 0xF));
 
-        pci_read_config_dword( dev, PMC551_DRAM_BLK2, &dcmd );
-        printk(KERN_DEBUG "pmc551: DRAM_BLK2 Flags: %s,%s\n"
-			  "pmc551: DRAM_BLK2 Size: %d at %d\n"
-			  "pmc551: DRAM_BLK2 Row MUX: %d, Col MUX: %d\n",
-               (((0x1<<1)&dcmd) == 0)?"RW":"RO",
-               (((0x1<<0)&dcmd) == 0)?"Off":"On",
-	       PMC551_DRAM_BLK_GET_SIZE(dcmd),
-	       ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) );
+	pci_read_config_dword(dev, PMC551_DRAM_BLK2, &dcmd);
+	printk(KERN_DEBUG "pmc551: DRAM_BLK2 Flags: %s,%s\n"
+		"pmc551: DRAM_BLK2 Size: %d at %d\n"
+		"pmc551: DRAM_BLK2 Row MUX: %d, Col MUX: %d\n",
+		(((0x1 << 1) & dcmd) == 0) ? "RW" : "RO",
+		(((0x1 << 0) & dcmd) == 0) ? "Off" : "On",
+		PMC551_DRAM_BLK_GET_SIZE(dcmd),
+		((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7),
+		((dcmd >> 9) & 0xF));
 
-        pci_read_config_dword( dev, PMC551_DRAM_BLK3, &dcmd );
-        printk(KERN_DEBUG "pmc551: DRAM_BLK3 Flags: %s,%s\n"
-			  "pmc551: DRAM_BLK3 Size: %d at %d\n"
-			  "pmc551: DRAM_BLK3 Row MUX: %d, Col MUX: %d\n",
-               (((0x1<<1)&dcmd) == 0)?"RW":"RO",
-               (((0x1<<0)&dcmd) == 0)?"Off":"On",
-	       PMC551_DRAM_BLK_GET_SIZE(dcmd),
-	       ((dcmd>>20)&0x7FF), ((dcmd>>13)&0x7), ((dcmd>>9)&0xF) );
+	pci_read_config_dword(dev, PMC551_DRAM_BLK3, &dcmd);
+	printk(KERN_DEBUG "pmc551: DRAM_BLK3 Flags: %s,%s\n"
+		"pmc551: DRAM_BLK3 Size: %d at %d\n"
+		"pmc551: DRAM_BLK3 Row MUX: %d, Col MUX: %d\n",
+		(((0x1 << 1) & dcmd) == 0) ? "RW" : "RO",
+		(((0x1 << 0) & dcmd) == 0) ? "Off" : "On",
+		PMC551_DRAM_BLK_GET_SIZE(dcmd),
+		((dcmd >> 20) & 0x7FF), ((dcmd >> 13) & 0x7),
+		((dcmd >> 9) & 0xF));
 
-        pci_read_config_word( dev, PCI_COMMAND, &cmd );
-        printk( KERN_DEBUG "pmc551: Memory Access %s\n",
-                (((0x1<<1)&cmd) == 0)?"off":"on" );
-        printk( KERN_DEBUG "pmc551: I/O Access %s\n",
-                (((0x1<<0)&cmd) == 0)?"off":"on" );
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	printk(KERN_DEBUG "pmc551: Memory Access %s\n",
+		(((0x1 << 1) & cmd) == 0) ? "off" : "on");
+	printk(KERN_DEBUG "pmc551: I/O Access %s\n",
+		(((0x1 << 0) & cmd) == 0) ? "off" : "on");
 
-        pci_read_config_word( dev, PCI_STATUS, &cmd );
-        printk( KERN_DEBUG "pmc551: Devsel %s\n",
-                ((PCI_STATUS_DEVSEL_MASK&cmd)==0x000)?"Fast":
-                ((PCI_STATUS_DEVSEL_MASK&cmd)==0x200)?"Medium":
-                ((PCI_STATUS_DEVSEL_MASK&cmd)==0x400)?"Slow":"Invalid" );
+	pci_read_config_word(dev, PCI_STATUS, &cmd);
+	printk(KERN_DEBUG "pmc551: Devsel %s\n",
+		((PCI_STATUS_DEVSEL_MASK & cmd) == 0x000) ? "Fast" :
+		((PCI_STATUS_DEVSEL_MASK & cmd) == 0x200) ? "Medium" :
+		((PCI_STATUS_DEVSEL_MASK & cmd) == 0x400) ? "Slow" : "Invalid");
 
-        printk( KERN_DEBUG "pmc551: %sFast Back-to-Back\n",
-                ((PCI_COMMAND_FAST_BACK&cmd) == 0)?"Not ":"" );
+	printk(KERN_DEBUG "pmc551: %sFast Back-to-Back\n",
+		((PCI_COMMAND_FAST_BACK & cmd) == 0) ? "Not " : "");
 
-	pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd );
-	printk( KERN_DEBUG "pmc551: EEPROM is under %s control\n"
-			   "pmc551: System Control Register is %slocked to PCI access\n"
-			   "pmc551: System Control Register is %slocked to EEPROM access\n",
-		(bcmd&0x1)?"software":"hardware",
-		(bcmd&0x20)?"":"un", (bcmd&0x40)?"":"un");
+	pci_read_config_byte(dev, PMC551_SYS_CTRL_REG, &bcmd);
+	printk(KERN_DEBUG "pmc551: EEPROM is under %s control\n"
+		"pmc551: System Control Register is %slocked to PCI access\n"
+		"pmc551: System Control Register is %slocked to EEPROM access\n",
+		(bcmd & 0x1) ? "software" : "hardware",
+		(bcmd & 0x20) ? "" : "un", (bcmd & 0x40) ? "" : "un");
 #endif
-        return size;
+	return size;
 }
 
 /*
  * Kernel version specific module stuffages
  */
 
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Mark Ferrell <mferrell@mvista.com>");
 MODULE_DESCRIPTION(PMC551_VERSION);
@@ -632,11 +649,11 @@
 /*
  * Stuff these outside the ifdef so as to not bust compiled in driver support
  */
-static int msize=0;
+static int msize = 0;
 #if defined(CONFIG_MTD_PMC551_APERTURE_SIZE)
-static int asize=CONFIG_MTD_PMC551_APERTURE_SIZE
+static int asize = CONFIG_MTD_PMC551_APERTURE_SIZE;
 #else
-static int asize=0;
+static int asize = 0;
 #endif
 
 module_param(msize, int, 0);
@@ -649,164 +666,174 @@
  */
 static int __init init_pmc551(void)
 {
-        struct pci_dev *PCI_Device = NULL;
-        struct mypriv *priv;
-        int count, found=0;
-        struct mtd_info *mtd;
-        u32 length = 0;
+	struct pci_dev *PCI_Device = NULL;
+	struct mypriv *priv;
+	int count, found = 0;
+	struct mtd_info *mtd;
+	u32 length = 0;
 
-	if(msize) {
-		msize = (1 << (ffs(msize) - 1))<<20;
-		if (msize > (1<<30)) {
-			printk(KERN_NOTICE "pmc551: Invalid memory size [%d]\n", msize);
+	if (msize) {
+		msize = (1 << (ffs(msize) - 1)) << 20;
+		if (msize > (1 << 30)) {
+			printk(KERN_NOTICE "pmc551: Invalid memory size [%d]\n",
+				msize);
 			return -EINVAL;
 		}
 	}
 
-	if(asize) {
-		asize = (1 << (ffs(asize) - 1))<<20;
-		if (asize > (1<<30) ) {
-			printk(KERN_NOTICE "pmc551: Invalid aperture size [%d]\n", asize);
+	if (asize) {
+		asize = (1 << (ffs(asize) - 1)) << 20;
+		if (asize > (1 << 30)) {
+			printk(KERN_NOTICE "pmc551: Invalid aperture size "
+				"[%d]\n", asize);
 			return -EINVAL;
 		}
 	}
 
-        printk(KERN_INFO PMC551_VERSION);
+	printk(KERN_INFO PMC551_VERSION);
 
-        /*
-         * PCU-bus chipset probe.
-         */
-        for( count = 0; count < MAX_MTD_DEVICES; count++ ) {
+	/*
+	 * PCI-bus chipset probe.
+	 */
+	for (count = 0; count < MAX_MTD_DEVICES; count++) {
 
-                if ((PCI_Device = pci_find_device(PCI_VENDOR_ID_V3_SEMI,
-                                                  PCI_DEVICE_ID_V3_SEMI_V370PDC,
-						  PCI_Device ) ) == NULL) {
-                        break;
-                }
+		if ((PCI_Device = pci_get_device(PCI_VENDOR_ID_V3_SEMI,
+						  PCI_DEVICE_ID_V3_SEMI_V370PDC,
+						  PCI_Device)) == NULL) {
+			break;
+		}
 
-                printk(KERN_NOTICE "pmc551: Found PCI V370PDC at 0x%llx\n",
-				    (unsigned long long)PCI_Device->resource[0].start);
+		printk(KERN_NOTICE "pmc551: Found PCI V370PDC at 0x%llx\n",
+			(unsigned long long)pci_resource_start(PCI_Device, 0));
 
-                /*
-                 * The PMC551 device acts VERY weird if you don't init it
-                 * first.  i.e. it will not correctly report devsel.  If for
-                 * some reason the sdram is in a wrote-protected state the
-                 * device will DEVSEL when it is written to causing problems
-                 * with the oldproc.c driver in
-                 * some kernels (2.2.*)
-                 */
-                if((length = fixup_pmc551(PCI_Device)) <= 0) {
-                        printk(KERN_NOTICE "pmc551: Cannot init SDRAM\n");
-                        break;
-                }
+		/*
+		 * The PMC551 acts VERY weird unless it is initialised first,
+		 * i.e. it will not correctly report devsel.  If the sdram is
+		 * write-protected the device will DEVSEL when written to,
+		 * causing problems with the oldproc.c driver in some kernels
+		 * (2.2.*).  length is u32, so test the result as a signed
+		 * value or a negative errno from the fixup is never caught.
+		 */
+		if ((int)(length = fixup_pmc551(PCI_Device)) <= 0) {
+			printk(KERN_NOTICE "pmc551: Cannot init SDRAM\n");
+			break;
+		}
 
 		/*
 		 * This is needed until the driver is capable of reading the
 		 * onboard I2C SROM to discover the "real" memory size.
 		 */
-		if(msize) {
+		if (msize) {
 			length = msize;
-			printk(KERN_NOTICE "pmc551: Using specified memory size 0x%x\n", length);
+			printk(KERN_NOTICE "pmc551: Using specified memory "
+				"size 0x%x\n", length);
 		} else {
 			msize = length;
 		}
 
-                mtd = kmalloc(sizeof(struct mtd_info), GFP_KERNEL);
-                if (!mtd) {
-                        printk(KERN_NOTICE "pmc551: Cannot allocate new MTD device.\n");
-                        break;
-                }
-
-                memset(mtd, 0, sizeof(struct mtd_info));
-
-                priv = kmalloc (sizeof(struct mypriv), GFP_KERNEL);
-                if (!priv) {
-                        printk(KERN_NOTICE "pmc551: Cannot allocate new MTD device.\n");
-                        kfree(mtd);
-                        break;
-                }
-                memset(priv, 0, sizeof(*priv));
-                mtd->priv = priv;
-                priv->dev = PCI_Device;
-
-		if(asize > length) {
-			printk(KERN_NOTICE "pmc551: reducing aperture size to fit %dM\n",length>>20);
-			priv->asize = asize = length;
-		} else if (asize == 0 || asize == length) {
-			printk(KERN_NOTICE "pmc551: Using existing aperture size %dM\n", length>>20);
-			priv->asize = asize = length;
-		} else {
-			printk(KERN_NOTICE "pmc551: Using specified aperture size %dM\n", asize>>20);
-			priv->asize = asize;
-		}
-                priv->start = ioremap(((PCI_Device->resource[0].start)
-                                       & PCI_BASE_ADDRESS_MEM_MASK),
-                                      priv->asize);
-
-		if (!priv->start) {
-			printk(KERN_NOTICE "pmc551: Unable to map IO space\n");
-                        kfree(mtd->priv);
-                        kfree(mtd);
+		mtd = kzalloc(sizeof(struct mtd_info), GFP_KERNEL);
+		if (!mtd) {
+			printk(KERN_NOTICE "pmc551: Cannot allocate new MTD "
+				"device.\n");
 			break;
 		}
 
+		priv = kzalloc(sizeof(struct mypriv), GFP_KERNEL);
+		if (!priv) {
+			printk(KERN_NOTICE "pmc551: Cannot allocate new MTD "
+				"device.\n");
+			kfree(mtd);
+			break;
+		}
+		mtd->priv = priv;
+		priv->dev = PCI_Device;
+
+		if (asize > length) {
+			printk(KERN_NOTICE "pmc551: reducing aperture size to "
+				"fit %dM\n", length >> 20);
+			priv->asize = asize = length;
+		} else if (asize == 0 || asize == length) {
+			printk(KERN_NOTICE "pmc551: Using existing aperture "
+				"size %dM\n", length >> 20);
+			priv->asize = asize = length;
+		} else {
+			printk(KERN_NOTICE "pmc551: Using specified aperture "
+				"size %dM\n", asize >> 20);
+			priv->asize = asize;
+		}
+		priv->start = pci_iomap(PCI_Device, 0, priv->asize);
+
+		if (!priv->start) {
+			printk(KERN_NOTICE "pmc551: Unable to map IO space\n");
+			kfree(mtd->priv);
+			kfree(mtd);
+			break;
+		}
 #ifdef CONFIG_MTD_PMC551_DEBUG
-		printk( KERN_DEBUG "pmc551: setting aperture to %d\n",
-			ffs(priv->asize>>20)-1);
+		printk(KERN_DEBUG "pmc551: setting aperture to %d\n",
+			ffs(priv->asize >> 20) - 1);
 #endif
 
-                priv->base_map0 = ( PMC551_PCI_MEM_MAP_REG_EN
-				  | PMC551_PCI_MEM_MAP_ENABLE
-				  | (ffs(priv->asize>>20)-1)<<4 );
-                priv->curr_map0 = priv->base_map0;
-                pci_write_config_dword ( priv->dev, PMC551_PCI_MEM_MAP0,
-                                         priv->curr_map0 );
+		priv->base_map0 = (PMC551_PCI_MEM_MAP_REG_EN
+				   | PMC551_PCI_MEM_MAP_ENABLE
+				   | (ffs(priv->asize >> 20) - 1) << 4);
+		priv->curr_map0 = priv->base_map0;
+		pci_write_config_dword(priv->dev, PMC551_PCI_MEM_MAP0,
+					priv->curr_map0);
 
 #ifdef CONFIG_MTD_PMC551_DEBUG
-		printk( KERN_DEBUG "pmc551: aperture set to %d\n",
-			(priv->base_map0 & 0xF0)>>4 );
+		printk(KERN_DEBUG "pmc551: aperture set to %d\n",
+			(priv->base_map0 & 0xF0) >> 4);
 #endif
 
-                mtd->size 	= msize;
-                mtd->flags 	= MTD_CAP_RAM;
-                mtd->erase 	= pmc551_erase;
-                mtd->read 	= pmc551_read;
-                mtd->write 	= pmc551_write;
-                mtd->point 	= pmc551_point;
-                mtd->unpoint 	= pmc551_unpoint;
-                mtd->type 	= MTD_RAM;
-                mtd->name 	= "PMC551 RAM board";
-                mtd->erasesize 	= 0x10000;
-                mtd->writesize  = 1;
-                mtd->owner = THIS_MODULE;
+		mtd->size = msize;
+		mtd->flags = MTD_CAP_RAM;
+		mtd->erase = pmc551_erase;
+		mtd->read = pmc551_read;
+		mtd->write = pmc551_write;
+		mtd->point = pmc551_point;
+		mtd->unpoint = pmc551_unpoint;
+		mtd->type = MTD_RAM;
+		mtd->name = "PMC551 RAM board";
+		mtd->erasesize = 0x10000;
+		mtd->writesize = 1;
+		mtd->owner = THIS_MODULE;
 
-                if (add_mtd_device(mtd)) {
-                        printk(KERN_NOTICE "pmc551: Failed to register new device\n");
-			iounmap(priv->start);
-                        kfree(mtd->priv);
-                        kfree(mtd);
-                        break;
-                }
-                printk(KERN_NOTICE "Registered pmc551 memory device.\n");
-                printk(KERN_NOTICE "Mapped %dM of memory from 0x%p to 0x%p\n",
-                       priv->asize>>20,
-                       priv->start,
-                       priv->start + priv->asize);
-                printk(KERN_NOTICE "Total memory is %d%c\n",
-	       		(length<1024)?length:
-				(length<1048576)?length>>10:length>>20,
-               		(length<1024)?'B':(length<1048576)?'K':'M');
+		if (add_mtd_device(mtd)) {
+			printk(KERN_NOTICE "pmc551: Failed to register new "
+				"device\n");
+			pci_iounmap(PCI_Device, priv->start);
+			kfree(mtd->priv);
+			kfree(mtd);
+			break;
+		}
+
+		/* Keep a reference as the add_mtd_device worked */
+		pci_dev_get(PCI_Device);
+
+		printk(KERN_NOTICE "Registered pmc551 memory device.\n");
+		printk(KERN_NOTICE "Mapped %dM of memory from 0x%p to 0x%p\n",
+			priv->asize >> 20,
+			priv->start, priv->start + priv->asize);
+		printk(KERN_NOTICE "Total memory is %d%c\n",
+			(length < 1024) ? length :
+			(length < 1048576) ? length >> 10 : length >> 20,
+			(length < 1024) ? 'B' : (length < 1048576) ? 'K' : 'M');
 		priv->nextpmc551 = pmc551list;
 		pmc551list = mtd;
 		found++;
-        }
+	}
 
-        if( !pmc551list ) {
-                printk(KERN_NOTICE "pmc551: not detected\n");
-                return -ENODEV;
-        } else {
+	/* Exited early, reference left over */
+	if (PCI_Device)
+		pci_dev_put(PCI_Device);
+
+	if (!pmc551list) {
+		printk(KERN_NOTICE "pmc551: not detected\n");
+		return -ENODEV;
+	} else {
 		printk(KERN_NOTICE "pmc551: %d pmc551 devices loaded\n", found);
-                return 0;
+		return 0;
 	}
 }
 
@@ -815,23 +842,24 @@
  */
 static void __exit cleanup_pmc551(void)
 {
-        int found=0;
-        struct mtd_info *mtd;
+	int found = 0;
+	struct mtd_info *mtd;
 	struct mypriv *priv;
 
-	while((mtd=pmc551list)) {
+	while ((mtd = pmc551list)) {
 		priv = mtd->priv;
 		pmc551list = priv->nextpmc551;
 
-		if(priv->start) {
-			printk (KERN_DEBUG "pmc551: unmapping %dM starting at 0x%p\n",
-				priv->asize>>20, priv->start);
-			iounmap (priv->start);
+		if (priv->start) {
+			printk(KERN_DEBUG "pmc551: unmapping %dM starting at "
+				"0x%p\n", priv->asize >> 20, priv->start);
+			pci_iounmap(priv->dev, priv->start);
 		}
+		pci_dev_put(priv->dev);
 
-		kfree (mtd->priv);
-		del_mtd_device (mtd);
-		kfree (mtd);
+		kfree(mtd->priv);
+		del_mtd_device(mtd);
+		kfree(mtd);
 		found++;
 	}
 
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 83d0b2a..24747bd 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -13,13 +13,13 @@
 
 config MTD_PHYSMAP
 	tristate "CFI Flash device in physical memory map"
-	depends on MTD_CFI
+	depends on MTD_CFI || MTD_JEDECPROBE || MTD_ROM
 	help
-	  This provides a 'mapping' driver which allows the CFI probe and
-	  command set driver code to communicate with flash chips which
-	  are mapped physically into the CPU's memory. You will need to
-	  configure the physical address and size of the flash chips on
-	  your particular board as well as the bus width, either statically
+	  This provides a 'mapping' driver which allows the NOR Flash and
+	  ROM driver code to communicate with chips which are mapped
+	  physically into the CPU's memory. You will need to configure
+	  the physical address and size of the flash chips on your
+	  particular board as well as the bus width, either statically
 	  with config options or at run-time.
 
 config MTD_PHYSMAP_START
@@ -447,14 +447,6 @@
 	  21285 bridge used with Intel's StrongARM processors. More info at
 	  <http://www.intel.com/design/bridge/docs/21285_documentation.htm>.
 
-config MTD_IQ80310
-	tristate "CFI Flash device mapped on the XScale IQ80310 board"
-	depends on MTD_CFI && ARCH_IQ80310
-	help
-	  This enables access routines for the flash chips on the Intel XScale
-	  IQ80310 evaluation board. If you have one of these boards and would
-	  like to use the flash chips on it, say 'Y'.
-
 config MTD_IXP4XX
 	tristate "CFI Flash device mapped on Intel IXP4xx based systems"
 	depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index ab71f17..191c192 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -15,7 +15,6 @@
 obj-$(CONFIG_MTD_CSTM_MIPS_IXX)	+= cstm_mips_ixx.o
 obj-$(CONFIG_MTD_DC21285)	+= dc21285.o
 obj-$(CONFIG_MTD_DILNETPC)	+= dilnetpc.o
-obj-$(CONFIG_MTD_IQ80310)	+= iq80310.o
 obj-$(CONFIG_MTD_L440GX)	+= l440gx.o
 obj-$(CONFIG_MTD_AMD76XROM)	+= amd76xrom.o
 obj-$(CONFIG_MTD_ICHXROM)	+= ichxrom.o
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 447955b..797caff 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -57,6 +57,7 @@
 		/* Disable writes through the rom window */
 		pci_read_config_byte(window->pdev, 0x40, &byte);
 		pci_write_config_byte(window->pdev, 0x40, byte & ~1);
+		pci_dev_put(window->pdev);
 	}
 
 	/* Free all of the mtd devices */
@@ -91,7 +92,7 @@
 	struct amd76xrom_map_info *map = NULL;
 	unsigned long map_top;
 
-	/* Remember the pci dev I find the window in */
+	/* Remember the pci dev I find the window in - already have a ref */
 	window->pdev = pdev;
 
 	/* Assume the rom window is properly setup, and find it's size */
@@ -302,7 +303,7 @@
 	struct pci_device_id *id;
 	pdev = NULL;
 	for(id = amd76xrom_pci_tbl; id->vendor; id++) {
-		pdev = pci_find_device(id->vendor, id->device, NULL);
+		pdev = pci_get_device(id->vendor, id->device, NULL);
 		if (pdev) {
 			break;
 		}
diff --git a/drivers/mtd/maps/arctic-mtd.c b/drivers/mtd/maps/arctic-mtd.c
index d95ae58..642d96b 100644
--- a/drivers/mtd/maps/arctic-mtd.c
+++ b/drivers/mtd/maps/arctic-mtd.c
@@ -96,6 +96,8 @@
 static int __init
 init_arctic_mtd(void)
 {
+	int err = 0;
+
 	printk("%s: 0x%08x at 0x%08x\n", NAME, SIZE, PADDR);
 
 	arctic_mtd_map.virt = ioremap(PADDR, SIZE);
@@ -109,12 +111,20 @@
 	printk("%s: probing %d-bit flash bus\n", NAME, BUSWIDTH * 8);
 	arctic_mtd = do_map_probe("cfi_probe", &arctic_mtd_map);
 
-	if (!arctic_mtd)
+	if (!arctic_mtd) {
+		iounmap((void *) arctic_mtd_map.virt);
 		return -ENXIO;
+	}
 
 	arctic_mtd->owner = THIS_MODULE;
 
-	return add_mtd_partitions(arctic_mtd, arctic_partitions, PARTITIONS);
+	err = add_mtd_partitions(arctic_mtd, arctic_partitions, PARTITIONS);
+	if (err) {
+		printk("%s: add_mtd_partitions failed\n", NAME);
+		iounmap((void *) arctic_mtd_map.virt);
+	}
+
+	return err;
 }
 
 static void __exit
diff --git a/drivers/mtd/maps/beech-mtd.c b/drivers/mtd/maps/beech-mtd.c
index 5df7361..a64b1a5 100644
--- a/drivers/mtd/maps/beech-mtd.c
+++ b/drivers/mtd/maps/beech-mtd.c
@@ -72,6 +72,8 @@
 static int __init
 init_beech_mtd(void)
 {
+	int err = 0;
+
 	printk("%s: 0x%08x at 0x%08x\n", NAME, SIZE, PADDR);
 
 	beech_mtd_map.virt = ioremap(PADDR, SIZE);
@@ -86,12 +88,20 @@
 	printk("%s: probing %d-bit flash bus\n", NAME, BUSWIDTH * 8);
 	beech_mtd = do_map_probe("cfi_probe", &beech_mtd_map);
 
-	if (!beech_mtd)
+	if (!beech_mtd) {
+		iounmap((void *) beech_mtd_map.virt);
 		return -ENXIO;
+	}
 
 	beech_mtd->owner = THIS_MODULE;
 
-	return add_mtd_partitions(beech_mtd, beech_partitions, 2);
+	err = add_mtd_partitions(beech_mtd, beech_partitions, 2);
+	if (err) {
+		printk("%s: add_mtd_partitions failed\n", NAME);
+		iounmap((void *) beech_mtd_map.virt);
+	}
+
+	return err;
 }
 
 static void __exit
diff --git a/drivers/mtd/maps/cstm_mips_ixx.c b/drivers/mtd/maps/cstm_mips_ixx.c
index aa56def..d6bef10 100644
--- a/drivers/mtd/maps/cstm_mips_ixx.c
+++ b/drivers/mtd/maps/cstm_mips_ixx.c
@@ -171,7 +171,14 @@
 		cstm_mips_ixx_map[i].phys = cstm_mips_ixx_board_desc[i].window_addr;
 		cstm_mips_ixx_map[i].virt = ioremap(cstm_mips_ixx_board_desc[i].window_addr, cstm_mips_ixx_board_desc[i].window_size);
 		if (!cstm_mips_ixx_map[i].virt) {
+			int j = 0;
 			printk(KERN_WARNING "Failed to ioremap\n");
+			for (j = 0; j < i; j++) {
+				if (cstm_mips_ixx_map[j].virt) {
+					iounmap((void *)cstm_mips_ixx_map[j].virt);
+					cstm_mips_ixx_map[j].virt = 0;
+				}
+			}
 			return -EIO;
 	        }
 		cstm_mips_ixx_map[i].name = cstm_mips_ixx_board_desc[i].name;
@@ -204,8 +211,15 @@
 	                cstm_mips_ixx_map[i].map_priv_2 = (unsigned long)mymtd;
 		        add_mtd_partitions(mymtd, parts, cstm_mips_ixx_board_desc[i].num_partitions);
 		}
-		else
-	           return -ENXIO;
+		else {
+			for (i = 0; i < PHYSMAP_NUMBER; i++) {
+				if (cstm_mips_ixx_map[i].virt) {
+					iounmap((void *)cstm_mips_ixx_map[i].virt);
+					cstm_mips_ixx_map[i].virt = 0;
+				}
+			}
+			return -ENXIO;
+		}
 	}
 	return 0;
 }
diff --git a/drivers/mtd/maps/ebony.c b/drivers/mtd/maps/ebony.c
index 641e1dd..1488bb9 100644
--- a/drivers/mtd/maps/ebony.c
+++ b/drivers/mtd/maps/ebony.c
@@ -108,6 +108,7 @@
 					ARRAY_SIZE(ebony_small_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(ebony_small_map.virt);
 		return -ENXIO;
 	}
 
@@ -117,6 +118,7 @@
 
 	if (!ebony_large_map.virt) {
 		printk("Failed to ioremap flash\n");
+		iounmap(ebony_small_map.virt);
 		return -EIO;
 	}
 
@@ -129,6 +131,8 @@
 					ARRAY_SIZE(ebony_large_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(ebony_small_map.virt);
+		iounmap(ebony_large_map.virt);
 		return -ENXIO;
 	}
 
diff --git a/drivers/mtd/maps/fortunet.c b/drivers/mtd/maps/fortunet.c
index c6bf4e1..7c50c27 100644
--- a/drivers/mtd/maps/fortunet.c
+++ b/drivers/mtd/maps/fortunet.c
@@ -218,8 +218,11 @@
 				map_regions[ix].map_info.size);
 			if(!map_regions[ix].map_info.virt)
 			{
+				int j = 0;
 				printk(MTD_FORTUNET_PK "%s flash failed to ioremap!\n",
 					map_regions[ix].map_info.name);
+				for (j = 0 ; j < ix; j++)
+					iounmap(map_regions[j].map_info.virt);
 				return -ENXIO;
 			}
 			simple_map_init(&map_regions[ix].map_info);
diff --git a/drivers/mtd/maps/ichxrom.c b/drivers/mtd/maps/ichxrom.c
index db4b570..2bb3e63 100644
--- a/drivers/mtd/maps/ichxrom.c
+++ b/drivers/mtd/maps/ichxrom.c
@@ -61,6 +61,7 @@
 	/* Disable writes through the rom window */
 	pci_read_config_word(window->pdev, BIOS_CNTL, &word);
 	pci_write_config_word(window->pdev, BIOS_CNTL, word & ~1);
+	pci_dev_put(window->pdev);
 
 	/* Free all of the mtd devices */
 	list_for_each_entry_safe(map, scratch, &window->maps, list) {
@@ -355,7 +356,7 @@
 
 	pdev = NULL;
 	for (id = ichxrom_pci_tbl; id->vendor; id++) {
-		pdev = pci_find_device(id->vendor, id->device, NULL);
+		pdev = pci_get_device(id->vendor, id->device, NULL);
 		if (pdev) {
 			break;
 		}
diff --git a/drivers/mtd/maps/iq80310.c b/drivers/mtd/maps/iq80310.c
deleted file mode 100644
index 62d9e87..0000000
--- a/drivers/mtd/maps/iq80310.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * $Id: iq80310.c,v 1.21 2005/11/07 11:14:27 gleixner Exp $
- *
- * Mapping for the Intel XScale IQ80310 evaluation board
- *
- * Author:	Nicolas Pitre
- * Copyright:	(C) 2001 MontaVista Software Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <asm/io.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-
-
-#define WINDOW_ADDR 	0
-#define WINDOW_SIZE 	8*1024*1024
-#define BUSWIDTH 	1
-
-static struct mtd_info *mymtd;
-
-static struct map_info iq80310_map = {
-	.name = "IQ80310 flash",
-	.size = WINDOW_SIZE,
-	.bankwidth = BUSWIDTH,
-	.phys = WINDOW_ADDR
-};
-
-static struct mtd_partition iq80310_partitions[4] = {
-	{
-		.name =		"Firmware",
-		.size =		0x00080000,
-		.offset =	0,
-		.mask_flags =	MTD_WRITEABLE  /* force read-only */
-	},{
-		.name =		"Kernel",
-		.size =		0x000a0000,
-		.offset =	0x00080000,
-	},{
-		.name =		"Filesystem",
-		.size =		0x00600000,
-		.offset =	0x00120000
-	},{
-		.name =		"RedBoot",
-		.size =		0x000e0000,
-		.offset =	0x00720000,
-		.mask_flags =	MTD_WRITEABLE
-	}
-};
-
-static struct mtd_info *mymtd;
-static struct mtd_partition *parsed_parts;
-static const char *probes[] = { "RedBoot", "cmdlinepart", NULL };
-
-static int __init init_iq80310(void)
-{
-	struct mtd_partition *parts;
-	int nb_parts = 0;
-	int parsed_nr_parts = 0;
-	int ret;
-
-	iq80310_map.virt = ioremap(WINDOW_ADDR, WINDOW_SIZE);
-	if (!iq80310_map.virt) {
-		printk("Failed to ioremap\n");
-		return -EIO;
-	}
-	simple_map_init(&iq80310_map);
-
-	mymtd = do_map_probe("cfi_probe", &iq80310_map);
-	if (!mymtd) {
-		iounmap((void *)iq80310_map.virt);
-		return -ENXIO;
-	}
-	mymtd->owner = THIS_MODULE;
-
-	ret = parse_mtd_partitions(mymtd, probes, &parsed_parts, 0);
-
-	if (ret > 0)
-		parsed_nr_parts = ret;
-
-	if (parsed_nr_parts > 0) {
-		parts = parsed_parts;
-		nb_parts = parsed_nr_parts;
-	} else {
-		parts = iq80310_partitions;
-		nb_parts = ARRAY_SIZE(iq80310_partitions);
-	}
-	add_mtd_partitions(mymtd, parts, nb_parts);
-	return 0;
-}
-
-static void __exit cleanup_iq80310(void)
-{
-	if (mymtd) {
-		del_mtd_partitions(mymtd);
-		map_destroy(mymtd);
-		kfree(parsed_parts);
-	}
-	if (iq80310_map.virt)
-		iounmap((void *)iq80310_map.virt);
-}
-
-module_init(init_iq80310);
-module_exit(cleanup_iq80310);
-
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Nicolas Pitre <nico@cam.org>");
-MODULE_DESCRIPTION("MTD map driver for Intel XScale IQ80310 evaluation board");
diff --git a/drivers/mtd/maps/ixp4xx.c b/drivers/mtd/maps/ixp4xx.c
index 986c586..7a828e3 100644
--- a/drivers/mtd/maps/ixp4xx.c
+++ b/drivers/mtd/maps/ixp4xx.c
@@ -253,7 +253,7 @@
 	/* Use the fast version */
 	info->map.write = ixp4xx_write16,
 
-	err = parse_mtd_partitions(info->mtd, probes, &info->partitions, 0);
+	err = parse_mtd_partitions(info->mtd, probes, &info->partitions, dev->resource->start);
 	if (err > 0) {
 		err = add_mtd_partitions(info->mtd, info->partitions, err);
 		if(err)
diff --git a/drivers/mtd/maps/l440gx.c b/drivers/mtd/maps/l440gx.c
index 6b784ef..67620ad 100644
--- a/drivers/mtd/maps/l440gx.c
+++ b/drivers/mtd/maps/l440gx.c
@@ -61,14 +61,17 @@
 	struct resource *pm_iobase;
 	__u16 word;
 
-	dev = pci_find_device(PCI_VENDOR_ID_INTEL,
+	dev = pci_get_device(PCI_VENDOR_ID_INTEL,
 		PCI_DEVICE_ID_INTEL_82371AB_0, NULL);
 
-	pm_dev = pci_find_device(PCI_VENDOR_ID_INTEL,
+	pm_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
 		PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
 
+	pci_dev_put(dev);
+
 	if (!dev || !pm_dev) {
 		printk(KERN_NOTICE "L440GX flash mapping: failed to find PIIX4 ISA bridge, cannot continue\n");
+		pci_dev_put(pm_dev);
 		return -ENODEV;
 	}
 
@@ -76,6 +79,7 @@
 
 	if (!l440gx_map.virt) {
 		printk(KERN_WARNING "Failed to ioremap L440GX flash region\n");
+		pci_dev_put(pm_dev);
 		return -ENOMEM;
 	}
 	simple_map_init(&l440gx_map);
@@ -99,8 +103,12 @@
 		pm_iobase->start += iobase & ~1;
 		pm_iobase->end += iobase & ~1;
 
+		pci_dev_put(pm_dev);
+
 		/* Allocate the resource region */
 		if (pci_assign_resource(pm_dev, PIIXE_IOBASE_RESOURCE) != 0) {
+			pci_dev_put(dev);
+			pci_dev_put(pm_dev);
 			printk(KERN_WARNING "Could not allocate pm iobase resource\n");
 			iounmap(l440gx_map.virt);
 			return -ENXIO;
diff --git a/drivers/mtd/maps/lasat.c b/drivers/mtd/maps/lasat.c
index 1c13d2d..e343763 100644
--- a/drivers/mtd/maps/lasat.c
+++ b/drivers/mtd/maps/lasat.c
@@ -79,6 +79,7 @@
 		return 0;
 	}
 
+	iounmap(lasat_map.virt);
 	return -ENXIO;
 }
 
@@ -89,6 +90,7 @@
 		map_destroy(lasat_mtd);
 	}
 	if (lasat_map.virt) {
+		iounmap(lasat_map.virt);
 		lasat_map.virt = 0;
 	}
 }
diff --git a/drivers/mtd/maps/nettel.c b/drivers/mtd/maps/nettel.c
index 0994b5b..198e840 100644
--- a/drivers/mtd/maps/nettel.c
+++ b/drivers/mtd/maps/nettel.c
@@ -277,6 +277,7 @@
 	nettel_amd_map.virt = ioremap_nocache(amdaddr, maxsize);
 	if (!nettel_amd_map.virt) {
 		printk("SNAPGEAR: failed to ioremap() BOOTCS\n");
+		iounmap(nettel_mmcrp);
 		return(-EIO);
 	}
 	simple_map_init(&nettel_amd_map);
@@ -337,7 +338,8 @@
 		nettel_amd_map.virt = NULL;
 #else
 		/* Only AMD flash supported */
-		return(-ENXIO);
+		rc = -ENXIO;
+		goto out_unmap2;
 #endif
 	}
 
@@ -361,14 +363,15 @@
 	nettel_intel_map.virt = ioremap_nocache(intel0addr, maxsize);
 	if (!nettel_intel_map.virt) {
 		printk("SNAPGEAR: failed to ioremap() ROMCS1\n");
-		return(-EIO);
+		rc = -EIO;
+		goto out_unmap2;
 	}
 	simple_map_init(&nettel_intel_map);
 
 	intel_mtd = do_map_probe("cfi_probe", &nettel_intel_map);
 	if (!intel_mtd) {
-		iounmap(nettel_intel_map.virt);
-		return(-ENXIO);
+		rc = -ENXIO;
+		goto out_unmap1;
 	}
 
 	/* Set PAR to the detected size */
@@ -394,13 +397,14 @@
 	nettel_intel_map.virt = ioremap_nocache(intel0addr, maxsize);
 	if (!nettel_intel_map.virt) {
 		printk("SNAPGEAR: failed to ioremap() ROMCS1/2\n");
-		return(-EIO);
+		rc = -EIO;
+		goto out_unmap2;
 	}
 
 	intel_mtd = do_map_probe("cfi_probe", &nettel_intel_map);
 	if (! intel_mtd) {
-		iounmap((void *) nettel_intel_map.virt);
-		return(-ENXIO);
+		rc = -ENXIO;
+		goto out_unmap1;
 	}
 
 	intel1size = intel_mtd->size - intel0size;
@@ -456,6 +460,18 @@
 #endif
 
 	return(rc);
+
+#ifdef CONFIG_MTD_CFI_INTELEXT
+out_unmap1:
+	iounmap((void *) nettel_intel_map.virt);
+#endif
+
+out_unmap2:
+	iounmap(nettel_mmcrp);
+	iounmap(nettel_amd_map.virt);
+
+	return(rc);
+		
 }
 
 /****************************************************************************/
@@ -469,6 +485,10 @@
 		del_mtd_partitions(amd_mtd);
 		map_destroy(amd_mtd);
 	}
+	if (nettel_mmcrp) {
+		iounmap(nettel_mmcrp);
+		nettel_mmcrp = NULL;
+	}
 	if (nettel_amd_map.virt) {
 		iounmap(nettel_amd_map.virt);
 		nettel_amd_map.virt = NULL;
diff --git a/drivers/mtd/maps/ocotea.c b/drivers/mtd/maps/ocotea.c
index 2f07602..5522eac 100644
--- a/drivers/mtd/maps/ocotea.c
+++ b/drivers/mtd/maps/ocotea.c
@@ -97,6 +97,7 @@
 					ARRAY_SIZE(ocotea_small_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(ocotea_small_map.virt);
 		return -ENXIO;
 	}
 
@@ -106,6 +107,7 @@
 
 	if (!ocotea_large_map.virt) {
 		printk("Failed to ioremap flash\n");
+		iounmap(ocotea_small_map.virt);
 		return -EIO;
 	}
 
@@ -118,6 +120,8 @@
 					ARRAY_SIZE(ocotea_large_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(ocotea_small_map.virt);
+		iounmap(ocotea_large_map.virt);
 		return -ENXIO;
 	}
 
diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c
index c861134..995347b 100644
--- a/drivers/mtd/maps/pcmciamtd.c
+++ b/drivers/mtd/maps/pcmciamtd.c
@@ -602,6 +602,10 @@
 	ret = pcmcia_request_configuration(link, &link->conf);
 	if(ret != CS_SUCCESS) {
 		cs_error(link, RequestConfiguration, ret);
+		if (dev->win_base) {
+			iounmap(dev->win_base);
+			dev->win_base = NULL;
+		}
 		return -ENODEV;
 	}
 
diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index 7799a25..bc7cc71 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -158,9 +158,42 @@
 	return err;
 }
 
+#ifdef CONFIG_PM
+static int physmap_flash_suspend(struct platform_device *dev, pm_message_t state)
+{
+	struct physmap_flash_info *info = platform_get_drvdata(dev);
+	int ret = 0;
+
+	if (info)
+		ret = info->mtd->suspend(info->mtd);
+
+	return ret;
+}
+
+static int physmap_flash_resume(struct platform_device *dev)
+{
+	struct physmap_flash_info *info = platform_get_drvdata(dev);
+	if (info)
+		info->mtd->resume(info->mtd);
+	return 0;
+}
+
+static void physmap_flash_shutdown(struct platform_device *dev)
+{
+	struct physmap_flash_info *info = platform_get_drvdata(dev);
+	if (info && info->mtd->suspend(info->mtd) == 0)
+		info->mtd->resume(info->mtd);
+}
+#endif
+
 static struct platform_driver physmap_flash_driver = {
 	.probe		= physmap_flash_probe,
 	.remove		= physmap_flash_remove,
+#ifdef CONFIG_PM
+	.suspend	= physmap_flash_suspend,
+	.resume		= physmap_flash_resume,
+	.shutdown	= physmap_flash_shutdown,
+#endif
 	.driver		= {
 		.name	= "physmap-flash",
 	},
diff --git a/drivers/mtd/maps/redwood.c b/drivers/mtd/maps/redwood.c
index ec8fdae..2257d2b 100644
--- a/drivers/mtd/maps/redwood.c
+++ b/drivers/mtd/maps/redwood.c
@@ -126,6 +126,8 @@
 
 int __init init_redwood_flash(void)
 {
+	int err = 0;
+
 	printk(KERN_NOTICE "redwood: flash mapping: %x at %x\n",
 			WINDOW_SIZE, WINDOW_ADDR);
 
@@ -141,11 +143,18 @@
 
 	if (redwood_mtd) {
 		redwood_mtd->owner = THIS_MODULE;
-		return add_mtd_partitions(redwood_mtd,
+		err = add_mtd_partitions(redwood_mtd,
 				redwood_flash_partitions,
 				NUM_REDWOOD_FLASH_PARTITIONS);
+		if (err) {
+			printk("init_redwood_flash: add_mtd_partitions failed\n");
+			iounmap(redwood_flash_map.virt);
+		}
+		return err;
+
 	}
 
+	iounmap(redwood_flash_map.virt);
 	return -ENXIO;
 }
 
diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c
index 7d0fcf8..b8c1331 100644
--- a/drivers/mtd/maps/sbc8240.c
+++ b/drivers/mtd/maps/sbc8240.c
@@ -156,7 +156,7 @@
 	};
 
 	int devicesfound = 0;
-	int i;
+	int i,j;
 
 	for (i = 0; i < NUM_FLASH_BANKS; i++) {
 		printk (KERN_NOTICE MSG_PREFIX
@@ -166,6 +166,10 @@
 			(unsigned long) ioremap (pt[i].addr, pt[i].size);
 		if (!sbc8240_map[i].map_priv_1) {
 			printk (MSG_PREFIX "failed to ioremap\n");
+			for (j = 0; j < i; j++) {
+				iounmap((void *) sbc8240_map[j].map_priv_1);
+				sbc8240_map[j].map_priv_1 = 0;
+			}
 			return -EIO;
 		}
 		simple_map_init(&sbc8240_mtd[i]);
@@ -175,6 +179,11 @@
 		if (sbc8240_mtd[i]) {
 			sbc8240_mtd[i]->module = THIS_MODULE;
 			devicesfound++;
+		} else {
+			if (sbc8240_map[i].map_priv_1) {
+				iounmap((void *) sbc8240_map[i].map_priv_1);
+				sbc8240_map[i].map_priv_1 = 0;
+			}
 		}
 	}
 
diff --git a/drivers/mtd/maps/scx200_docflash.c b/drivers/mtd/maps/scx200_docflash.c
index 7391fd5..5e2bce2 100644
--- a/drivers/mtd/maps/scx200_docflash.c
+++ b/drivers/mtd/maps/scx200_docflash.c
@@ -87,19 +87,23 @@
 
 	printk(KERN_DEBUG NAME ": NatSemi SCx200 DOCCS Flash Driver\n");
 
-	if ((bridge = pci_find_device(PCI_VENDOR_ID_NS,
+	if ((bridge = pci_get_device(PCI_VENDOR_ID_NS,
 				      PCI_DEVICE_ID_NS_SCx200_BRIDGE,
 				      NULL)) == NULL)
 		return -ENODEV;
 
 	/* check that we have found the configuration block */
-	if (!scx200_cb_present())
+	if (!scx200_cb_present()) {
+		pci_dev_put(bridge);
 		return -ENODEV;
+	}
 
 	if (probe) {
 		/* Try to use the present flash mapping if any */
 		pci_read_config_dword(bridge, SCx200_DOCCS_BASE, &base);
 		pci_read_config_dword(bridge, SCx200_DOCCS_CTRL, &ctrl);
+		pci_dev_put(bridge);
+
 		pmr = inl(scx200_cb_base + SCx200_PMR);
 
 		if (base == 0
@@ -127,6 +131,7 @@
 			return -ENOMEM;
 		}
 	} else {
+		pci_dev_put(bridge);
 		for (u = size; u > 1; u >>= 1)
 			;
 		if (u != 1) {
diff --git a/drivers/mtd/maps/walnut.c b/drivers/mtd/maps/walnut.c
index ec80eec..ca93212 100644
--- a/drivers/mtd/maps/walnut.c
+++ b/drivers/mtd/maps/walnut.c
@@ -68,6 +68,7 @@
 
 	if (WALNUT_FLASH_ONBD_N(fpga_brds1)) {
 		printk("The on-board flash is disabled (U79 sw 5)!");
+		iounmap(fpga_status_adr);
 		return -EIO;
 	}
 	if (WALNUT_FLASH_SRAM_SEL(fpga_brds1))
@@ -81,6 +82,7 @@
 
 	if (!walnut_map.virt) {
 		printk("Failed to ioremap flash.\n");
+		iounmap(fpga_status_adr);
 		return -EIO;
 	}
 
@@ -93,9 +95,11 @@
 					ARRAY_SIZE(walnut_partitions));
 	} else {
 		printk("map probe failed for flash\n");
+		iounmap(fpga_status_adr);
 		return -ENXIO;
 	}
 
+	iounmap(fpga_status_adr);
 	return 0;
 }
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index fb8b4f7..5b6acfc 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -62,15 +62,12 @@
 	struct mtd_info *mtd = mfi->mtd;
 
 	switch (orig) {
-	case 0:
-		/* SEEK_SET */
+	case SEEK_SET:
 		break;
-	case 1:
-		/* SEEK_CUR */
+	case SEEK_CUR:
 		offset += file->f_pos;
 		break;
-	case 2:
-		/* SEEK_END */
+	case SEEK_END:
 		offset += mtd->size;
 		break;
 	default:
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 168d3ba..c4d26de 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -57,6 +57,16 @@
 			mtd->index = i;
 			mtd->usecount = 0;
 
+			/* Some chips always power up locked. Unlock them now */
+			if ((mtd->flags & MTD_WRITEABLE)
+			    && (mtd->flags & MTD_STUPID_LOCK) && mtd->unlock) {
+				if (mtd->unlock(mtd, 0, mtd->size))
+					printk(KERN_WARNING
+					       "%s: unlock failed, "
+					       "writes may not work\n",
+					       mtd->name);
+			}
+
 			DEBUG(0, "mtd: Giving out device %d to %s\n",i, mtd->name);
 			/* No need to get a refcount on the module containing
 			   the notifier, since we hold the mtd_table_mutex */
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 3db77ee..c99302e 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -11,7 +11,7 @@
 	help
 	  This enables support for accessing all type of NAND flash
 	  devices. For further information see
-	  <http://www.linux-mtd.infradead.org/tech/nand.html>.
+	  <http://www.linux-mtd.infradead.org/doc/nand.html>.
 
 config MTD_NAND_VERIFY_WRITE
 	bool "Verify NAND page writes"
diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c
index 3122833..09e421a 100644
--- a/drivers/mtd/nand/au1550nd.c
+++ b/drivers/mtd/nand/au1550nd.c
@@ -21,18 +21,7 @@
 #include <linux/version.h>
 #include <asm/io.h>
 
-/* fixme: this is ugly */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 0)
 #include <asm/mach-au1x00/au1xxx.h>
-#else
-#include <asm/au1000.h>
-#ifdef CONFIG_MIPS_PB1550
-#include <asm/pb1550.h>
-#endif
-#ifdef CONFIG_MIPS_DB1550
-#include <asm/db1x00.h>
-#endif
-#endif
 
 /*
  * MTD structure for NAND controller
diff --git a/drivers/mtd/nand/edb7312.c b/drivers/mtd/nand/edb7312.c
index 516c0e5..12017f3 100644
--- a/drivers/mtd/nand/edb7312.c
+++ b/drivers/mtd/nand/edb7312.c
@@ -198,6 +198,9 @@
 	/* Release resources, unregister device */
 	nand_release(ap7312_mtd);
 
+	/* Release io resource */
+	iounmap((void *)this->IO_ADDR_R);
+
 	/* Free the MTD device structure */
 	kfree(ep7312_mtd);
 }
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 0a54d00..975b2ef 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2224,7 +2224,7 @@
 	}
 
 	/* Try to identify manufacturer */
-	for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_id++) {
+	for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_idx++) {
 		if (nand_manuf_ids[maf_idx].id == *maf_id)
 			break;
 	}
diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c
index e5bd88f..039c759 100644
--- a/drivers/mtd/nand/ndfc.c
+++ b/drivers/mtd/nand/ndfc.c
@@ -168,7 +168,7 @@
 	chip->ecc.mode = NAND_ECC_HW;
 	chip->ecc.size = 256;
 	chip->ecc.bytes = 3;
-	chip->ecclayout = mtd->pl_chip->ecclayout;
+	chip->ecclayout = chip->ecc.layout = mtd->pl_chip->ecclayout;
 	mtd->mtd.priv = chip;
 	mtd->mtd.owner = THIS_MODULE;
 }
diff --git a/drivers/mtd/nand/ppchameleonevb.c b/drivers/mtd/nand/ppchameleonevb.c
index 22fa65c..eb7d4d4 100644
--- a/drivers/mtd/nand/ppchameleonevb.c
+++ b/drivers/mtd/nand/ppchameleonevb.c
@@ -276,6 +276,7 @@
 	/* Scan to find existence of the device (it could not be mounted) */
 	if (nand_scan(ppchameleon_mtd, 1)) {
 		iounmap((void *)ppchameleon_fio_base);
+		ppchameleon_fio_base = NULL;
 		kfree(ppchameleon_mtd);
 		goto nand_evb_init;
 	}
@@ -314,6 +315,8 @@
 	ppchameleonevb_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
 	if (!ppchameleonevb_mtd) {
 		printk("Unable to allocate PPChameleonEVB NAND MTD device structure.\n");
+		if (ppchameleon_fio_base)
+			iounmap(ppchameleon_fio_base);
 		return -ENOMEM;
 	}
 
@@ -322,6 +325,8 @@
 	if (!ppchameleonevb_fio_base) {
 		printk("ioremap PPChameleonEVB NAND flash failed\n");
 		kfree(ppchameleonevb_mtd);
+		if (ppchameleon_fio_base)
+			iounmap(ppchameleon_fio_base);
 		return -EIO;
 	}
 
@@ -378,6 +383,8 @@
 	if (nand_scan(ppchameleonevb_mtd, 1)) {
 		iounmap((void *)ppchameleonevb_fio_base);
 		kfree(ppchameleonevb_mtd);
+		if (ppchameleon_fio_base)
+			iounmap(ppchameleon_fio_base);
 		return -ENXIO;
 	}
 #ifdef CONFIG_MTD_PARTITIONS
diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c
index fbeedc3..51c7288 100644
--- a/drivers/mtd/nand/sharpsl.c
+++ b/drivers/mtd/nand/sharpsl.c
@@ -78,7 +78,7 @@
 /*
  *	hardware specific access to control-lines
  *	ctrl:
- *	NAND_CNE: bit 0 -> bit 0 & 4
+ *	NAND_CNE: bit 0 -> ! bit 0 & 4
  *	NAND_CLE: bit 1 -> bit 1
  *	NAND_ALE: bit 2 -> bit 2
  *
@@ -92,7 +92,10 @@
 		unsigned char bits = ctrl & 0x07;
 
 		bits |= (ctrl & 0x01) << 4;
-		writeb((readb(FLASHCTL) & 0x17) | bits, FLASHCTL);
+
+		bits ^= 0x11;
+
+		writeb((readb(FLASHCTL) & ~0x17) | bits, FLASHCTL);
 	}
 
 	if (cmd != NAND_CMD_NONE)
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
new file mode 100644
index 0000000..79d3bb6
--- /dev/null
+++ b/drivers/mtd/ssfdc.c
@@ -0,0 +1,474 @@
+/*
+ * Linux driver for SSFDC Flash Translation Layer (Read only)
+ * (c) 2005 Eptar srl
+ * Author: Claudio Lanconelli <lanconelli.claudio@eptar.com>
+ *
+ * Based on NFTL and MTDBLOCK_RO drivers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/hdreg.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/blktrans.h>
+
+struct ssfdcr_record {
+	struct mtd_blktrans_dev mbd;
+	int usecount;
+	unsigned char heads;
+	unsigned char sectors;
+	unsigned short cylinders;
+	int cis_block;			/* block n. containing CIS/IDI */
+	int erase_size;			/* phys_block_size */
+	unsigned short *logic_block_map; /* all zones (max 8192 phys blocks on
+					    the 128MiB) */
+	int map_len;			/* n. phys_blocks on the card */
+};
+
+#define SSFDCR_MAJOR		257
+#define SSFDCR_PARTN_BITS	3
+
+#define SECTOR_SIZE		512
+#define SECTOR_SHIFT		9
+#define OOB_SIZE		16
+
+#define MAX_LOGIC_BLK_PER_ZONE	1000
+#define MAX_PHYS_BLK_PER_ZONE	1024
+
+#define KiB(x)	( (x) * 1024L )
+#define MiB(x)	( KiB(x) * 1024L )
+
+/** CHS Table
+		1MiB	2MiB	4MiB	8MiB	16MiB	32MiB	64MiB	128MiB
+NCylinder	125	125	250	250	500	500	500	500
+NHead		4	4	4	4	4	8	8	16
+NSector		4	8	8	16	16	16	32	32
+SumSector	2,000	4,000	8,000	16,000	32,000	64,000	128,000	256,000
+SectorSize	512	512	512	512	512	512	512	512
+**/
+
+typedef struct {
+	unsigned long size;
+	unsigned short cyl;
+	unsigned char head;
+	unsigned char sec;
+} chs_entry_t;
+
+/* Must be ordered by size */
+static const chs_entry_t chs_table[] = {
+	{ MiB(  1), 125,  4,  4 },
+	{ MiB(  2), 125,  4,  8 },
+	{ MiB(  4), 250,  4,  8 },
+	{ MiB(  8), 250,  4, 16 },
+	{ MiB( 16), 500,  4, 16 },
+	{ MiB( 32), 500,  8, 16 },
+	{ MiB( 64), 500,  8, 32 },
+	{ MiB(128), 500, 16, 32 },
+	{ 0 },
+};
+
+/* Look up the CHS geometry for a card of 'size' bytes; 1 if found, else 0 */
+static int get_chs(unsigned long size, unsigned short *cyl, unsigned char *head,
+			unsigned char *sec)
+{
+	const chs_entry_t *entry;
+
+	/* chs_table is ordered by size: stop at the first entry that fits */
+	for (entry = chs_table; entry->size > 0 && size > entry->size; entry++)
+		;
+
+	if (entry->size == 0)	/* ran into the terminator: nothing fits */
+		return 0;
+
+	if (cyl)
+		*cyl = entry->cyl;
+	if (head)
+		*head = entry->head;
+	if (sec)
+		*sec = entry->sec;
+
+	return 1;
+}
+
+/* These bytes are the signature for the CIS/IDI sector */
+static const uint8_t cis_numbers[] = {
+	0x01, 0x03, 0xD9, 0x01, 0xFF, 0x18, 0x02, 0xDF, 0x01, 0x20
+};
+
+/* Returns the CIS/IDI sector number, or -1 if it cannot be read or found */
+static int get_valid_cis_sector(struct mtd_info *mtd)
+{
+	int ret, k, cis_sector;
+	size_t retlen;
+	loff_t offset;
+	uint8_t *sect_buf;
+
+	cis_sector = -1;
+
+	sect_buf = kmalloc(SECTOR_SIZE, GFP_KERNEL);
+	if (!sect_buf)
+		goto out;
+
+	/*
+	 * Look for CIS/IDI sector on the first GOOD block (give up after 4 bad
+	 * blocks). If the first good block doesn't contain CIS number the flash
+	 * is not SSFDC formatted
+	 */
+	for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) {
+		if (!mtd->block_isbad(mtd, offset)) {
+			ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen,
+				sect_buf);
+
+			/* CIS pattern match on the sector buffer */
+			if (ret < 0 || retlen != SECTOR_SIZE) {
+				printk(KERN_WARNING
+					"SSFDC_RO:can't read CIS/IDI sector\n");
+			} else if (!memcmp(sect_buf, cis_numbers,
+					sizeof(cis_numbers))) {
+				/* Found: convert the byte offset to a sector number */
+				cis_sector = (int)(offset >> SECTOR_SHIFT);
+			} else {
+				DEBUG(MTD_DEBUG_LEVEL1,
+					"SSFDC_RO: CIS/IDI sector not found"
+					" on %s (mtd%d)\n", mtd->name,
+					mtd->index);
+			}
+			break;	/* only the first good block is checked */
+		}
+	}
+
+	kfree(sect_buf);
+ out:
+	return cis_sector;
+}
+
+/* Read physical sector 'sect_no' via mtd->read(); 0 on success, -1 on error */
+static int read_physical_sector(struct mtd_info *mtd, uint8_t *sect_buf,
+				int sect_no)
+{
+	size_t nread;
+	loff_t pos = (loff_t)sect_no << SECTOR_SHIFT;
+	int err = mtd->read(mtd, pos, SECTOR_SIZE, &nread, sect_buf);
+
+	/* A short read is treated the same as a failed one */
+	if (err >= 0 && nread == SECTOR_SIZE)
+		return 0;
+
+	return -1;
+}
+
+/* Read redundancy area (wrapper to MTD_READ_OOB); 0 on success, -1 on error */
+static int read_raw_oob(struct mtd_info *mtd, loff_t offs, uint8_t *buf)
+{
+	struct mtd_oob_ops ops;
+	int ret;
+
+	ops.mode = MTD_OOB_RAW;
+	ops.ooboffs = 0;
+	ops.ooblen = mtd->oobsize;
+	ops.len = OOB_SIZE;
+	ops.oobbuf = buf;
+	ops.datbuf = NULL;	/* no data buffer is supplied, only oobbuf */
+
+	ret = mtd->read_oob(mtd, offs, &ops);
+	if (ret < 0 || ops.retlen != OOB_SIZE)
+		return -1;
+
+	return 0;
+}
+
+/* Parity of the low 'size' bits: returns 1 when an even number are set */
+static int get_parity(int number, int size)
+{
+	int bit;
+	int odd = 1;
+
+	/* Start from 1 and flip once for every set bit among the low bits */
+	for (bit = 0; bit < size; bit++) {
+		if ((number >> bit) & 1)
+			odd = !odd;
+	}
+	return odd;
+}
+
+/* Return the logical block address stored in the OOB, or -2 if none is valid */
+static int get_logical_address(uint8_t *oob_buf)
+{
+	int block_address, parity;
+	int offset[2] = {6, 11}; /* offset of the 2 address fields within OOB */
+	int j;
+	int ok = 0;
+
+	/*
+	 * Look for the first valid logical address
+	 * Valid address has fixed pattern on most significant bits and
+	 * parity check
+	 */
+	for (j = 0; j < ARRAY_SIZE(offset); j++) {
+		block_address = ((int)oob_buf[offset[j]] << 8) |
+			oob_buf[offset[j]+1];
+
+		/* Check for the signature bits in the address field (MSBits) */
+		if ((block_address & ~0x7FF) == 0x1000) {
+			parity = block_address & 0x01;	/* bit 0: parity */
+			block_address &= 0x7FF;
+			block_address >>= 1;	/* bits 10..1: the address */
+
+			if (get_parity(block_address, 10) != parity) {
+				DEBUG(MTD_DEBUG_LEVEL0,
+					"SSFDC_RO: logical address field%d"
+					" parity error(0x%04X)\n", j+1,
+					block_address);
+			} else {
+				ok = 1;
+				break;
+			}
+		}
+	}
+
+	if (!ok)
+		block_address = -2;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "SSFDC_RO: get_logical_address() %d\n",
+		block_address);
+
+	return block_address;
+}
+
+/* Fill logic_block_map from each block's OOB; 0 on success, -1 on OOB read error */
+static int build_logical_block_map(struct ssfdcr_record *ssfdc)
+{
+	unsigned long offset;
+	uint8_t oob_buf[OOB_SIZE];
+	int ret, block_address, phys_block;
+	struct mtd_info *mtd = ssfdc->mbd.mtd;
+
+	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: build_block_map() nblks=%d (%luK)\n",
+	      ssfdc->map_len,
+	      (unsigned long)ssfdc->map_len * ssfdc->erase_size / 1024);
+
+	/* Scan every physical block that follows the CIS block */
+	for (phys_block = ssfdc->cis_block + 1; phys_block < ssfdc->map_len;
+			phys_block++) {
+		offset = (unsigned long)phys_block * ssfdc->erase_size;
+		if (mtd->block_isbad(mtd, offset))
+			continue;	/* skip bad blocks */
+
+		ret = read_raw_oob(mtd, offset, oob_buf);
+		if (ret < 0) {
+			DEBUG(MTD_DEBUG_LEVEL0,
+				"SSFDC_RO: mtd read_oob() failed at %lu\n",
+				offset);
+			return -1;
+		}
+		block_address = get_logical_address(oob_buf);
+
+		/* Skip invalid addresses, and any that would overrun the map */
+		if (block_address >= 0 &&
+				block_address < MAX_LOGIC_BLK_PER_ZONE) {
+			int zone_index = phys_block / MAX_PHYS_BLK_PER_ZONE;
+
+			block_address += zone_index * MAX_LOGIC_BLK_PER_ZONE;
+			if (block_address >= ssfdc->map_len)
+				continue;	/* map has only map_len entries */
+			ssfdc->logic_block_map[block_address] =
+				(unsigned short)phys_block;
+			DEBUG(MTD_DEBUG_LEVEL2,
+				"SSFDC_RO: build_block_map() phys_block=%d,"
+				"logic_block_addr=%d, zone=%d\n",
+				phys_block, block_address, zone_index);
+		}
+	}
+	return 0;
+}
+
+static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
+{
+	struct ssfdcr_record *ssfdc;
+	int cis_sector;
+
+	/* Check for small page NAND flash */
+	if (mtd->type != MTD_NANDFLASH || mtd->oobsize != OOB_SIZE)
+		return;
+
+	/* Check for SSFDC format by reading CIS/IDI sector */
+	cis_sector = get_valid_cis_sector(mtd);
+	if (cis_sector == -1)
+		return;
+
+	ssfdc = kzalloc(sizeof(struct ssfdcr_record), GFP_KERNEL);
+	if (!ssfdc) {
+		printk(KERN_WARNING
+			"SSFDC_RO: out of memory for data structures\n");
+		return;
+	}
+
+	ssfdc->mbd.mtd = mtd;
+	ssfdc->mbd.devnum = -1;
+	ssfdc->mbd.blksize = SECTOR_SIZE;
+	ssfdc->mbd.tr = tr;
+	ssfdc->mbd.readonly = 1;
+
+	ssfdc->cis_block = cis_sector / (mtd->erasesize >> SECTOR_SHIFT);
+	ssfdc->erase_size = mtd->erasesize;
+	ssfdc->map_len = mtd->size / mtd->erasesize;
+
+	DEBUG(MTD_DEBUG_LEVEL1,
+		"SSFDC_RO: cis_block=%d,erase_size=%d,map_len=%d,n_zones=%d\n",
+		ssfdc->cis_block, ssfdc->erase_size, ssfdc->map_len,
+		(ssfdc->map_len + MAX_PHYS_BLK_PER_ZONE - 1) /
+		MAX_PHYS_BLK_PER_ZONE);
+
+	/* Set geometry; the 128MiB defaults stay if get_chs() finds no entry */
+	ssfdc->heads = 16;
+	ssfdc->sectors = 32;
+	get_chs(mtd->size, NULL, &ssfdc->heads, &ssfdc->sectors);
+	ssfdc->cylinders = (unsigned short)((mtd->size >> SECTOR_SHIFT) /
+			((long)ssfdc->sectors * (long)ssfdc->heads));
+
+	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: using C:%d H:%d S:%d == %ld sects\n",
+		ssfdc->cylinders, ssfdc->heads , ssfdc->sectors,
+		(long)ssfdc->cylinders * (long)ssfdc->heads *
+		(long)ssfdc->sectors);
+
+	ssfdc->mbd.size = (long)ssfdc->heads * (long)ssfdc->cylinders *
+				(long)ssfdc->sectors;
+
+	/* Allocate logical block map: map_len entries, all preset to 0xffff */
+	ssfdc->logic_block_map = kmalloc(sizeof(ssfdc->logic_block_map[0]) *
+					 ssfdc->map_len, GFP_KERNEL);
+	if (!ssfdc->logic_block_map) {
+		printk(KERN_WARNING
+			"SSFDC_RO: out of memory for data structures\n");
+		goto out_err;
+	}
+	memset(ssfdc->logic_block_map, 0xff, sizeof(ssfdc->logic_block_map[0]) *
+		ssfdc->map_len);
+
+	/* Build logical block map */
+	if (build_logical_block_map(ssfdc) < 0)
+		goto out_err;
+
+	/* Register device + partitions */
+	if (add_mtd_blktrans_dev(&ssfdc->mbd))
+		goto out_err;
+
+	printk(KERN_INFO "SSFDC_RO: Found ssfdc%c on mtd%d (%s)\n",
+		ssfdc->mbd.devnum + 'a', mtd->index, mtd->name);
+	return;
+
+out_err:
+	kfree(ssfdc->logic_block_map);
+        kfree(ssfdc);
+}
+
+static void ssfdcr_remove_dev(struct mtd_blktrans_dev *dev)
+{
+	struct ssfdcr_record *ssfdc = (struct ssfdcr_record *)dev;
+
+	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: remove_dev (i=%d)\n", dev->devnum);
+	/* Unregister the device, then free the map and the record */
+	del_mtd_blktrans_dev(dev);
+	kfree(ssfdc->logic_block_map);
+	kfree(ssfdc);
+}
+
+static int ssfdcr_readsect(struct mtd_blktrans_dev *dev,
+				unsigned long logic_sect_no, char *buf)
+{
+	struct ssfdcr_record *ssfdc = (struct ssfdcr_record *)dev;
+	int sectors_per_block, offset, block_address;
+
+	sectors_per_block = ssfdc->erase_size >> SECTOR_SHIFT;
+	offset = (int)(logic_sect_no % sectors_per_block);
+	block_address = (int)(logic_sect_no / sectors_per_block);
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+		"SSFDC_RO: ssfdcr_readsect(%lu) sec_per_blk=%d, ofst=%d,"
+		" block_addr=%d\n", logic_sect_no, sectors_per_block, offset,
+		block_address);
+
+	if (block_address >= ssfdc->map_len)
+		BUG();
+	/* Translate the logical block number into a physical block number */
+	block_address = ssfdc->logic_block_map[block_address];
+
+	DEBUG(MTD_DEBUG_LEVEL3,
+		"SSFDC_RO: ssfdcr_readsect() phys_block_addr=%d\n",
+		block_address);
+
+	if (block_address < 0xffff) {	/* 0xffff is the map's preset fill value */
+		unsigned long sect_no;
+
+		sect_no = (unsigned long)block_address * sectors_per_block +
+				offset;
+
+		DEBUG(MTD_DEBUG_LEVEL3,
+			"SSFDC_RO: ssfdcr_readsect() phys_sect_no=%lu\n",
+			sect_no);
+
+		if (read_physical_sector(ssfdc->mbd.mtd, buf, sect_no) < 0)
+			return -EIO;
+	} else {
+		memset(buf, 0xff, SECTOR_SIZE);	/* no mapping: hand back 0xff bytes */
+	}
+
+	return 0;
+}
+
+static int ssfdcr_getgeo(struct mtd_blktrans_dev *dev,  struct hd_geometry *geo)
+{
+	struct ssfdcr_record *ssfdc = (struct ssfdcr_record *)dev;
+
+	DEBUG(MTD_DEBUG_LEVEL1, "SSFDC_RO: ssfdcr_getgeo() C=%d, H=%d, S=%d\n",
+			ssfdc->cylinders, ssfdc->heads, ssfdc->sectors);
+	/* Copy the geometry stored in the record into the caller's struct */
+	geo->heads = ssfdc->heads;
+	geo->sectors = ssfdc->sectors;
+	geo->cylinders = ssfdc->cylinders;
+
+	return 0;
+}
+
+/****************************************************************************
+ *
+ * Module stuff
+ *
+ ****************************************************************************/
+
+static struct mtd_blktrans_ops ssfdcr_tr = {
+	.name		= "ssfdc",
+	.major		= SSFDCR_MAJOR,
+	.part_bits	= SSFDCR_PARTN_BITS,
+	.getgeo		= ssfdcr_getgeo,
+	.readsect	= ssfdcr_readsect,
+	.add_mtd	= ssfdcr_add_mtd,
+	.remove_dev	= ssfdcr_remove_dev,
+	.owner		= THIS_MODULE,
+};
+
+static int __init init_ssfdcr(void)
+{
+	printk(KERN_INFO "SSFDC read-only Flash Translation layer\n");
+
+	return register_mtd_blktrans(&ssfdcr_tr);
+}
+
+static void __exit cleanup_ssfdcr(void)
+{
+	deregister_mtd_blktrans(&ssfdcr_tr);
+}
+
+module_init(init_ssfdcr);
+module_exit(cleanup_ssfdcr);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Claudio Lanconelli <lanconelli.claudio@eptar.com>");
+MODULE_DESCRIPTION("Flash Translation Layer for read-only SSFDC SmartMedia card");
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 80e8ca0..29dede2 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -2077,7 +2077,7 @@
 
 	vp->tx_ring[entry].next = 0;
 #if DO_ZEROCOPY
-	if (skb->ip_summed != CHECKSUM_HW)
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 			vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
 	else
 			vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum);
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 1428bb7..a48b211 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -813,7 +813,7 @@
 
 		if (mss)
 			flags |= LargeSend | ((mss & MSSMask) << MSSShift);
-		else if (skb->ip_summed == CHECKSUM_HW) {
+		else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			const struct iphdr *ip = skb->nh.iph;
 			if (ip->protocol == IPPROTO_TCP)
 				flags |= IPCS | TCPCS;
@@ -867,7 +867,7 @@
 			if (mss)
 				ctrl |= LargeSend |
 					((mss & MSSMask) << MSSShift);
-			else if (skb->ip_summed == CHECKSUM_HW) {
+			else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 				if (ip->protocol == IPPROTO_TCP)
 					ctrl |= IPCS | TCPCS;
 				else if (ip->protocol == IPPROTO_UDP)
@@ -898,7 +898,7 @@
 		txd->addr = cpu_to_le64(first_mapping);
 		wmb();
 
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			if (ip->protocol == IPPROTO_TCP)
 				txd->opts1 = cpu_to_le32(first_eor | first_len |
 							 FirstFrag | DescOwn |
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 1c01e9b..8265486 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -2040,7 +2040,7 @@
 		 */
 		if (bd_flags & BD_FLG_TCP_UDP_SUM) {
 			skb->csum = htons(csum);
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 		} else {
 			skb->ip_summed = CHECKSUM_NONE;
 		}
@@ -2511,7 +2511,7 @@
 
 		mapping = ace_map_tx_skb(ap, skb, skb, idx);
 		flagsize = (skb->len << 16) | (BD_FLG_END);
-		if (skb->ip_summed == CHECKSUM_HW)
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			flagsize |= BD_FLG_TCP_UDP_SUM;
 #if ACENIC_DO_VLAN
 		if (vlan_tx_tag_present(skb)) {
@@ -2534,7 +2534,7 @@
 
 		mapping = ace_map_tx_skb(ap, skb, NULL, idx);
 		flagsize = (skb_headlen(skb) << 16);
-		if (skb->ip_summed == CHECKSUM_HW)
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			flagsize |= BD_FLG_TCP_UDP_SUM;
 #if ACENIC_DO_VLAN
 		if (vlan_tx_tag_present(skb)) {
@@ -2560,7 +2560,7 @@
 					       PCI_DMA_TODEVICE);
 
 			flagsize = (frag->size << 16);
-			if (skb->ip_summed == CHECKSUM_HW)
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				flagsize |= BD_FLG_TCP_UDP_SUM;
 			idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap);
 
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 979a33d..96d8a69 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -161,6 +161,7 @@
 	{ 0x1571, 0xa204, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{ 0x1571, 0xa205, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{ 0x1571, 0xa206, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
+	{ 0x10B5, 0x9030, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{ 0x10B5, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT },
 	{0,}
 };
diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c
index 6fad83f..7116096 100644
--- a/drivers/net/bmac.c
+++ b/drivers/net/bmac.c
@@ -1264,7 +1264,8 @@
 {
 	int j, rev, ret;
 	struct bmac_data *bp;
-	unsigned char *addr;
+	const unsigned char *prop_addr;
+	unsigned char addr[6];
 	struct net_device *dev;
 	int is_bmac_plus = ((int)match->data) != 0;
 
@@ -1272,14 +1273,16 @@
 		printk(KERN_ERR "BMAC: can't use, need 3 addrs and 3 intrs\n");
 		return -ENODEV;
 	}
-	addr = get_property(macio_get_of_node(mdev), "mac-address", NULL);
-	if (addr == NULL) {
-		addr = get_property(macio_get_of_node(mdev), "local-mac-address", NULL);
-		if (addr == NULL) {
+	prop_addr = get_property(macio_get_of_node(mdev), "mac-address", NULL);
+	if (prop_addr == NULL) {
+		prop_addr = get_property(macio_get_of_node(mdev),
+				"local-mac-address", NULL);
+		if (prop_addr == NULL) {
 			printk(KERN_ERR "BMAC: Can't get mac-address\n");
 			return -ENODEV;
 		}
 	}
+	memcpy(addr, prop_addr, sizeof(addr));
 
 	dev = alloc_etherdev(PRIV_BYTES);
 	if (!dev) {
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 652eb05..7857b46 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4423,7 +4423,7 @@
 	ring_prod = TX_RING_IDX(prod);
 
 	vlan_tag_flags = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		vlan_tag_flags |= TX_BD_FLAGS_TCP_UDP_CKSUM;
 	}
 
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index a31544c..558fdb8 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2167,7 +2167,7 @@
 			cas_page_unmap(addr);
 	}
 	skb->csum = ntohs(i ^ 0xffff);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_COMPLETE;
 	skb->protocol = eth_type_trans(skb, cp->dev);
 	return len;
 }
@@ -2821,7 +2821,7 @@
 	}
 
 	ctrl = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u64 csum_start_off, csum_stuff_off;
 
 		csum_start_off = (u64) (skb->h.raw - skb->data);
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 61b3754..ddd0bdb 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1470,9 +1470,9 @@
 		}
 
 		if (!(adapter->flags & UDP_CSUM_CAPABLE) &&
-		    skb->ip_summed == CHECKSUM_HW &&
+		    skb->ip_summed == CHECKSUM_PARTIAL &&
 		    skb->nh.iph->protocol == IPPROTO_UDP)
-			if (unlikely(skb_checksum_help(skb, 0))) {
+			if (unlikely(skb_checksum_help(skb))) {
 				dev_kfree_skb_any(skb);
 				return NETDEV_TX_OK;
 			}
@@ -1495,11 +1495,11 @@
 		cpl = (struct cpl_tx_pkt *)__skb_push(skb, sizeof(*cpl));
 		cpl->opcode = CPL_TX_PKT;
 		cpl->ip_csum_dis = 1;    /* SW calculates IP csum */
-		cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 0 : 1;
+		cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_PARTIAL ? 0 : 1;
 		/* the length field isn't used so don't bother setting it */
 
-		st->tx_cso += (skb->ip_summed == CHECKSUM_HW);
-		sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_HW);
+		st->tx_cso += (skb->ip_summed == CHECKSUM_PARTIAL);
+		sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_PARTIAL);
 		sge->stats.tx_reg_pkts++;
 	}
 	cpl->iff = dev->if_port;
diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c
index 402961e..b74e676 100644
--- a/drivers/net/dl2k.c
+++ b/drivers/net/dl2k.c
@@ -611,7 +611,7 @@
 	txdesc = &np->tx_ring[entry];
 
 #if 0
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		txdesc->status |=
 		    cpu_to_le64 (TCPChecksumEnable | UDPChecksumEnable |
 				 IPChecksumEnable);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 98ef9f8..2ab9f96 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2600,7 +2600,7 @@
 	unsigned int i;
 	uint8_t css;
 
-	if (likely(skb->ip_summed == CHECKSUM_HW)) {
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		css = skb->h.raw - skb->data;
 
 		i = tx_ring->next_to_use;
@@ -2927,11 +2927,11 @@
 	}
 
 	/* reserve a descriptor for the offload context */
-	if ((mss) || (skb->ip_summed == CHECKSUM_HW))
+	if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
 		count++;
 	count++;
 #else
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		count++;
 #endif
 
@@ -3608,7 +3608,7 @@
 		 */
 		csum = ntohl(csum ^ 0xFFFF);
 		skb->csum = csum;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 	adapter->hw_csum_good++;
 }
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 11b8f1b..32cacf1 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1503,7 +1503,8 @@
 		tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
 	else
 #endif
-	tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
+	tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
+			 NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0;
 
 	/* vlan tag */
 	if (np->vlangrp && vlan_tx_tag_present(skb)) {
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index ebbbd6c..ba96091 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -947,7 +947,7 @@
 
 	/* Set up checksumming */
 	if (likely((dev->features & NETIF_F_IP_CSUM)
-			&& (CHECKSUM_HW == skb->ip_summed))) {
+			&& (CHECKSUM_PARTIAL == skb->ip_summed))) {
 		fcb = gfar_add_fcb(skb, txbdp);
 		status |= TXBD_TOE;
 		gfar_tx_checksum(skb, fcb);
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index 409c6aa..763373a 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1648,7 +1648,7 @@
 						* could do the pseudo myself and return
 						* CHECKSUM_UNNECESSARY
 						*/
-						skb->ip_summed = CHECKSUM_HW;
+						skb->ip_summed = CHECKSUM_COMPLETE;
 					}
 				}	
 			}
diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c
index 82468e2..57e214d 100644
--- a/drivers/net/ibm_emac/ibm_emac_core.c
+++ b/drivers/net/ibm_emac/ibm_emac_core.c
@@ -1036,7 +1036,7 @@
 			       struct sk_buff *skb)
 {
 #if defined(CONFIG_IBM_EMAC_TAH)
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		++dev->stats.tx_packets_csum;
 		return EMAC_TX_CTRL_TAH_CSUM;
 	}
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 0464e78..e56eac8 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -702,7 +702,8 @@
 					     desc[3].desc,
 					     desc[4].desc,
 					     desc[5].desc,
-					     correlator);
+					     correlator,
+					     &correlator);
 	} while ((lpar_rc == H_BUSY) && (retry_count--));
 
 	if(lpar_rc != H_SUCCESS && lpar_rc != H_DROPPED) {
diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h
index 8385bf8..f5b25bf 100644
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -41,16 +41,6 @@
 #define IbmVethMcastRemoveFilter     0x2UL
 #define IbmVethMcastClearFilterTable 0x3UL
 
-/* hcall numbers */
-#define H_VIO_SIGNAL             0x104
-#define H_REGISTER_LOGICAL_LAN   0x114
-#define H_FREE_LOGICAL_LAN       0x118
-#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
-#define H_SEND_LOGICAL_LAN       0x120
-#define H_MULTICAST_CTRL         0x130
-#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
-#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
-
 /* hcall macros */
 #define h_register_logical_lan(ua, buflst, rxq, fltlst, mac) \
   plpar_hcall_norets(H_REGISTER_LOGICAL_LAN, ua, buflst, rxq, fltlst, mac)
@@ -61,8 +51,21 @@
 #define h_add_logical_lan_buffer(ua, buf) \
   plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf)
 
-#define h_send_logical_lan(ua, buf1, buf2, buf3, buf4, buf5, buf6, correlator) \
-  plpar_hcall_8arg_2ret(H_SEND_LOGICAL_LAN, ua, buf1, buf2, buf3, buf4, buf5, buf6, correlator, &correlator)
+static inline long h_send_logical_lan(unsigned long unit_address,
+		unsigned long desc1, unsigned long desc2, unsigned long desc3,
+		unsigned long desc4, unsigned long desc5, unsigned long desc6,
+		unsigned long corellator_in, unsigned long *corellator_out)
+{
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+	/* Issue H_SEND_LOGICAL_LAN through plpar_hcall9(); rc is its return value. */
+	rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address, desc1,
+			desc2, desc3, desc4, desc5, desc6, corellator_in);
+	/* retbuf[0] is copied out unconditionally, i.e. whatever rc turned out to be. */
+	*corellator_out = retbuf[0];
+
+	return rc;
+}
 
 #define h_multicast_ctrl(ua, cmd, mac) \
   plpar_hcall_norets(H_MULTICAST_CTRL, ua, cmd, mac)
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 68d8af7..65f897dd 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1387,7 +1387,7 @@
 	 * MAC header which should not be summed and the TCP/UDP pseudo headers
 	 * manually.
 	 */
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		int proto = ntohs(skb->nh.iph->protocol);
 		unsigned int csoff;
 		struct iphdr *ih = skb->nh.iph;
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index e3c8cd5..68d4c41 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -249,7 +249,7 @@
 
 	IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__);	
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
 		if (dev_self[i])
 			ali_ircc_close(dev_self[i]);
 	}
@@ -273,6 +273,12 @@
 	int err;
 			
 	IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__);	
+
+	if (i >= ARRAY_SIZE(dev_self)) {
+		IRDA_ERROR("%s(), maximum number of supported chips reached!\n",
+			   __FUNCTION__);
+		return -ENOMEM;
+	}
 	
 	/* Set FIR FIFO and DMA Threshold */
 	if ((ali_ircc_setup(info)) == -1)
diff --git a/drivers/net/irda/irport.c b/drivers/net/irda/irport.c
index 44efd49..ba4f3eb 100644
--- a/drivers/net/irda/irport.c
+++ b/drivers/net/irda/irport.c
@@ -1090,7 +1090,7 @@
 {
  	int i;
 
- 	for (i=0; (io[i] < 2000) && (i < 4); i++) {
+ 	for (i=0; (i < ARRAY_SIZE(dev_self)) && (io[i] < 2000); i++) { /* test i before indexing io[] */
  		if (irport_open(i, io[i], irq[i]) != NULL)
  			return 0;
  	}
@@ -1112,7 +1112,7 @@
 
         IRDA_DEBUG( 4, "%s()\n", __FUNCTION__);
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
  		if (dev_self[i])
  			irport_close(dev_self[i]);
  	}
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 8bafb45..79b85f3 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -279,7 +279,7 @@
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
 		if (dev_self[i])
 			via_ircc_close(dev_self[i]);
 	}
@@ -327,6 +327,9 @@
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
+	if (i >= ARRAY_SIZE(dev_self))
+		return -ENOMEM;
+
 	/* Allocate new instance of the driver */
 	dev = alloc_irdadev(sizeof(struct via_ircc_cb));
 	if (dev == NULL) 
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 0ea65c4..8421597 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -117,7 +117,7 @@
 
 	IRDA_DEBUG(0, "%s()\n", __FUNCTION__ );
 
-	for (i=0; (io[i] < 2000) && (i < 4); i++) { 
+	for (i=0; (i < ARRAY_SIZE(dev_self)) && (io[i] < 2000); i++) { /* test i before indexing io[] */
 		if (w83977af_open(i, io[i], irq[i], dma[i]) == 0)
 			return 0;
 	}
@@ -136,7 +136,7 @@
 
         IRDA_DEBUG(4, "%s()\n", __FUNCTION__ );
 
-	for (i=0; i < 4; i++) {
+	for (i=0; i < ARRAY_SIZE(dev_self); i++) {
 		if (dev_self[i])
 			w83977af_close(dev_self[i]);
 	}
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 7bbd447..9405b44 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1232,7 +1232,7 @@
 	unsigned int i;
 	uint8_t css, cso;
 
-	if(likely(skb->ip_summed == CHECKSUM_HW)) {
+	if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		css = skb->h.raw - skb->data;
 		cso = (skb->h.raw + skb->csum) - skb->data;
 
diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c
index b783a69..393aba9 100644
--- a/drivers/net/lp486e.c
+++ b/drivers/net/lp486e.c
@@ -442,16 +442,16 @@
 		if (rbd) {
 			rbd->pad = 0;
 			rbd->count = 0;
-			rbd->skb = dev_alloc_skb(RX_SKB_SIZE);
+			rbd->skb = dev_alloc_skb(RX_SKBSIZE);
 			if (!rbd->skb) {
 				printk("dev_alloc_skb failed");
 			}
 			rbd->next = rfd->rbd;
 			if (i) {
 				rfd->rbd->prev = rbd;
-				rbd->size = RX_SKB_SIZE;
+				rbd->size = RX_SKBSIZE;
 			} else {
-				rbd->size = (RX_SKB_SIZE | RBD_EL);
+				rbd->size = (RX_SKBSIZE | RBD_EL);
 				lp->rbd_tail = rbd;
 			}
 
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index 29e4b5a..5d80e0e 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -113,7 +113,7 @@
 	struct device_node *mace = macio_get_of_node(mdev);
 	struct net_device *dev;
 	struct mace_data *mp;
-	unsigned char *addr;
+	const unsigned char *addr;
 	int j, rev, rc = -EBUSY;
 
 	if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) {
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index eeab1df..d4dcc85 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -385,7 +385,7 @@
 	struct pkt_info pkt_info;
 
 	while (budget-- > 0 && eth_port_receive(mp, &pkt_info) == ETH_OK) {
-		dma_unmap_single(NULL, pkt_info.buf_ptr, RX_SKB_SIZE,
+		dma_unmap_single(NULL, pkt_info.buf_ptr, ETH_RX_SKB_SIZE,
 							DMA_FROM_DEVICE);
 		mp->rx_desc_count--;
 		received_packets++;
@@ -1147,7 +1147,7 @@
 	desc->byte_cnt = length;
 	desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE);
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		BUG_ON(skb->protocol != ETH_P_IP);
 
 		cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM |
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 9bdd43a..9f16681 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -930,7 +930,7 @@
 	    (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) ||
 	     vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) {
 		skb->csum = hw_csum;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 }
 
@@ -973,7 +973,7 @@
 		if ((skb->protocol == ntohs(ETH_P_IP)) ||
 		    (skb->protocol == ntohs(ETH_P_IPV6))) {
 			skb->csum = ntohs((u16) csum);
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 		} else
 			myri10ge_vlan_ip_csum(skb, ntohs((u16) csum));
 	}
@@ -1897,13 +1897,13 @@
 	pseudo_hdr_offset = 0;
 	odd_flag = 0;
 	flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
-	if (likely(skb->ip_summed == CHECKSUM_HW)) {
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		cksum_offset = (skb->h.raw - skb->data);
 		pseudo_hdr_offset = (skb->h.raw + skb->csum) - skb->data;
 		/* If the headers are excessively large, then we must
 		 * fall back to a software checksum */
 		if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) {
-			if (skb_checksum_help(skb, 0))
+			if (skb_checksum_help(skb))
 				goto drop;
 			cksum_offset = 0;
 			pseudo_hdr_offset = 0;
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index 0e76859..5143f5d 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -1153,7 +1153,7 @@
 	if (!nr_frags)
 		frag = NULL;
 	extsts = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		extsts |= EXTSTS_IPPKT;
 		if (IPPROTO_TCP == skb->nh.iph->protocol)
 			extsts |= EXTSTS_TCPPKT;
diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c
index 51ff9a9..f3655fd 100644
--- a/drivers/net/ppp_mppe.c
+++ b/drivers/net/ppp_mppe.c
@@ -43,6 +43,7 @@
  *                    deprecated in 2.6
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/version.h>
@@ -64,12 +65,13 @@
 MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE));
 MODULE_VERSION("1.0.2");
 
-static void
+static unsigned int
 setup_sg(struct scatterlist *sg, const void *address, unsigned int length)
 {
 	sg[0].page = virt_to_page(address);
 	sg[0].offset = offset_in_page(address);
 	sg[0].length = length;
+	return length;	/* echoed back so callers can accumulate a byte total */
 }
 
 #define SHA1_PAD_SIZE 40
@@ -95,8 +97,8 @@
  * State for an MPPE (de)compressor.
  */
 struct ppp_mppe_state {
-	struct crypto_tfm *arc4;
-	struct crypto_tfm *sha1;
+	struct crypto_blkcipher *arc4;
+	struct crypto_hash *sha1;
 	unsigned char *sha1_digest;
 	unsigned char master_key[MPPE_MAX_KEY_LEN];
 	unsigned char session_key[MPPE_MAX_KEY_LEN];
@@ -136,14 +138,21 @@
  */
 static void get_new_key_from_sha(struct ppp_mppe_state * state, unsigned char *InterimKey)
 {
+	struct hash_desc desc;
 	struct scatterlist sg[4];
+	unsigned int nbytes;
 
-	setup_sg(&sg[0], state->master_key, state->keylen);
-	setup_sg(&sg[1], sha_pad->sha_pad1, sizeof(sha_pad->sha_pad1));
-	setup_sg(&sg[2], state->session_key, state->keylen);
-	setup_sg(&sg[3], sha_pad->sha_pad2, sizeof(sha_pad->sha_pad2));
+	nbytes = setup_sg(&sg[0], state->master_key, state->keylen);
+	nbytes += setup_sg(&sg[1], sha_pad->sha_pad1,
+			   sizeof(sha_pad->sha_pad1));
+	nbytes += setup_sg(&sg[2], state->session_key, state->keylen);
+	nbytes += setup_sg(&sg[3], sha_pad->sha_pad2,
+			   sizeof(sha_pad->sha_pad2));
 
-	crypto_digest_digest (state->sha1, sg, 4, state->sha1_digest);
+	desc.tfm = state->sha1;
+	desc.flags = 0;
+
+	crypto_hash_digest(&desc, sg, nbytes, state->sha1_digest);
 
 	memcpy(InterimKey, state->sha1_digest, state->keylen);
 }
@@ -156,14 +165,15 @@
 {
 	unsigned char InterimKey[MPPE_MAX_KEY_LEN];
 	struct scatterlist sg_in[1], sg_out[1];
+	struct blkcipher_desc desc = { .tfm = state->arc4 };
 
 	get_new_key_from_sha(state, InterimKey);
 	if (!initial_key) {
-		crypto_cipher_setkey(state->arc4, InterimKey, state->keylen);
+		crypto_blkcipher_setkey(state->arc4, InterimKey, state->keylen);
 		setup_sg(sg_in, InterimKey, state->keylen);
 		setup_sg(sg_out, state->session_key, state->keylen);
-		if (crypto_cipher_encrypt(state->arc4, sg_out, sg_in,
-				      state->keylen) != 0) {
+		if (crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
+					     state->keylen) != 0) {
     		    printk(KERN_WARNING "mppe_rekey: cipher_encrypt failed\n");
 		}
 	} else {
@@ -175,7 +185,7 @@
 		state->session_key[1] = 0x26;
 		state->session_key[2] = 0x9e;
 	}
-	crypto_cipher_setkey(state->arc4, state->session_key, state->keylen);
+	crypto_blkcipher_setkey(state->arc4, state->session_key, state->keylen);
 }
 
 /*
@@ -196,15 +206,19 @@
 
 	memset(state, 0, sizeof(*state));
 
-	state->arc4 = crypto_alloc_tfm("arc4", 0);
-	if (!state->arc4)
+	state->arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(state->arc4)) {
+		state->arc4 = NULL;
 		goto out_free;
+	}
 
-	state->sha1 = crypto_alloc_tfm("sha1", 0);
-	if (!state->sha1)
+	state->sha1 = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(state->sha1)) {
+		state->sha1 = NULL;
 		goto out_free;
+	}
 
-	digestsize = crypto_tfm_alg_digestsize(state->sha1);
+	digestsize = crypto_hash_digestsize(state->sha1);
 	if (digestsize < MPPE_MAX_KEY_LEN)
 		goto out_free;
 
@@ -229,9 +243,9 @@
 	    if (state->sha1_digest)
 		kfree(state->sha1_digest);
 	    if (state->sha1)
-		crypto_free_tfm(state->sha1);
+		crypto_free_hash(state->sha1);
 	    if (state->arc4)
-		crypto_free_tfm(state->arc4);
+		crypto_free_blkcipher(state->arc4);
 	    kfree(state);
 	out:
 	return NULL;
@@ -247,9 +261,9 @@
 	    if (state->sha1_digest)
 		kfree(state->sha1_digest);
 	    if (state->sha1)
-		crypto_free_tfm(state->sha1);
+		crypto_free_hash(state->sha1);
 	    if (state->arc4)
-		crypto_free_tfm(state->arc4);
+		crypto_free_blkcipher(state->arc4);
 	    kfree(state);
 	}
 }
@@ -356,6 +370,7 @@
 	      int isize, int osize)
 {
 	struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
+	struct blkcipher_desc desc = { .tfm = state->arc4 };
 	int proto;
 	struct scatterlist sg_in[1], sg_out[1];
 
@@ -413,7 +428,7 @@
 	/* Encrypt packet */
 	setup_sg(sg_in, ibuf, isize);
 	setup_sg(sg_out, obuf, osize);
-	if (crypto_cipher_encrypt(state->arc4, sg_out, sg_in, isize) != 0) {
+	if (crypto_blkcipher_encrypt(&desc, sg_out, sg_in, isize) != 0) {
 		printk(KERN_DEBUG "crypto_cypher_encrypt failed\n");
 		return -1;
 	}
@@ -462,6 +477,7 @@
 		int osize)
 {
 	struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
+	struct blkcipher_desc desc = { .tfm = state->arc4 };
 	unsigned ccount;
 	int flushed = MPPE_BITS(ibuf) & MPPE_BIT_FLUSHED;
 	int sanity = 0;
@@ -599,7 +615,7 @@
 	 */
 	setup_sg(sg_in, ibuf, 1);
 	setup_sg(sg_out, obuf, 1);
-	if (crypto_cipher_decrypt(state->arc4, sg_out, sg_in, 1) != 0) {
+	if (crypto_blkcipher_decrypt(&desc, sg_out, sg_in, 1) != 0) {
 		printk(KERN_DEBUG "crypto_cypher_decrypt failed\n");
 		return DECOMP_ERROR;
 	}
@@ -619,7 +635,7 @@
 	/* And finally, decrypt the rest of the packet. */
 	setup_sg(sg_in, ibuf + 1, isize - 1);
 	setup_sg(sg_out, obuf + 1, osize - 1);
-	if (crypto_cipher_decrypt(state->arc4, sg_out, sg_in, isize - 1) != 0) {
+	if (crypto_blkcipher_decrypt(&desc, sg_out, sg_in, isize - 1)) {
 		printk(KERN_DEBUG "crypto_cypher_decrypt failed\n");
 		return DECOMP_ERROR;
 	}
@@ -694,8 +710,8 @@
 static int __init ppp_mppe_init(void)
 {
 	int answer;
-	if (!(crypto_alg_available("arc4", 0) &&
-	      crypto_alg_available("sha1", 0)))
+	if (!(crypto_has_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC) &&
+	      crypto_has_hash("sha1", 0, CRYPTO_ALG_ASYNC)))
 		return -ENODEV;
 
 	sha_pad = kmalloc(sizeof(struct sha_pad), GFP_KERNEL);
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 4c2f575..d9b960a 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2169,7 +2169,7 @@
 		if (mss)
 			return LargeSend | ((mss & MSSMask) << MSSShift);
 	}
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		const struct iphdr *ip = skb->nh.iph;
 
 		if (ip->protocol == IPPROTO_TCP)
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index e72e0e0..5b3713f 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -3893,7 +3893,7 @@
 		txdp->Control_1 |= TXD_TCP_LSO_MSS(s2io_tcp_mss(skb));
 	}
 #endif
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		txdp->Control_2 |=
 		    (TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN |
 		     TXD_TX_CKO_UDP_EN);
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index ee62845..eb3b351 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -1559,7 +1559,7 @@
 	pTxd->VDataHigh = (SK_U32) (PhysAddr >> 32);
 	pTxd->pMBuf     = pMessage;
 
-	if (pMessage->ip_summed == CHECKSUM_HW) {
+	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
 		u16 hdrlen = pMessage->h.raw - pMessage->data;
 		u16 offset = hdrlen + pMessage->csum;
 
@@ -1678,7 +1678,7 @@
 	/* 
 	** Does the HW need to evaluate checksum for TCP or UDP packets? 
 	*/
-	if (pMessage->ip_summed == CHECKSUM_HW) {
+	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
 		u16 hdrlen = pMessage->h.raw - pMessage->data;
 		u16 offset = hdrlen + pMessage->csum;
 
@@ -2158,7 +2158,7 @@
 
 #ifdef USE_SK_RX_CHECKSUM
 		pMsg->csum = pRxd->TcpSums & 0xffff;
-		pMsg->ip_summed = CHECKSUM_HW;
+		pMsg->ip_summed = CHECKSUM_COMPLETE;
 #else
 		pMsg->ip_summed = CHECKSUM_NONE;
 #endif
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index ad878df..b3d6fa3 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2338,7 +2338,7 @@
 	td->dma_lo = map;
 	td->dma_hi = map >> 32;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		int offset = skb->h.raw - skb->data;
 
 		/* This seems backwards, but it is what the sk98lin
@@ -2642,7 +2642,7 @@
 	skb->dev = skge->netdev;
 	if (skge->rx_csum) {
 		skb->csum = csum;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 	}
 
 	skb->protocol = eth_type_trans(skb, skge->netdev);
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 933e87f..8e92566 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1163,7 +1163,7 @@
 	if (skb_is_gso(skb))
 		++count;
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		++count;
 
 	return count;
@@ -1272,7 +1272,7 @@
 #endif
 
 	/* Handle TCP checksum offload */
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u16 hdr = skb->h.raw - skb->data;
 		u16 offset = hdr + skb->csum;
 
@@ -2000,7 +2000,7 @@
 #endif
 		case OP_RXCHKS:
 			skb = sky2->rx_ring[sky2->rx_next].skb;
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 			skb->csum = le16_to_cpu(status);
 			break;
 
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 8890721..d64e718 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -1697,10 +1697,10 @@
  */
 static int
 spider_net_download_firmware(struct spider_net_card *card,
-			     u8 *firmware_ptr)
+			     const void *firmware_ptr)
 {
 	int sequencer, i;
-	u32 *fw_ptr = (u32 *)firmware_ptr;
+	const u32 *fw_ptr = firmware_ptr;
 
 	/* stop sequencers */
 	spider_net_write_reg(card, SPIDER_NET_GSINIT,
@@ -1757,7 +1757,7 @@
 {
 	struct firmware *firmware = NULL;
 	struct device_node *dn;
-	u8 *fw_prop = NULL;
+	const u8 *fw_prop = NULL;
 	int err = -ENOENT;
 	int fw_size;
 
@@ -1783,7 +1783,7 @@
 	if (!dn)
 		goto out_err;
 
-	fw_prop = (u8 *)get_property(dn, "firmware", &fw_size);
+	fw_prop = get_property(dn, "firmware", &fw_size);
 	if (!fw_prop)
 		goto out_err;
 
@@ -1986,7 +1986,7 @@
 	struct net_device *netdev = card->netdev;
 	struct device_node *dn;
 	struct sockaddr addr;
-	u8 *mac;
+	const u8 *mac;
 
 	SET_MODULE_OWNER(netdev);
 	SET_NETDEV_DEV(netdev, &card->pdev->dev);
@@ -2019,7 +2019,7 @@
 	if (!dn)
 		return -EIO;
 
-	mac = (u8 *)get_property(dn, "local-mac-address", NULL);
+	mac = get_property(dn, "local-mac-address", NULL);
 	if (!mac)
 		return -EIO;
 	memcpy(addr.sa_data, mac, ETH_ALEN);
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index c0a62b0..2607aa5 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1230,7 +1230,7 @@
 	}
 
 #if defined(ZEROCOPY) && defined(HAS_BROKEN_FIRMWARE)
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (skb_padto(skb, (skb->len + PADDING_MASK) & ~PADDING_MASK))
 			return NETDEV_TX_OK;
 	}
@@ -1252,7 +1252,7 @@
 				status |= TxDescIntr;
 				np->reap_tx = 0;
 			}
-			if (skb->ip_summed == CHECKSUM_HW) {
+			if (skb->ip_summed == CHECKSUM_PARTIAL) {
 				status |= TxCalTCP;
 				np->stats.tx_compressed++;
 			}
@@ -1499,7 +1499,7 @@
 		 * Until then, the printk stays. :-) -Ion
 		 */
 		else if (le16_to_cpu(desc->status2) & 0x0040) {
-			skb->ip_summed = CHECKSUM_HW;
+			skb->ip_summed = CHECKSUM_COMPLETE;
 			skb->csum = le16_to_cpu(desc->csum);
 			printk(KERN_DEBUG "%s: checksum_hw, status2 = %#x\n", dev->name, le16_to_cpu(desc->status2));
 		}
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index b70bbd7..b388651 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -855,7 +855,7 @@
 		}
 
 		skb->csum = ntohs((status & RXDCTRL_TCPCSUM) ^ 0xffff);
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 		skb->protocol = eth_type_trans(skb, gp->dev);
 
 		netif_receive_skb(skb);
@@ -1026,7 +1026,7 @@
 	unsigned long flags;
 
 	ctrl = 0;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u64 csum_start_off, csum_stuff_off;
 
 		csum_start_off = (u64) (skb->h.raw - skb->data);
@@ -2896,7 +2896,7 @@
 	if (use_idprom)
 		memcpy(dev->dev_addr, idprom->id_ethaddr, 6);
 #elif defined(CONFIG_PPC_PMAC)
-	unsigned char *addr;
+	const unsigned char *addr;
 
 	addr = get_property(gp->of_node, "local-mac-address", NULL);
 	if (addr == NULL) {
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index c6f5bc3..17981da 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -1207,7 +1207,7 @@
  * flags, thus:
  *
  * 	skb->csum = rxd->rx_flags & 0xffff;
- * 	skb->ip_summed = CHECKSUM_HW;
+ * 	skb->ip_summed = CHECKSUM_COMPLETE;
  *
  * before sending off the skb to the protocols, and we are good as gold.
  */
@@ -2074,7 +2074,7 @@
 
 		/* This card is _fucking_ hot... */
 		skb->csum = ntohs(csum ^ 0xffff);
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_COMPLETE;
 
 		RXD(("len=%d csum=%4x]", len, csum));
 		skb->protocol = eth_type_trans(skb, dev);
@@ -2268,7 +2268,7 @@
  	u32 tx_flags;
 
 	tx_flags = TXFLAG_OWN;
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u32 csum_start_off, csum_stuff_off;
 
 		csum_start_off = (u32) (skb->h.raw - skb->data);
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index eafabb2..fb70261 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -149,122 +149,67 @@
 MODULE_PARM_DESC(tg3_debug, "Tigon3 bitmapped debugging message enable value");
 
 static struct pci_device_id tg3_pci_tbl[] = {
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705_2,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M_2,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702A3,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703A3,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5782,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5788,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5789,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715S,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780S,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1001,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1003,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_TIGON3,
-	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL },
-	{ 0, }
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705_2)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M_2)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702A3)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703A3)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5782)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5788)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5789)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715S)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780S)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781)},
+	{PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)},
+	{PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1001)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1003)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100)},
+	{PCI_DEVICE(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_TIGON3)},
+	{}
 };
 
 MODULE_DEVICE_TABLE(pci, tg3_pci_tbl);
 
-static struct {
+static const struct {
 	const char string[ETH_GSTRING_LEN];
 } ethtool_stats_keys[TG3_NUM_STATS] = {
 	{ "rx_octets" },
@@ -345,7 +290,7 @@
 	{ "nic_tx_threshold_hit" }
 };
 
-static struct {
+static const struct {
 	const char string[ETH_GSTRING_LEN];
 } ethtool_test_keys[TG3_NUM_TEST] = {
 	{ "nvram test     (online) " },
@@ -3851,11 +3796,11 @@
 		skb->h.th->check = 0;
 
 	}
-	else if (skb->ip_summed == CHECKSUM_HW)
+	else if (skb->ip_summed == CHECKSUM_PARTIAL)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 #else
 	mss = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 #endif
 #if TG3_VLAN_TAG_USED
@@ -3981,7 +3926,7 @@
 
 	entry = tp->tx_prod;
 	base_flags = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
 #if TG3_TSO_SUPPORT != 0
 	mss = 0;
@@ -4969,7 +4914,7 @@
 #define TG3_FW_BSS_ADDR		0x08000a70
 #define TG3_FW_BSS_LEN		0x10
 
-static u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
+static const u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = {
 	0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800,
 	0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000018, 0x00000000,
 	0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100034,
@@ -5063,7 +5008,7 @@
 	0x27bd0008, 0x03e00008, 0x00000000, 0x00000000, 0x00000000
 };
 
-static u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = {
+static const u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = {
 	0x35373031, 0x726c7341, 0x00000000, 0x00000000, 0x53774576, 0x656e7430,
 	0x00000000, 0x726c7045, 0x76656e74, 0x31000000, 0x556e6b6e, 0x45766e74,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
@@ -5128,13 +5073,13 @@
 struct fw_info {
 	unsigned int text_base;
 	unsigned int text_len;
-	u32 *text_data;
+	const u32 *text_data;
 	unsigned int rodata_base;
 	unsigned int rodata_len;
-	u32 *rodata_data;
+	const u32 *rodata_data;
 	unsigned int data_base;
 	unsigned int data_len;
-	u32 *data_data;
+	const u32 *data_data;
 };
 
 /* tp->lock is held. */
@@ -5266,7 +5211,7 @@
 #define TG3_TSO_FW_BSS_ADDR		0x08001b80
 #define TG3_TSO_FW_BSS_LEN		0x894
 
-static u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = {
+static const u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = {
 	0x0e000003, 0x00000000, 0x08001b24, 0x00000000, 0x10000003, 0x00000000,
 	0x0000000d, 0x0000000d, 0x3c1d0800, 0x37bd4000, 0x03a0f021, 0x3c100800,
 	0x26100000, 0x0e000010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe,
@@ -5553,7 +5498,7 @@
 	0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c, 0x00000000, 0x00000000,
 };
 
-static u32 tg3TsoFwRodata[] = {
+static const u32 tg3TsoFwRodata[] = {
 	0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
 	0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x496e0000, 0x73746b6f,
 	0x66662a2a, 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x00000000,
@@ -5561,7 +5506,7 @@
 	0x00000000,
 };
 
-static u32 tg3TsoFwData[] = {
+static const u32 tg3TsoFwData[] = {
 	0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x362e3000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000,
@@ -5583,7 +5528,7 @@
 #define TG3_TSO5_FW_BSS_ADDR		0x00010f50
 #define TG3_TSO5_FW_BSS_LEN		0x88
 
-static u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = {
+static const u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = {
 	0x0c004003, 0x00000000, 0x00010f04, 0x00000000, 0x10000003, 0x00000000,
 	0x0000000d, 0x0000000d, 0x3c1d0001, 0x37bde000, 0x03a0f021, 0x3c100001,
 	0x26100000, 0x0c004010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe,
@@ -5742,14 +5687,14 @@
 	0x00000000, 0x00000000, 0x00000000,
 };
 
-static u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = {
+static const u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = {
 	0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000,
 	0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000,
 	0x73746b6f, 0x66666c64, 0x00000000, 0x00000000, 0x66617461, 0x6c457272,
 	0x00000000, 0x00000000, 0x00000000,
 };
 
-static u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = {
+static const u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = {
 	0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x322e3000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000,
 };
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 4103c37..c6e601d 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -830,7 +830,7 @@
 	first_txd->addrHi = (u64)((unsigned long) skb) >> 32;
 	first_txd->processFlags = 0;
 
-	if(skb->ip_summed == CHECKSUM_HW) {
+	if(skb->ip_summed == CHECKSUM_PARTIAL) {
 		/* The 3XP will figure out if this is UDP/TCP */
 		first_txd->processFlags |= TYPHOON_TX_PF_TCP_CHKSUM;
 		first_txd->processFlags |= TYPHOON_TX_PF_UDP_CHKSUM;
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index ae97108..6654715 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -1230,7 +1230,7 @@
 	rp->tx_skbuff[entry] = skb;
 
 	if ((rp->quirks & rqRhineI) &&
-	    (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_HW)) {
+	    (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_PARTIAL)) {
 		/* Must use alignment buffer. */
 		if (skb->len > PKT_BUF_SZ) {
 			/* packet too long, drop it */
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index aa9cd92..f1e0c74 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -2002,7 +2002,7 @@
 	 *	Handle hardware checksum
 	 */
 	if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM)
-				 && (skb->ip_summed == CHECKSUM_HW)) {
+				 && (skb->ip_summed == CHECKSUM_PARTIAL)) {
 		struct iphdr *ip = skb->nh.iph;
 		if (ip->protocol == IPPROTO_TCP)
 			td_ptr->tdesc1.TCR |= TCR0_TCPCK;
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index a4dd139..170c500 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -19,6 +19,7 @@
 
 ======================================================================*/
 
+#include <linux/err.h>
 #include <linux/init.h>
 
 #include <linux/kernel.h>
@@ -1203,7 +1204,7 @@
 	struct iw_spy_data	spy_data;
 	struct iw_public_data	wireless_data;
 	/* MIC stuff */
-	struct crypto_tfm	*tfm;
+	struct crypto_cipher	*tfm;
 	mic_module		mod[2];
 	mic_statistics		micstats;
 	HostRxDesc rxfids[MPI_MAX_FIDS]; // rx/tx/config MPI350 descriptors
@@ -1271,7 +1272,8 @@
 
 static int RxSeqValid (struct airo_info *ai,miccntx *context,int mcast,u32 micSeq);
 static void MoveWindow(miccntx *context, u32 micSeq);
-static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct crypto_tfm *);
+static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen,
+			   struct crypto_cipher *tfm);
 static void emmh32_init(emmh32_context *context);
 static void emmh32_update(emmh32_context *context, u8 *pOctets, int len);
 static void emmh32_final(emmh32_context *context, u8 digest[4]);
@@ -1339,10 +1341,11 @@
 	int i;
 
 	if (ai->tfm == NULL)
-	        ai->tfm = crypto_alloc_tfm("aes", CRYPTO_TFM_REQ_MAY_SLEEP);
+	        ai->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
 
-        if (ai->tfm == NULL) {
+        if (IS_ERR(ai->tfm)) {
                 airo_print_err(ai->dev->name, "failed to load transform for AES");
+                ai->tfm = NULL;
                 return ERROR;
         }
 
@@ -1608,7 +1611,8 @@
 static unsigned char aes_counter[16];
 
 /* expand the key to fill the MMH coefficient array */
-static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen, struct crypto_tfm *tfm)
+static void emmh32_setseed(emmh32_context *context, u8 *pkey, int keylen,
+			   struct crypto_cipher *tfm)
 {
   /* take the keying material, expand if necessary, truncate at 16-bytes */
   /* run through AES counter mode to generate context->coeff[] */
@@ -1616,7 +1620,6 @@
 	int i,j;
 	u32 counter;
 	u8 *cipher, plain[16];
-	struct scatterlist sg[1];
 
 	crypto_cipher_setkey(tfm, pkey, 16);
 	counter = 0;
@@ -1627,9 +1630,8 @@
 		aes_counter[12] = (u8)(counter >> 24);
 		counter++;
 		memcpy (plain, aes_counter, 16);
-		sg_set_buf(sg, plain, 16);
-		crypto_cipher_encrypt(tfm, sg, sg, 16);
-		cipher = kmap(sg->page) + sg->offset;
+		crypto_cipher_encrypt_one(tfm, plain, plain);
+		cipher = plain;
 		for (j=0; (j<16) && (i< (sizeof(context->coeff)/sizeof(context->coeff[0]))); ) {
 			context->coeff[i++] = ntohl(*(u32 *)&cipher[j]);
 			j += 4;
@@ -2432,7 +2434,7 @@
 				ai->shared, ai->shared_dma);
 		}
         }
-	crypto_free_tfm(ai->tfm);
+	crypto_free_cipher(ai->tfm);
 	del_airo_dev( dev );
 	free_netdev( dev );
 }
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 076bd6d..7288a3e 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -176,16 +176,16 @@
 	return 0;
 }
 
-static int get_children_props(struct device_node *dn, int **drc_indexes,
-		int **drc_names, int **drc_types, int **drc_power_domains)
+static int get_children_props(struct device_node *dn, const int **drc_indexes,
+		const int **drc_names, const int **drc_types,
+		const int **drc_power_domains)
 {
-	int *indexes, *names;
-	int *types, *domains;
+	const int *indexes, *names, *types, *domains;
 
-	indexes = (int *) get_property(dn, "ibm,drc-indexes", NULL);
-	names = (int *) get_property(dn, "ibm,drc-names", NULL);
-	types = (int *) get_property(dn, "ibm,drc-types", NULL);
-	domains = (int *) get_property(dn, "ibm,drc-power-domains", NULL);
+	indexes = get_property(dn, "ibm,drc-indexes", NULL);
+	names = get_property(dn, "ibm,drc-names", NULL);
+	types = get_property(dn, "ibm,drc-types", NULL);
+	domains = get_property(dn, "ibm,drc-power-domains", NULL);
 
 	if (!indexes || !names || !types || !domains) {
 		/* Slot does not have dynamically-removable children */
@@ -212,13 +212,13 @@
 int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
 		char **drc_name, char **drc_type, int *drc_power_domain)
 {
-	int *indexes, *names;
-	int *types, *domains;
-	unsigned int *my_index;
+	const int *indexes, *names;
+	const int *types, *domains;
+	const unsigned int *my_index;
 	char *name_tmp, *type_tmp;
 	int i, rc;
 
-	my_index = (int *) get_property(dn, "ibm,my-drc-index", NULL);
+	my_index = get_property(dn, "ibm,my-drc-index", NULL);
 	if (!my_index) {
 		/* Node isn't DLPAR/hotplug capable */
 		return -EINVAL;
@@ -265,10 +265,10 @@
 	return 1;
 }
 
-static int is_php_dn(struct device_node *dn, int **indexes, int **names,
-		int **types, int **power_domains)
+static int is_php_dn(struct device_node *dn, const int **indexes,
+		const int **names, const int **types, const int **power_domains)
 {
-	int *drc_types;
+	const int *drc_types;
 	int rc;
 
 	rc = get_children_props(dn, indexes, names, &drc_types, power_domains);
@@ -296,7 +296,7 @@
 	struct slot *slot;
 	int retval = 0;
 	int i;
-	int *indexes, *names, *types, *power_domains;
+	const int *indexes, *names, *types, *power_domains;
 	char *name, *type;
 
 	dbg("Entry %s: dn->full_name=%s\n", __FUNCTION__, dn->full_name);
diff --git a/drivers/s390/Kconfig b/drivers/s390/Kconfig
index 4d36208..ae89b9b 100644
--- a/drivers/s390/Kconfig
+++ b/drivers/s390/Kconfig
@@ -213,17 +213,35 @@
 	help
 	  Character device driver for reading z/VM monitor service records
 
+config MONWRITER
+	tristate "API for writing z/VM monitor service records"
+	default "m"
+	help
+	  Character device driver for writing z/VM monitor service records
+
 endmenu
 
 menu "Cryptographic devices"
 
-config Z90CRYPT
+config ZCRYPT
 	tristate "Support for PCI-attached cryptographic adapters"
-        default "m"
-        help
+	select ZCRYPT_MONOLITHIC if ZCRYPT="y"
+	default "m"
+	help
 	  Select this option if you want to use a PCI-attached cryptographic
-	  adapter like the PCI Cryptographic Accelerator (PCICA) or the PCI
-	  Cryptographic Coprocessor (PCICC).  This option is also available
-	  as a module called z90crypt.ko.
+	  adapter like:
+	  + PCI Cryptographic Accelerator (PCICA)
+	  + PCI Cryptographic Coprocessor (PCICC)
+	  + PCI-X Cryptographic Coprocessor (PCIXCC)
+	  + Crypto Express2 Coprocessor (CEX2C)
+	  + Crypto Express2 Accelerator (CEX2A)
+
+config ZCRYPT_MONOLITHIC
+	bool "Monolithic zcrypt module"
+	depends on ZCRYPT="m"
+	help
+	  Select this option if you want to have a single module z90crypt.ko
+	  that contains all parts of the crypto device driver (ap bus,
+	  request router and all the card drivers).
 
 endmenu
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 25c1ef6..d0647d1 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -184,7 +184,7 @@
 	device->debug_area = debug_register(device->cdev->dev.bus_id, 1, 2,
 					    8 * sizeof (long));
 	debug_register_view(device->debug_area, &debug_sprintf_view);
-	debug_set_level(device->debug_area, DBF_EMERG);
+	debug_set_level(device->debug_area, DBF_WARNING);
 	DBF_DEV_EVENT(DBF_EMERG, device, "%s", "debug area created");
 
 	device->state = DASD_STATE_BASIC;
@@ -893,7 +893,7 @@
 
 	device = (struct dasd_device *) cqr->device;
 	if (device == NULL ||
-	    device != dasd_device_from_cdev(cdev) ||
+	    device != dasd_device_from_cdev_locked(cdev) ||
 	    strncmp(device->discipline->ebcname, (char *) &cqr->magic, 4)) {
 		MESSAGE(KERN_DEBUG, "invalid device in request: bus_id %s",
 			cdev->dev.bus_id);
@@ -970,7 +970,7 @@
 	/* first of all check for state change pending interrupt */
 	mask = DEV_STAT_ATTENTION | DEV_STAT_DEV_END | DEV_STAT_UNIT_EXCEP;
 	if ((irb->scsw.dstat & mask) == mask) {
-		device = dasd_device_from_cdev(cdev);
+		device = dasd_device_from_cdev_locked(cdev);
 		if (!IS_ERR(device)) {
 			dasd_handle_state_change_pending(device);
 			dasd_put_device(device);
@@ -2169,7 +2169,7 @@
 		goto failed;
 	}
 	debug_register_view(dasd_debug_area, &debug_sprintf_view);
-	debug_set_level(dasd_debug_area, DBF_EMERG);
+	debug_set_level(dasd_debug_area, DBF_WARNING);
 
 	DBF_EVENT(DBF_EMERG, "%s", "debug area created");
 
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 9af02c7..91cf971 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -258,8 +258,12 @@
                 return residual_str;
         }
 	if (strncmp("nopav", parsestring, length) == 0) {
-		dasd_nopav = 1;
-		MESSAGE(KERN_INFO, "%s", "disable PAV mode");
+		if (MACHINE_IS_VM)
+			MESSAGE(KERN_INFO, "%s", "'nopav' not supported on VM");
+		else {
+			dasd_nopav = 1;
+			MESSAGE(KERN_INFO, "%s", "disable PAV mode");
+		}
 		return residual_str;
 	}
 	if (strncmp("fixedbuffers", parsestring, length) == 0) {
@@ -523,17 +527,17 @@
 {
 	struct dasd_devmap *devmap;
 	struct dasd_device *device;
+	unsigned long flags;
 	int rc;
 
 	devmap = dasd_devmap_from_cdev(cdev);
 	if (IS_ERR(devmap))
 		return (void *) devmap;
-	cdev->dev.driver_data = devmap;
 
 	device = dasd_alloc_device();
 	if (IS_ERR(device))
 		return device;
-	atomic_set(&device->ref_count, 2);
+	atomic_set(&device->ref_count, 3);
 
 	spin_lock(&dasd_devmap_lock);
 	if (!devmap->device) {
@@ -552,6 +556,11 @@
 		dasd_free_device(device);
 		return ERR_PTR(rc);
 	}
+
+	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+	cdev->dev.driver_data = device;
+	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+
 	return device;
 }
 
@@ -569,6 +578,7 @@
 {
 	struct ccw_device *cdev;
 	struct dasd_devmap *devmap;
+	unsigned long flags;
 
 	/* First remove device pointer from devmap. */
 	devmap = dasd_find_busid(device->cdev->dev.bus_id);
@@ -582,9 +592,16 @@
 	devmap->device = NULL;
 	spin_unlock(&dasd_devmap_lock);
 
-	/* Drop ref_count by 2, one for the devmap reference and
-	 * one for the passed reference. */
-	atomic_sub(2, &device->ref_count);
+	/* Disconnect dasd_device structure from ccw_device structure. */
+	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
+	device->cdev->dev.driver_data = NULL;
+	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
+
+	/*
+	 * Drop ref_count by 3, one for the devmap reference, one for
+	 * the cdev reference and one for the passed reference.
+	 */
+	atomic_sub(3, &device->ref_count);
 
 	/* Wait for reference counter to drop to zero. */
 	wait_event(dasd_delete_wq, atomic_read(&device->ref_count) == 0);
@@ -593,9 +610,6 @@
 	cdev = device->cdev;
 	device->cdev = NULL;
 
-	/* Disconnect dasd_devmap structure from ccw_device structure. */
-	cdev->dev.driver_data = NULL;
-
 	/* Put ccw_device structure. */
 	put_device(&cdev->dev);
 
@@ -615,21 +629,32 @@
 
 /*
  * Return dasd_device structure associated with cdev.
+ * This function needs to be called with the ccw device
+ * lock held. It can be used from interrupt context.
+ */
+struct dasd_device *
+dasd_device_from_cdev_locked(struct ccw_device *cdev)
+{
+	struct dasd_device *dasd = cdev->dev.driver_data;
+
+	if (dasd == NULL)	/* no dasd_device attached to this cdev */
+		return ERR_PTR(-ENODEV);
+	dasd_get_device(dasd);	/* callers pair this with dasd_put_device() */
+	return dasd;
+}
+
+/*
+ * Return dasd_device structure associated with cdev.
  */
 struct dasd_device *
 dasd_device_from_cdev(struct ccw_device *cdev)
 {
-	struct dasd_devmap *devmap;
 	struct dasd_device *device;
+	unsigned long flags;
 
-	device = ERR_PTR(-ENODEV);
-	spin_lock(&dasd_devmap_lock);
-	devmap = cdev->dev.driver_data;
-	if (devmap && devmap->device) {
-		device = devmap->device;
-		dasd_get_device(device);
-	}
-	spin_unlock(&dasd_devmap_lock);
+	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+	device = dasd_device_from_cdev_locked(cdev);
+	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
 	return device;
 }
 
@@ -730,16 +755,17 @@
 dasd_discipline_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
 {
-	struct dasd_devmap *devmap;
-	char *dname;
+	struct dasd_device *device;
+	ssize_t len;
 
-	spin_lock(&dasd_devmap_lock);
-	dname = "none";
-	devmap = dev->driver_data;
-	if (devmap && devmap->device && devmap->device->discipline)
-		dname = devmap->device->discipline->name;
-	spin_unlock(&dasd_devmap_lock);
-	return snprintf(buf, PAGE_SIZE, "%s\n", dname);
+	device = dasd_device_from_cdev(to_ccwdev(dev));
+	if (IS_ERR(device))
+		return snprintf(buf, PAGE_SIZE, "none\n");
+	/* Drop the reference taken above even when no discipline is set. */
+	len = snprintf(buf, PAGE_SIZE, "%s\n", device->discipline ?
+		       device->discipline->name : "none");
+	dasd_put_device(device);
+	return len;
 }
 
 static DEVICE_ATTR(discipline, 0444, dasd_discipline_show, NULL);
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index da65f1b..e0bf30e 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -678,7 +678,7 @@
 	return 0;
 }
 
-void __exit dasd_eer_exit(void)
+void dasd_eer_exit(void)
 {
 	WARN_ON(misc_deregister(&dasd_eer_dev) != 0);
 }
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 3ccf06d..9f52004 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -534,6 +534,7 @@
 void dasd_remove_sysfs_files(struct ccw_device *);
 
 struct dasd_device *dasd_device_from_cdev(struct ccw_device *);
+struct dasd_device *dasd_device_from_cdev_locked(struct ccw_device *);
 struct dasd_device *dasd_device_from_devindex(int);
 
 int dasd_parse(void);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index ca7d51f..cab2c73 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -453,7 +453,7 @@
 		PRINT_WARN("No expanded memory available\n");
 		return -ENODEV;
 	}
-	xpram_pages = xpram_highest_page_index();
+	xpram_pages = xpram_highest_page_index() + 1;
 	PRINT_INFO("  %u pages expanded memory found (%lu KB).\n",
 		   xpram_pages, (unsigned long) xpram_pages*4);
 	rc = xpram_setup_sizes(xpram_pages);
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 0c0162ff..c3e97b4 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -28,3 +28,4 @@
 obj-$(CONFIG_S390_TAPE_34XX) += tape_34xx.o
 obj-$(CONFIG_S390_TAPE_3590) += tape_3590.o
 obj-$(CONFIG_MONREADER) += monreader.o
+obj-$(CONFIG_MONWRITER) += monwriter.o
diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c
new file mode 100644
index 0000000..1e3939a
--- /dev/null
+++ b/drivers/s390/char/monwriter.c
@@ -0,0 +1,341 @@
+/*
+ * drivers/s390/char/monwriter.c
+ *
+ * Character device driver for writing z/VM *MONITOR service records.
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Author(s): Melissa Howland <Melissa.Howland@us.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/ctype.h>
+#include <linux/poll.h>
+#include <asm/uaccess.h>
+#include <asm/ebcdic.h>
+#include <asm/io.h>
+#include <asm/appldata.h>
+#include <asm/monwriter.h>
+
+#define MONWRITE_MAX_DATALEN	4024
+
+/* Limit on buffers per open file; settable as module parameter max_bufs. */
+static int mon_max_bufs = 255;
+
+/* One monitor record: its header, payload and start-DIAG state. */
+struct mon_buf {
+	struct list_head list;
+	struct monwrite_hdr hdr;
+	int diag_done;
+	char *data;
+};
+
+/* Per-open state: buffer list plus progress of the record being written. */
+struct mon_private {
+	struct list_head list;
+	struct monwrite_hdr hdr;
+	size_t hdr_to_read;
+	size_t data_to_read;
+	struct mon_buf *current_buf;
+	int mon_buf_count;
+};
+
+/*
+ * helper functions
+ */
+
+/*
+ * Call appldata_asm() with function code fcn for the record described by
+ * myhdr. Results <= 0 are passed through, 5 becomes -EPERM and any other
+ * positive return code is logged and reported as -EINVAL.
+ */
+static int monwrite_diag(struct monwrite_hdr *myhdr, char *buffer, int fcn)
+{
+	struct appldata_product_id id;
+	int rc;
+
+	/* NOTE(review): strcpy also stores the trailing '\0' - confirm that
+	 * prod_nr is large enough for all 8 bytes. */
+	strcpy(id.prod_nr, "LNXAPPL");
+	id.prod_fn = myhdr->applid;
+	id.record_nr = myhdr->record_num;
+	id.version_nr = myhdr->version;
+	id.release_nr = myhdr->release;
+	id.mod_lvl = myhdr->mod_level;
+	rc = appldata_asm(&id, fcn, (void *) buffer, myhdr->datalen);
+	if (rc <= 0)
+		return rc;
+	if (rc == 5)
+		return -EPERM;
+	printk("DIAG X'DC' error with return code: %i\n", rc);
+	return -EINVAL;
+}
+
+/*
+ * Return the buffer on monpriv's list whose applid, record number, version,
+ * release and modification level all match monhdr, or NULL if none does.
+ */
+static inline struct mon_buf *monwrite_find_hdr(struct mon_private *monpriv,
+						struct monwrite_hdr *monhdr)
+{
+	struct mon_buf *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &monpriv->list, list)
+		if (entry->hdr.applid == monhdr->applid &&
+		    entry->hdr.record_num == monhdr->record_num &&
+		    entry->hdr.version == monhdr->version &&
+		    entry->hdr.release == monhdr->release &&
+		    entry->hdr.mod_level == monhdr->mod_level)
+			return entry;
+	return NULL;
+}
+
+/*
+ * Validate the header assembled in monpriv->hdr and pick its buffer.
+ *
+ * A stop request issues the stop DIAG for the matching buffer and frees it;
+ * without a match it is ignored. Any other function reuses the matching
+ * buffer or allocates a new one. On success monpriv->current_buf is the
+ * buffer that receives the payload, or NULL if no payload is expected.
+ *
+ * Returns 0 or a negative error code (-EINVAL, -ENOSPC, -ENOMEM or the
+ * result of the stop DIAG).
+ */
+static int monwrite_new_hdr(struct mon_private *monpriv)
+{
+	struct monwrite_hdr *monhdr = &monpriv->hdr;
+	struct mon_buf *monbuf;
+	int rc = 0;
+
+	if (monhdr->datalen > MONWRITE_MAX_DATALEN ||
+	    monhdr->mon_function > MONWRITE_START_CONFIG ||
+	    monhdr->hdrlen != sizeof(struct monwrite_hdr))
+		return -EINVAL;
+	monbuf = monwrite_find_hdr(monpriv, monhdr);
+	if (monbuf) {
+		if (monhdr->mon_function == MONWRITE_STOP_INTERVAL) {
+			monhdr->datalen = monbuf->hdr.datalen;
+			rc = monwrite_diag(monhdr, monbuf->data,
+					   APPLDATA_STOP_REC);
+			list_del(&monbuf->list);
+			monpriv->mon_buf_count--;
+			kfree(monbuf->data);
+			kfree(monbuf);
+			monbuf = NULL;
+		} else if (monhdr->datalen != monbuf->hdr.datalen) {
+			/* Payload would not match the allocated buffer. */
+			return -EINVAL;
+		}
+	} else if (monhdr->mon_function != MONWRITE_STOP_INTERVAL) {
+		if (monpriv->mon_buf_count >= mon_max_bufs)
+			return -ENOSPC;
+		monbuf = kzalloc(sizeof(struct mon_buf), GFP_KERNEL);
+		if (!monbuf)
+			return -ENOMEM;
+		/* monbuf->hdr is still zeroed here, so take the length
+		 * from the new header. */
+		monbuf->data = kzalloc(monhdr->datalen,
+				       GFP_KERNEL | GFP_DMA);
+		if (!monbuf->data) {
+			kfree(monbuf);
+			return -ENOMEM;
+		}
+		monbuf->hdr = *monhdr;
+		list_add_tail(&monbuf->list, &monpriv->list);
+		monpriv->mon_buf_count++;
+	}
+	monpriv->current_buf = monbuf;
+	return rc;
+}
+
+/*
+ * Called once the payload of monpriv->current_buf is complete. Start
+ * records issue their DIAG only the first time; event records issue it
+ * every time and are released immediately afterwards.
+ */
+static int monwrite_new_data(struct mon_private *monpriv)
+{
+	struct monwrite_hdr *monhdr = &monpriv->hdr;
+	struct mon_buf *monbuf = monpriv->current_buf;
+	int rc = 0;
+
+	switch (monhdr->mon_function) {
+	case MONWRITE_START_INTERVAL:
+		if (!monbuf->diag_done) {
+			rc = monwrite_diag(monhdr, monbuf->data,
+					   APPLDATA_START_INTERVAL_REC);
+			monbuf->diag_done = 1;
+		}
+		break;
+	case MONWRITE_START_CONFIG:
+		if (!monbuf->diag_done) {
+			rc = monwrite_diag(monhdr, monbuf->data,
+					   APPLDATA_START_CONFIG_REC);
+			monbuf->diag_done = 1;
+		}
+		break;
+	case MONWRITE_GEN_EVENT:
+		rc = monwrite_diag(monhdr, monbuf->data,
+				   APPLDATA_GEN_EVENT_REC);
+		list_del(&monpriv->current_buf->list);
+		/* Keep the count in step with the list. */
+		monpriv->mon_buf_count--;
+		kfree(monpriv->current_buf->data);
+		kfree(monpriv->current_buf);
+		monpriv->current_buf = NULL;
+		break;
+	default:
+		/* monhdr->mon_function is checked in monwrite_new_hdr */
+		BUG();
+	}
+	return rc;
+}
+
+/*
+ * file operations
+ */
+
+/* Allocate the per-open state and expect a header as the first input. */
+static int monwrite_open(struct inode *inode, struct file *filp)
+{
+	struct mon_private *monpriv;
+
+	monpriv = kzalloc(sizeof(struct mon_private), GFP_KERNEL);
+	if (!monpriv)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&monpriv->list);
+	monpriv->hdr_to_read = sizeof(monpriv->hdr);
+	filp->private_data = monpriv;
+	return nonseekable_open(inode, filp);
+}
+
+/*
+ * Release every buffer still on the list, issuing the stop DIAG for all
+ * but event records, then free the per-open state.
+ */
+static int monwrite_close(struct inode *inode, struct file *filp)
+{
+	struct mon_private *monpriv = filp->private_data;
+	struct mon_buf *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &monpriv->list, list) {
+		if (entry->hdr.mon_function != MONWRITE_GEN_EVENT)
+			monwrite_diag(&entry->hdr, entry->data,
+				      APPLDATA_STOP_REC);
+		monpriv->mon_buf_count--;
+		list_del(&entry->list);
+		kfree(entry->data);
+		kfree(entry);
+	}
+	kfree(monpriv);
+	return 0;
+}
+
+/*
+ * Consume a stream of records, each a struct monwrite_hdr followed by its
+ * payload. Headers and payloads may be split across calls; progress is
+ * kept in the per-open state. Returns the number of bytes consumed, or a
+ * negative error code, in which case the next write starts a new header.
+ */
+static ssize_t monwrite_write(struct file *filp, const char __user *data,
+			      size_t count, loff_t *ppos)
+{
+	struct mon_private *monpriv = filp->private_data;
+	size_t len, written;
+	void *to;
+	int rc;
+
+	for (written = 0; written < count; ) {
+		if (monpriv->hdr_to_read) {
+			len = min(count - written, monpriv->hdr_to_read);
+			to = (char *) &monpriv->hdr +
+				sizeof(monpriv->hdr) - monpriv->hdr_to_read;
+			if (copy_from_user(to, data + written, len)) {
+				rc = -EFAULT;
+				goto out_error;
+			}
+			monpriv->hdr_to_read -= len;
+			written += len;
+			if (monpriv->hdr_to_read > 0)
+				continue;
+			rc = monwrite_new_hdr(monpriv);
+			if (rc)
+				goto out_error;
+			monpriv->data_to_read = monpriv->current_buf ?
+				monpriv->current_buf->hdr.datalen : 0;
+		}
+
+		if (monpriv->data_to_read) {
+			len = min(count - written, monpriv->data_to_read);
+			to = monpriv->current_buf->data +
+				monpriv->hdr.datalen - monpriv->data_to_read;
+			if (copy_from_user(to, data + written, len)) {
+				rc = -EFAULT;
+				goto out_error;
+			}
+			monpriv->data_to_read -= len;
+			written += len;
+			if (monpriv->data_to_read > 0)
+				continue;
+			rc = monwrite_new_data(monpriv);
+			if (rc)
+				goto out_error;
+		}
+		monpriv->hdr_to_read = sizeof(monpriv->hdr);
+	}
+	return written;
+
+out_error:
+	monpriv->data_to_read = 0;
+	monpriv->hdr_to_read = sizeof(struct monwrite_hdr);
+	return rc;
+}
+
+static struct file_operations monwrite_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = &monwrite_open,
+	.release = &monwrite_close,
+	.write	 = &monwrite_write,
+};
+
+static struct miscdevice mon_dev = {
+	.name	= "monwriter",
+	.fops	= &monwrite_fops,
+	.minor	= MISC_DYNAMIC_MINOR,
+};
+
+/*
+ * module init/exit
+ */
+
+/* Register the misc device when MACHINE_IS_VM holds, else -ENODEV. */
+static int __init mon_init(void)
+{
+	if (MACHINE_IS_VM)
+		return misc_register(&mon_dev);
+	else
+		return -ENODEV;
+}
+
+static void __exit mon_exit(void)
+{
+	WARN_ON(misc_deregister(&mon_dev) != 0);
+}
+
+module_init(mon_init);
+module_exit(mon_exit);
+
+module_param_named(max_bufs, mon_max_bufs, int, 0644);
+MODULE_PARM_DESC(max_bufs, "Maximum number of sample monitor data buffers "
+		 "that can be active at one time");
+
+MODULE_AUTHOR("Melissa Howland <Melissa.Howland@us.ibm.com>");
+MODULE_DESCRIPTION("Character device driver for writing z/VM "
+		   "APPLDATA monitor records.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 19762f3..1678b6c 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2004,2005 IBM Corporation
- * Interface implementation for communication with the v/VM control program
+ * Interface implementation for communication with the z/VM control program
  * Author(s): Christian Borntraeger <cborntra@de.ibm.com>
  *
  *
diff --git a/drivers/s390/char/vmcp.h b/drivers/s390/char/vmcp.h
index 87389e7..8a5975f 100644
--- a/drivers/s390/char/vmcp.h
+++ b/drivers/s390/char/vmcp.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2004, 2005 IBM Corporation
- * Interface implementation for communication with the v/VM control program
+ * Interface implementation for communication with the z/VM control program
  * Version 1.0
  * Author(s): Christian Borntraeger <cborntra@de.ibm.com>
  *
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index c28444a..3bb4e47 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -256,7 +256,7 @@
 	/* trigger path verification. */
 	if (sch->driver && sch->driver->verify)
 		sch->driver->verify(&sch->dev);
-	else if (sch->vpm == mask)
+	else if (sch->lpm == mask)
 		goto out_unreg;
 out_unlock:
 	spin_unlock_irq(&sch->lock);
@@ -378,6 +378,7 @@
 
 	if (chp_mask == 0) {
 		spin_unlock_irq(&sch->lock);
+		put_device(&sch->dev);
 		return 0;
 	}
 	old_lpm = sch->lpm;
@@ -392,7 +393,7 @@
 
 	spin_unlock_irq(&sch->lock);
 	put_device(&sch->dev);
-	return (res_data->fla_mask == 0xffff) ? -ENODEV : 0;
+	return 0;
 }
 
 
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 89320c1..2e2882d 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -16,11 +16,10 @@
 #include <linux/device.h>
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
-
 #include <asm/cio.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
-
+#include <asm/setup.h>
 #include "airq.h"
 #include "cio.h"
 #include "css.h"
@@ -192,7 +191,7 @@
 	sch->orb.pfch = sch->options.prefetch == 0;
 	sch->orb.spnd = sch->options.suspend;
 	sch->orb.ssic = sch->options.suspend && sch->options.inter;
-	sch->orb.lpm = (lpm != 0) ? (lpm & sch->opm) : sch->lpm;
+	sch->orb.lpm = (lpm != 0) ? lpm : sch->lpm;
 #ifdef CONFIG_64BIT
 	/*
 	 * for 64 bit we always support 64 bit IDAWs with 4k page size only
@@ -570,10 +569,7 @@
 	sch->opm = 0xff;
 	if (!cio_is_console(sch->schid))
 		chsc_validate_chpids(sch);
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 
 	CIO_DEBUG(KERN_INFO, 0,
 		  "Detected device %04x on subchannel 0.%x.%04X"
@@ -841,14 +837,26 @@
 	return -EBUSY;
 }
 
-extern void do_reipl(unsigned long devno);
-static int
-__shutdown_subchannel_easy(struct subchannel_id schid, void *data)
+struct sch_match_id {
+	struct subchannel_id schid;
+	struct ccw_dev_id devid;
+	int rc;
+};
+
+static int __shutdown_subchannel_easy_and_match(struct subchannel_id schid,
+	void *data)
 {
 	struct schib schib;
+	struct sch_match_id *match_id = data;
 
 	if (stsch_err(schid, &schib))
 		return -ENXIO;
+	if (match_id && schib.pmcw.dnv &&
+		(schib.pmcw.dev == match_id->devid.devno) &&
+		(schid.ssid == match_id->devid.ssid)) {
+		match_id->schid = schid;
+		match_id->rc = 0;
+	}
 	if (!schib.pmcw.ena)
 		return 0;
 	switch(__disable_subchannel_easy(schid, &schib)) {
@@ -864,18 +872,71 @@
 	return 0;
 }
 
-void
-clear_all_subchannels(void)
+static int clear_all_subchannels_and_match(struct ccw_dev_id *devid,
+	struct subchannel_id *schid)
 {
+	struct sch_match_id match_id;
+
+	match_id.devid = *devid;
+	match_id.rc = -ENODEV;
 	local_irq_disable();
-	for_each_subchannel(__shutdown_subchannel_easy, NULL);
+	for_each_subchannel(__shutdown_subchannel_easy_and_match, &match_id);
+	if (match_id.rc == 0)
+		*schid = match_id.schid;
+	return match_id.rc;
 }
 
-/* Make sure all subchannels are quiet before we re-ipl an lpar. */
-void
-reipl(unsigned long devno)
+
+void clear_all_subchannels(void)
 {
-	clear_all_subchannels();
+	local_irq_disable();
+	for_each_subchannel(__shutdown_subchannel_easy_and_match, NULL);
+}
+
+extern void do_reipl_asm(__u32 schid);
+
+/* Make sure all subchannels are quiet before we re-ipl an lpar. */
+void reipl_ccw_dev(struct ccw_dev_id *devid)
+{
+	struct subchannel_id schid;
+
+	if (clear_all_subchannels_and_match(devid, &schid))
+		panic("IPL Device not found\n");
 	cio_reset_channel_paths();
-	do_reipl(devno);
+	do_reipl_asm(*((__u32*)&schid));
+}
+
+extern struct schib ipl_schib;
+
+/*
+ * ipl_save_parameters gets called very early. It is not allowed to access
+ * anything in the bss section at all. The bss section is not cleared yet,
+ * but may contain some ipl parameters written by the firmware.
+ * These parameters (if present) are copied to 0x2000.
+ * To avoid corruption of the ipl parameters, all variables used by this
+ * function must reside on the stack or in the data section.
+ */
+void ipl_save_parameters(void)
+{
+	struct subchannel_id schid;
+	unsigned int *ipl_ptr;
+	void *src, *dst;
+
+	schid = *(struct subchannel_id *)__LC_SUBCHANNEL_ID;
+	if (!schid.one)
+		return;
+	if (stsch(schid, &ipl_schib))
+		return;
+	if (!ipl_schib.pmcw.dnv)
+		return;
+	ipl_devno = ipl_schib.pmcw.dev;
+	ipl_flags |= IPL_DEVNO_VALID;
+	if (!ipl_schib.pmcw.qf)
+		return;
+	ipl_flags |= IPL_PARMBLOCK_VALID;
+	ipl_ptr = (unsigned int *)__LC_IPL_PARMBLOCK_PTR;
+	src = (void *)(unsigned long)*ipl_ptr;
+	dst = (void *)IPL_PARMBLOCK_ORIGIN;
+	memmove(dst, src, PAGE_SIZE);
+	*ipl_ptr = IPL_PARMBLOCK_ORIGIN;
 }
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 13eeea3..7086a74 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -182,136 +182,141 @@
 	return dev ? to_subchannel(dev) : NULL;
 }
 
-
-static inline int
-css_get_subchannel_status(struct subchannel *sch, struct subchannel_id schid)
+static inline int css_get_subchannel_status(struct subchannel *sch)
 {
 	struct schib schib;
-	int cc;
 
-	cc = stsch(schid, &schib);
-	if (cc)
+	if (stsch(sch->schid, &schib) || !schib.pmcw.dnv)
 		return CIO_GONE;
-	if (!schib.pmcw.dnv)
-		return CIO_GONE;
-	if (sch && sch->schib.pmcw.dnv &&
-	    (schib.pmcw.dev != sch->schib.pmcw.dev))
+	if (sch->schib.pmcw.dnv && (schib.pmcw.dev != sch->schib.pmcw.dev))
 		return CIO_REVALIDATE;
-	if (sch && !sch->lpm)
+	if (!sch->lpm)
 		return CIO_NO_PATH;
 	return CIO_OPER;
 }
-	
-static int
-css_evaluate_subchannel(struct subchannel_id schid, int slow)
+
+static int css_evaluate_known_subchannel(struct subchannel *sch, int slow)
 {
 	int event, ret, disc;
-	struct subchannel *sch;
 	unsigned long flags;
+	enum { NONE, UNREGISTER, UNREGISTER_PROBE, REPROBE } action;
 
-	sch = get_subchannel_by_schid(schid);
-	disc = sch ? device_is_disconnected(sch) : 0;
+	spin_lock_irqsave(&sch->lock, flags);
+	disc = device_is_disconnected(sch);
 	if (disc && slow) {
-		if (sch)
-			put_device(&sch->dev);
-		return 0; /* Already processed. */
+		/* Disconnected devices are evaluated directly only.*/
+		spin_unlock_irqrestore(&sch->lock, flags);
+		return 0;
 	}
-	/*
-	 * We've got a machine check, so running I/O won't get an interrupt.
-	 * Kill any pending timers.
-	 */
-	if (sch)
-		device_kill_pending_timer(sch);
+	/* No interrupt after machine check - kill pending timers. */
+	device_kill_pending_timer(sch);
 	if (!disc && !slow) {
-		if (sch)
-			put_device(&sch->dev);
-		return -EAGAIN; /* Will be done on the slow path. */
+		/* Non-disconnected devices are evaluated on the slow path. */
+		spin_unlock_irqrestore(&sch->lock, flags);
+		return -EAGAIN;
 	}
-	event = css_get_subchannel_status(sch, schid);
+	event = css_get_subchannel_status(sch);
 	CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, %s, %s path.\n",
-		      schid.ssid, schid.sch_no, event,
-		      sch?(disc?"disconnected":"normal"):"unknown",
-		      slow?"slow":"fast");
+		      sch->schid.ssid, sch->schid.sch_no, event,
+		      disc ? "disconnected" : "normal",
+		      slow ? "slow" : "fast");
+	/* Analyze subchannel status. */
+	action = NONE;
 	switch (event) {
 	case CIO_NO_PATH:
+		if (disc) {
+			/* Check if paths have become available. */
+			action = REPROBE;
+			break;
+		}
+		/* fall through */
 	case CIO_GONE:
-		if (!sch) {
-			/* Never used this subchannel. Ignore. */
-			ret = 0;
-			break;
-		}
-		if (disc && (event == CIO_NO_PATH)) {
-			/*
-			 * Uargh, hack again. Because we don't get a machine
-			 * check on configure on, our path bookkeeping can
-			 * be out of date here (it's fine while we only do
-			 * logical varying or get chsc machine checks). We
-			 * need to force reprobing or we might miss devices
-			 * coming operational again. It won't do harm in real
-			 * no path situations.
-			 */
-			spin_lock_irqsave(&sch->lock, flags);
-			device_trigger_reprobe(sch);
-			spin_unlock_irqrestore(&sch->lock, flags);
-			ret = 0;
-			break;
-		}
-		if (sch->driver && sch->driver->notify &&
-		    sch->driver->notify(&sch->dev, event)) {
-			cio_disable_subchannel(sch);
-			device_set_disconnected(sch);
-			ret = 0;
-			break;
-		}
-		/*
-		 * Unregister subchannel.
-		 * The device will be killed automatically.
-		 */
+		/* Prevent unwanted effects when opening lock. */
 		cio_disable_subchannel(sch);
+		device_set_disconnected(sch);
+		/* Ask driver what to do with device. */
+		action = UNREGISTER;
+		if (sch->driver && sch->driver->notify) {
+			spin_unlock_irqrestore(&sch->lock, flags);
+			ret = sch->driver->notify(&sch->dev, event);
+			spin_lock_irqsave(&sch->lock, flags);
+			if (ret)
+				action = NONE;
+		}
+		break;
+	case CIO_REVALIDATE:
+		/* Device will be removed, so no notify necessary. */
+		if (disc)
+			/* Reprobe because immediate unregister might block. */
+			action = REPROBE;
+		else
+			action = UNREGISTER_PROBE;
+		break;
+	case CIO_OPER:
+		if (disc)
+			/* Get device operational again. */
+			action = REPROBE;
+		break;
+	}
+	/* Perform action. */
+	ret = 0;
+	switch (action) {
+	case UNREGISTER:
+	case UNREGISTER_PROBE:
+		/* Unregister device (will use subchannel lock). */
+		spin_unlock_irqrestore(&sch->lock, flags);
 		css_sch_device_unregister(sch);
+		spin_lock_irqsave(&sch->lock, flags);
+
 		/* Reset intparm to zeroes. */
 		sch->schib.pmcw.intparm = 0;
 		cio_modify(sch);
-		put_device(&sch->dev);
-		ret = 0;
+
+		/* Probe if necessary. */
+		if (action == UNREGISTER_PROBE)
+			ret = css_probe_device(sch->schid);
 		break;
-	case CIO_REVALIDATE:
-		/* 
-		 * Revalidation machine check. Sick.
-		 * We don't notify the driver since we have to throw the device
-		 * away in any case.
-		 */
-		if (!disc) {
-			css_sch_device_unregister(sch);
-			/* Reset intparm to zeroes. */
-			sch->schib.pmcw.intparm = 0;
-			cio_modify(sch);
-			put_device(&sch->dev);
-			ret = css_probe_device(schid);
-		} else {
-			/*
-			 * We can't immediately deregister the disconnected
-			 * device since it might block.
-			 */
-			spin_lock_irqsave(&sch->lock, flags);
-			device_trigger_reprobe(sch);
-			spin_unlock_irqrestore(&sch->lock, flags);
-			ret = 0;
-		}
-		break;
-	case CIO_OPER:
-		if (disc) {
-			spin_lock_irqsave(&sch->lock, flags);
-			/* Get device operational again. */
-			device_trigger_reprobe(sch);
-			spin_unlock_irqrestore(&sch->lock, flags);
-		}
-		ret = sch ? 0 : css_probe_device(schid);
+	case REPROBE:
+		device_trigger_reprobe(sch);
 		break;
 	default:
-		BUG();
-		ret = 0;
+		break;
 	}
+	spin_unlock_irqrestore(&sch->lock, flags);
+
+	return ret;
+}
+
+static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow)
+{
+	struct schib schib;
+
+	if (!slow) {
+		/* Will be done on the slow path. */
+		return -EAGAIN;
+	}
+	if (stsch(schid, &schib) || !schib.pmcw.dnv) {
+		/* Unusable - ignore. */
+		return 0;
+	}
+	CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, unknown, "
+			 "slow path.\n", schid.ssid, schid.sch_no, CIO_OPER);
+
+	return css_probe_device(schid);
+}
+
+static int css_evaluate_subchannel(struct subchannel_id schid, int slow)
+{
+	struct subchannel *sch;
+	int ret;
+
+	sch = get_subchannel_by_schid(schid);
+	if (sch) {
+		ret = css_evaluate_known_subchannel(sch, slow);
+		put_device(&sch->dev);
+	} else
+		ret = css_evaluate_new_subchannel(schid, slow);
+
 	return ret;
 }
 
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 646da56..6889456 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -52,53 +52,81 @@
 	return 1;
 }
 
-/*
- * Hotplugging interface for ccw devices.
- * Heavily modeled on pci and usb hotplug.
- */
-static int
-ccw_uevent (struct device *dev, char **envp, int num_envp,
-	     char *buffer, int buffer_size)
+/* Store modalias string delimited by prefix/suffix string into buffer with
+ * specified size. Return length of resulting string (excluding trailing '\0')
+ * even if string doesn't fit buffer (snprintf semantics). */
+static int snprint_alias(char *buf, size_t size, const char *prefix,
+			 struct ccw_device_id *id, const char *suffix)
+{
+	int len;
+
+	len = snprintf(buf, size, "%sccw:t%04Xm%02X", prefix, id->cu_type,
+		       id->cu_model);
+	if (len > size)
+		return len;
+	buf += len;
+	size -= len;
+
+	if (id->dev_type != 0)
+		len += snprintf(buf, size, "dt%04Xdm%02X%s", id->dev_type,
+				id->dev_model, suffix);
+	else
+		len += snprintf(buf, size, "dtdm%s", suffix);
+
+	return len;
+}
+
+/* Set up environment variables for ccw device uevent. Return 0 on success,
+ * non-zero otherwise. */
+static int ccw_uevent(struct device *dev, char **envp, int num_envp,
+		      char *buffer, int buffer_size)
 {
 	struct ccw_device *cdev = to_ccwdev(dev);
+	struct ccw_device_id *id = &(cdev->id);
 	int i = 0;
-	int length = 0;
+	int len;
 
-	if (!cdev)
-		return -ENODEV;
-
-	/* what we want to pass to /sbin/hotplug */
-
-	envp[i++] = buffer;
-	length += scnprintf(buffer, buffer_size - length, "CU_TYPE=%04X",
-			   cdev->id.cu_type);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	/* CU_TYPE= */
+	len = snprintf(buffer, buffer_size, "CU_TYPE=%04X", id->cu_type) + 1;
+	if (len > buffer_size || i >= num_envp)
 		return -ENOMEM;
-	++length;
-	buffer += length;
-
 	envp[i++] = buffer;
-	length += scnprintf(buffer, buffer_size - length, "CU_MODEL=%02X",
-			   cdev->id.cu_model);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	buffer += len;
+	buffer_size -= len;
+
+	/* CU_MODEL= */
+	len = snprintf(buffer, buffer_size, "CU_MODEL=%02X", id->cu_model) + 1;
+	if (len > buffer_size || i >= num_envp)
 		return -ENOMEM;
-	++length;
-	buffer += length;
+	envp[i++] = buffer;
+	buffer += len;
+	buffer_size -= len;
 
 	/* The next two can be zero, that's ok for us */
-	envp[i++] = buffer;
-	length += scnprintf(buffer, buffer_size - length, "DEV_TYPE=%04X",
-			   cdev->id.dev_type);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	/* DEV_TYPE= */
+	len = snprintf(buffer, buffer_size, "DEV_TYPE=%04X", id->dev_type) + 1;
+	if (len > buffer_size || i >= num_envp)
 		return -ENOMEM;
-	++length;
-	buffer += length;
+	envp[i++] = buffer;
+	buffer += len;
+	buffer_size -= len;
 
-	envp[i++] = buffer;
-	length += scnprintf(buffer, buffer_size - length, "DEV_MODEL=%02X",
-			   cdev->id.dev_model);
-	if ((buffer_size - length <= 0) || (i >= num_envp))
+	/* DEV_MODEL= */
+	len = snprintf(buffer, buffer_size, "DEV_MODEL=%02X",
+			(unsigned char) id->dev_model) + 1;
+	if (len > buffer_size || i >= num_envp)
 		return -ENOMEM;
+	envp[i++] = buffer;
+	buffer += len;
+	buffer_size -= len;
+
+	/* MODALIAS=  */
+	len = snprint_alias(buffer, buffer_size, "MODALIAS=", id, "") + 1;
+	if (len > buffer_size || i >= num_envp)
+		return -ENOMEM;
+	envp[i++] = buffer;
+	buffer += len;
+	buffer_size -= len;
 
 	envp[i] = NULL;
 
@@ -251,16 +279,11 @@
 {
 	struct ccw_device *cdev = to_ccwdev(dev);
 	struct ccw_device_id *id = &(cdev->id);
-	int ret;
+	int len;
 
-	ret = sprintf(buf, "ccw:t%04Xm%02X",
-			id->cu_type, id->cu_model);
-	if (id->dev_type != 0)
-		ret += sprintf(buf + ret, "dt%04Xdm%02X\n",
-				id->dev_type, id->dev_model);
-	else
-		ret += sprintf(buf + ret, "dtdm\n");
-	return ret;
+	len = snprint_alias(buf, PAGE_SIZE, "", id, "\n") + 1;
+
+	return len > PAGE_SIZE ? PAGE_SIZE : len;
 }
 
 static ssize_t
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 35e162b..dace46f 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -232,10 +232,7 @@
 	 */
 	old_lpm = sch->lpm;
 	stsch(sch->schid, &sch->schib);
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 	/* Check since device may again have become not operational. */
 	if (!sch->schib.pmcw.dnv)
 		state = DEV_STATE_NOT_OPER;
@@ -267,6 +264,7 @@
 			notify = 1;
 		}
 		/* fill out sense information */
+		memset(&cdev->id, 0, sizeof(cdev->id));
 		cdev->id.cu_type   = cdev->private->senseid.cu_type;
 		cdev->id.cu_model  = cdev->private->senseid.cu_model;
 		cdev->id.dev_type  = cdev->private->senseid.dev_type;
@@ -454,8 +452,8 @@
 		return;
 	}
 	/* Start Path Group verification. */
-	sch->vpm = 0;	/* Start with no path groups set. */
 	cdev->private->state = DEV_STATE_VERIFY;
+	cdev->private->flags.doverify = 0;
 	ccw_device_verify_start(cdev);
 }
 
@@ -555,7 +553,19 @@
 void
 ccw_device_verify_done(struct ccw_device *cdev, int err)
 {
-	cdev->private->flags.doverify = 0;
+	struct subchannel *sch;
+
+	sch = to_subchannel(cdev->dev.parent);
+	/* Update schib - pom may have changed. */
+	stsch(sch->schid, &sch->schib);
+	/* Update lpm with verified path mask. */
+	sch->lpm = sch->vpm;
+	/* Repeat path verification? */
+	if (cdev->private->flags.doverify) {
+		cdev->private->flags.doverify = 0;
+		ccw_device_verify_start(cdev);
+		return;
+	}
 	switch (err) {
 	case -EOPNOTSUPP: /* path grouping not supported, just set online. */
 		cdev->private->options.pgroup = 0;
@@ -613,6 +623,7 @@
 	if (!cdev->private->options.pgroup) {
 		/* Start initial path verification. */
 		cdev->private->state = DEV_STATE_VERIFY;
+		cdev->private->flags.doverify = 0;
 		ccw_device_verify_start(cdev);
 		return 0;
 	}
@@ -659,7 +670,6 @@
 	/* Are we doing path grouping? */
 	if (!cdev->private->options.pgroup) {
 		/* No, set state offline immediately. */
-		sch->vpm = 0;
 		ccw_device_done(cdev, DEV_STATE_OFFLINE);
 		return 0;
 	}
@@ -780,6 +790,7 @@
 	}
 	/* Device is idle, we can do the path verification. */
 	cdev->private->state = DEV_STATE_VERIFY;
+	cdev->private->flags.doverify = 0;
 	ccw_device_verify_start(cdev);
 }
 
@@ -1042,9 +1053,9 @@
 }
 
 static void
-ccw_device_wait4io_verify(struct ccw_device *cdev, enum dev_event dev_event)
+ccw_device_delay_verify(struct ccw_device *cdev, enum dev_event dev_event)
 {
-	/* When the I/O has terminated, we have to start verification. */
+	/* Start verification after current task finished. */
 	cdev->private->flags.doverify = 1;
 }
 
@@ -1110,10 +1121,7 @@
 	 * The pim, pam, pom values may not be accurate, but they are the best
 	 * we have before performing device selection :/
 	 */
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
+	sch->lpm = sch->schib.pmcw.pam & sch->opm;
 	/* Re-set some bits in the pmcw that were lost. */
 	sch->schib.pmcw.isc = 3;
 	sch->schib.pmcw.csense = 1;
@@ -1237,7 +1245,7 @@
 		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_verify_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_onoff_timeout,
-		[DEV_EVENT_VERIFY]	= ccw_device_nop,
+		[DEV_EVENT_VERIFY]	= ccw_device_delay_verify,
 	},
 	[DEV_STATE_ONLINE] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
@@ -1280,7 +1288,7 @@
 		[DEV_EVENT_NOTOPER]	= ccw_device_online_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_wait4io_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_wait4io_timeout,
-		[DEV_EVENT_VERIFY]	= ccw_device_wait4io_verify,
+		[DEV_EVENT_VERIFY]	= ccw_device_delay_verify,
 	},
 	[DEV_STATE_QUIESCE] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_quiesce_done,
@@ -1293,7 +1301,7 @@
 		[DEV_EVENT_NOTOPER]	= ccw_device_nop,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_start_id,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_bug,
-		[DEV_EVENT_VERIFY]	= ccw_device_nop,
+		[DEV_EVENT_VERIFY]	= ccw_device_start_id,
 	},
 	[DEV_STATE_DISCONNECTED_SENSE_ID] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_recog_notoper,
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 9e3de0b..93a897e 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -96,6 +96,12 @@
 	ret = cio_set_options (sch, flags);
 	if (ret)
 		return ret;
+	/* Adjust requested path mask to exclude varied off paths. */
+	if (lpm) {
+		lpm &= sch->opm;
+		if (lpm == 0)
+			return -EACCES;
+	}
 	ret = cio_start_key (sch, cpa, lpm, key);
 	if (ret == 0)
 		cdev->private->intparm = intparm;
@@ -250,7 +256,7 @@
 	if (!sch)
 		return 0;
 	else
-		return sch->vpm;
+		return sch->lpm;
 }
 
 static void
@@ -304,7 +310,7 @@
 	sch = to_subchannel(cdev->dev.parent);
 	do {
 		ret = cio_start (sch, ccw, lpm);
-		if ((ret == -EBUSY) || (ret == -EACCES)) {
+		if (ret == -EBUSY) {
 			/* Try again later. */
 			spin_unlock_irq(&sch->lock);
 			msleep(10);
@@ -433,6 +439,13 @@
 	if (!ciw || ciw->cmd == 0)
 		return -EOPNOTSUPP;
 
+	/* Adjust requested path mask to exclude varied off paths. */
+	if (lpm) {
+		lpm &= sch->opm;
+		if (lpm == 0)
+			return -EACCES;
+	}
+
 	rcd_ccw = kzalloc(sizeof(struct ccw1), GFP_KERNEL | GFP_DMA);
 	if (!rcd_ccw)
 		return -ENOMEM;
diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
index 1693a10..8ca2d07 100644
--- a/drivers/s390/cio/device_pgid.c
+++ b/drivers/s390/cio/device_pgid.c
@@ -245,18 +245,17 @@
 	memset(&cdev->private->irb, 0, sizeof(struct irb));
 
 	/* Try multiple times. */
-	ret = -ENODEV;
+	ret = -EACCES;
 	if (cdev->private->iretry > 0) {
 		cdev->private->iretry--;
 		ret = cio_start (sch, cdev->private->iccws,
 				 cdev->private->imask);
-		/* ret is 0, -EBUSY, -EACCES or -ENODEV */
-		if ((ret != -EACCES) && (ret != -ENODEV))
+		/* We expect an interrupt in case of success or busy
+		 * indication. */
+		if ((ret == 0) || (ret == -EBUSY))
 			return ret;
 	}
-	/* PGID command failed on this path. Switch it off. */
-	sch->lpm &= ~cdev->private->imask;
-	sch->vpm &= ~cdev->private->imask;
+	/* PGID command failed on this path. */
 	CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel "
 		      "0.%x.%04x, lpm %02X, became 'not operational'\n",
 		      cdev->private->devno, sch->schid.ssid,
@@ -286,18 +285,17 @@
 	memset(&cdev->private->irb, 0, sizeof(struct irb));
 
 	/* Try multiple times. */
-	ret = -ENODEV;
+	ret = -EACCES;
 	if (cdev->private->iretry > 0) {
 		cdev->private->iretry--;
 		ret = cio_start (sch, cdev->private->iccws,
 				 cdev->private->imask);
-		/* ret is 0, -EBUSY, -EACCES or -ENODEV */
-		if ((ret != -EACCES) && (ret != -ENODEV))
+		/* We expect an interrupt in case of success or busy
+		 * indication. */
+		if ((ret == 0) || (ret == -EBUSY))
 			return ret;
 	}
-	/* nop command failed on this path. Switch it off. */
-	sch->lpm &= ~cdev->private->imask;
-	sch->vpm &= ~cdev->private->imask;
+	/* nop command failed on this path. */
 	CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel "
 		      "0.%x.%04x, lpm %02X, became 'not operational'\n",
 		      cdev->private->devno, sch->schid.ssid,
@@ -372,27 +370,32 @@
 __ccw_device_verify_start(struct ccw_device *cdev)
 {
 	struct subchannel *sch;
-	__u8 imask, func;
+	__u8 func;
 	int ret;
 
 	sch = to_subchannel(cdev->dev.parent);
-	while (sch->vpm != sch->lpm) {
-		/* Find first unequal bit in vpm vs. lpm */
-		for (imask = 0x80; imask != 0; imask >>= 1)
-			if ((sch->vpm & imask) != (sch->lpm & imask))
-				break;
-		cdev->private->imask = imask;
+	/* Repeat for all paths. */
+	for (; cdev->private->imask; cdev->private->imask >>= 1,
+				     cdev->private->iretry = 5) {
+		if ((cdev->private->imask & sch->schib.pmcw.pam) == 0)
+			/* Path not available, try next. */
+			continue;
 		if (cdev->private->options.pgroup) {
-			func = (sch->vpm & imask) ?
-				SPID_FUNC_RESIGN : SPID_FUNC_ESTABLISH;
+			if (sch->opm & cdev->private->imask)
+				func = SPID_FUNC_ESTABLISH;
+			else
+				func = SPID_FUNC_RESIGN;
 			ret = __ccw_device_do_pgid(cdev, func);
 		} else
 			ret = __ccw_device_do_nop(cdev);
+		/* We expect an interrupt in case of success or busy
+		 * indication. */
 		if (ret == 0 || ret == -EBUSY)
 			return;
-		cdev->private->iretry = 5;
+		/* Permanent path failure, try next. */
 	}
-	ccw_device_verify_done(cdev, (sch->lpm != 0) ? 0 : -ENODEV);
+	/* Done with all paths. */
+	ccw_device_verify_done(cdev, (sch->vpm != 0) ? 0 : -ENODEV);
 }
 		
 /*
@@ -421,14 +424,14 @@
 	else
 		ret = __ccw_device_check_nop(cdev);
 	memset(&cdev->private->irb, 0, sizeof(struct irb));
+
 	switch (ret) {
 	/* 0, -ETIME, -EAGAIN, -EOPNOTSUPP or -EACCES */
 	case 0:
-		/* Establish or Resign Path Group done. Update vpm. */
-		if ((sch->lpm & cdev->private->imask) != 0)
-			sch->vpm |= cdev->private->imask;
-		else
-			sch->vpm &= ~cdev->private->imask;
+		/* Path verification ccw finished successfully, update vpm. */
+		sch->vpm |= sch->opm & cdev->private->imask;
+		/* Go on with next path. */
+		cdev->private->imask >>= 1;
 		cdev->private->iretry = 5;
 		__ccw_device_verify_start(cdev);
 		break;
@@ -441,6 +444,10 @@
 			cdev->private->options.pgroup = 0;
 		else
 			cdev->private->flags.pgid_single = 1;
+		/* Retry */
+		sch->vpm = 0;
+		cdev->private->imask = 0x80;
+		cdev->private->iretry = 5;
 		/* fall through. */
 	case -EAGAIN:		/* Try again. */
 		__ccw_device_verify_start(cdev);
@@ -449,8 +456,7 @@
 		ccw_device_verify_done(cdev, -ETIME);
 		break;
 	case -EACCES:		/* channel is not operational. */
-		sch->lpm &= ~cdev->private->imask;
-		sch->vpm &= ~cdev->private->imask;
+		cdev->private->imask >>= 1;
 		cdev->private->iretry = 5;
 		__ccw_device_verify_start(cdev);
 		break;
@@ -463,19 +469,17 @@
 	struct subchannel *sch = to_subchannel(cdev->dev.parent);
 
 	cdev->private->flags.pgid_single = 0;
+	cdev->private->imask = 0x80;
 	cdev->private->iretry = 5;
-	/*
-	 * Update sch->lpm with current values to catch paths becoming
-	 * available again.
-	 */
+
+	/* Start with empty vpm. */
+	sch->vpm = 0;
+
+	/* Get current pam. */
 	if (stsch(sch->schid, &sch->schib)) {
 		ccw_device_verify_done(cdev, -ENODEV);
 		return;
 	}
-	sch->lpm = sch->schib.pmcw.pim &
-		sch->schib.pmcw.pam &
-		sch->schib.pmcw.pom &
-		sch->opm;
 	__ccw_device_verify_start(cdev);
 }
 
@@ -524,7 +528,6 @@
 	switch (ret) {
 	/* 0, -ETIME, -EAGAIN, -EOPNOTSUPP or -EACCES */
 	case 0:			/* disband successful. */
-		sch->vpm = 0;
 		ccw_device_disband_done(cdev, ret);
 		break;
 	case -EOPNOTSUPP:
diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c
index 7c93a87..cde822d 100644
--- a/drivers/s390/cio/qdio.c
+++ b/drivers/s390/cio/qdio.c
@@ -115,7 +115,7 @@
 static inline __u64 
 qdio_get_micros(void)
 {
-        return (get_clock() >> 10); /* time>>12 is microseconds */
+	return (get_clock() >> 12); /* time>>12 is microseconds */
 }
 
 /* 
@@ -1129,7 +1129,7 @@
 
 #ifdef QDIO_USE_PROCESSING_STATE
 	if (last_position>=0)
-		set_slsb(q, &last_position, SLSB_P_INPUT_NOT_INIT, &count);
+		set_slsb(q, &last_position, SLSB_P_INPUT_PROCESSING, &count);
 #endif /* QDIO_USE_PROCESSING_STATE */
 
 	QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int));
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index ceb3ab3..1245693 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -191,49 +191,49 @@
 #if QDIO_VERBOSE_LEVEL>8
 #define QDIO_PRINT_STUPID(x...) printk( KERN_DEBUG QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_STUPID(x...)
+#define QDIO_PRINT_STUPID(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>7
 #define QDIO_PRINT_ALL(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_ALL(x...)
+#define QDIO_PRINT_ALL(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>6
 #define QDIO_PRINT_INFO(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_INFO(x...)
+#define QDIO_PRINT_INFO(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>5
 #define QDIO_PRINT_WARN(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_WARN(x...)
+#define QDIO_PRINT_WARN(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>4
 #define QDIO_PRINT_ERR(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_ERR(x...)
+#define QDIO_PRINT_ERR(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>3
 #define QDIO_PRINT_CRIT(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_CRIT(x...)
+#define QDIO_PRINT_CRIT(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>2
 #define QDIO_PRINT_ALERT(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_ALERT(x...)
+#define QDIO_PRINT_ALERT(x...) do { } while (0)
 #endif
 
 #if QDIO_VERBOSE_LEVEL>1
 #define QDIO_PRINT_EMERG(x...) printk( QDIO_PRINTK_HEADER x)
 #else
-#define QDIO_PRINT_EMERG(x...)
+#define QDIO_PRINT_EMERG(x...) do { } while (0)
 #endif
 
 #define HEXDUMP16(importance,header,ptr) \
diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index 15edebb..f0a12d2 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -2,5 +2,16 @@
 # S/390 crypto devices
 #
 
-z90crypt-objs := z90main.o z90hardware.o
-obj-$(CONFIG_Z90CRYPT) += z90crypt.o
+ifdef CONFIG_ZCRYPT_MONOLITHIC
+
+z90crypt-objs := zcrypt_mono.o ap_bus.o zcrypt_api.o \
+		zcrypt_pcica.o zcrypt_pcicc.o zcrypt_pcixcc.o zcrypt_cex2a.o
+obj-$(CONFIG_ZCRYPT) += z90crypt.o
+
+else
+
+ap-objs := ap_bus.o
+obj-$(CONFIG_ZCRYPT) += ap.o zcrypt_api.o zcrypt_pcicc.o zcrypt_pcixcc.o
+obj-$(CONFIG_ZCRYPT) += zcrypt_pcica.o zcrypt_cex2a.o
+
+endif
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
new file mode 100644
index 0000000..6ed0985
--- /dev/null
+++ b/drivers/s390/crypto/ap_bus.c
@@ -0,0 +1,1221 @@
+/*
+ * linux/drivers/s390/crypto/ap_bus.c
+ *
+ * Copyright (C) 2006 IBM Corporation
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * Adjunct processor bus.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <asm/s390_rdev.h>
+
+#include "ap_bus.h"
+
+/* Some prototypes. */
+static void ap_scan_bus(void *);
+static void ap_poll_all(unsigned long);
+static void ap_poll_timeout(unsigned long);
+static int ap_poll_thread_start(void);
+static void ap_poll_thread_stop(void);
+
+/**
+ * Module description.
+ */
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("Adjunct Processor Bus driver, "
+		   "Copyright 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+
+/**
+ * Module parameter
+ */
+int ap_domain_index = -1;	/* Adjunct Processor Domain Index */
+module_param_named(domain, ap_domain_index, int, 0000);
+MODULE_PARM_DESC(domain, "domain index for ap devices");
+EXPORT_SYMBOL(ap_domain_index);
+
+static int ap_thread_flag = 1;
+module_param_named(poll_thread, ap_thread_flag, int, 0000);
+MODULE_PARM_DESC(poll_thread, "Turn on/off poll thread, default is 1 (on).");
+
+static struct device *ap_root_device = NULL;
+
+/**
+ * Workqueue & timer for bus rescan.
+ */
+static struct workqueue_struct *ap_work_queue;
+static struct timer_list ap_config_timer;
+static int ap_config_time = AP_CONFIG_TIME;
+static DECLARE_WORK(ap_config_work, ap_scan_bus, NULL);
+
+/**
+ * Tasklet & timer for AP request polling.
+ */
+static struct timer_list ap_poll_timer = TIMER_INITIALIZER(ap_poll_timeout,0,0);
+static DECLARE_TASKLET(ap_tasklet, ap_poll_all, 0);
+static atomic_t ap_poll_requests = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(ap_poll_wait);
+static struct task_struct *ap_poll_kthread = NULL;
+static DEFINE_MUTEX(ap_poll_thread_mutex);
+
+/**
+ * Test if ap instructions are available.
+ *
+ * Returns 0 if the ap instructions are installed.
+ */
+static inline int ap_instructions_available(void)
+{
+	register unsigned long reg0 asm ("0") = AP_MKQID(0,0);
+	register unsigned long reg1 asm ("1") = -ENODEV;
+	register unsigned long reg2 asm ("2") = 0UL;
+
+	asm volatile(
+		"   .long 0xb2af0000\n"		/* PQAP(TAPQ) */
+		"0: la    %1,0\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: "+d" (reg0), "+d" (reg1), "+d" (reg2) : : "cc" );
+	return reg1;
+}
+
+/**
+ * Test adjunct processor queue.
+ * @qid: the ap queue number
+ * @queue_depth: pointer to queue depth value
+ * @device_type: pointer to device type value
+ *
+ * Returns ap queue status structure.
+ */
+static inline struct ap_queue_status
+ap_test_queue(ap_qid_t qid, int *queue_depth, int *device_type)
+{
+	register unsigned long reg0 asm ("0") = qid;
+	register struct ap_queue_status reg1 asm ("1");
+	register unsigned long reg2 asm ("2") = 0UL;
+
+	asm volatile(".long 0xb2af0000"		/* PQAP(TAPQ) */
+		     : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc");
+	*device_type = (int) (reg2 >> 24);
+	*queue_depth = (int) (reg2 & 0xff);
+	return reg1;
+}
+
+/**
+ * Reset adjunct processor queue.
+ * @qid: the ap queue number
+ *
+ * Returns ap queue status structure.
+ */
+static inline struct ap_queue_status ap_reset_queue(ap_qid_t qid)
+{
+	register unsigned long reg0 asm ("0") = qid | 0x01000000UL;
+	register struct ap_queue_status reg1 asm ("1");
+	register unsigned long reg2 asm ("2") = 0UL;
+
+	asm volatile(
+		".long 0xb2af0000"		/* PQAP(RAPQ) */
+		: "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc");
+	return reg1;
+}
+
+/**
+ * Send message to adjunct processor queue.
+ * @qid: the ap queue number
+ * @psmid: the program supplied message identifier
+ * @msg: the message text
+ * @length: the message length
+ *
+ * Returns ap queue status structure.
+ *
+ * Condition code 1 on NQAP can't happen because the L bit is 1.
+ *
+ * Condition code 2 on NQAP also means the send is incomplete,
+ * because a segment boundary was reached. The NQAP is repeated.
+ */
+static inline struct ap_queue_status
+__ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length)
+{
+	typedef struct { char _[length]; } msgblock;
+	register unsigned long reg0 asm ("0") = qid | 0x40000000UL;
+	register struct ap_queue_status reg1 asm ("1");
+	register unsigned long reg2 asm ("2") = (unsigned long) msg;
+	register unsigned long reg3 asm ("3") = (unsigned long) length;
+	register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32);
+	register unsigned long reg5 asm ("5") = (unsigned int) psmid;
+
+	asm volatile (
+		"0: .long 0xb2ad0042\n"		/* NQAP */
+		"   brc   2,0b"
+		: "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3)
+		: "d" (reg4), "d" (reg5), "m" (*(msgblock *) msg)
+		: "cc" );
+	return reg1;
+}
+
+int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length)
+{
+	struct ap_queue_status status;
+
+	status = __ap_send(qid, psmid, msg, length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		return 0;
+	case AP_RESPONSE_Q_FULL:
+		return -EBUSY;
+	default:	/* Device is gone. */
+		return -ENODEV;
+	}
+}
+EXPORT_SYMBOL(ap_send);
+
+/*
+ * Receive message from adjunct processor queue.
+ * @qid: the ap queue number
+ * @psmid: pointer to program supplied message identifier
+ * @msg: the message text
+ * @length: the message length
+ *
+ * Returns ap queue status structure.
+ *
+ * Condition code 1 on DQAP means the receive has taken place
+ * but only partially.	The response is incomplete, hence the
+ * DQAP is repeated.
+ *
+ * Condition code 2 on DQAP also means the receive is incomplete,
+ * this time because a segment boundary was reached. Again, the
+ * DQAP is repeated.
+ *
+ * Note that gpr2 is used by the DQAP instruction to keep track of
+ * any 'residual' length, in case the instruction gets interrupted.
+ * Hence it gets zeroed before the instruction.
+ */
+static inline struct ap_queue_status
+__ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
+{
+	typedef struct { char _[length]; } msgblock;
+	register unsigned long reg0 asm("0") = qid | 0x80000000UL;
+	register struct ap_queue_status reg1 asm ("1");
+	register unsigned long reg2 asm("2") = 0UL;
+	register unsigned long reg4 asm("4") = (unsigned long) msg;
+	register unsigned long reg5 asm("5") = (unsigned long) length;
+	register unsigned long reg6 asm("6") = 0UL;
+	register unsigned long reg7 asm("7") = 0UL;
+
+
+	asm volatile(
+		"0: .long 0xb2ae0064\n"
+		"   brc   6,0b\n"
+		: "+d" (reg0), "=d" (reg1), "+d" (reg2),
+		"+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7),
+		"=m" (*(msgblock *) msg) : : "cc" );
+	*psmid = (((unsigned long long) reg6) << 32) + reg7;
+	return reg1;
+}
+
+int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length)
+{
+	struct ap_queue_status status;
+
+	status = __ap_recv(qid, psmid, msg, length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		return 0;
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		if (status.queue_empty)
+			return -ENOENT;
+		return -EBUSY;
+	default:
+		return -ENODEV;
+	}
+}
+EXPORT_SYMBOL(ap_recv);
+
+/**
+ * Check if an AP queue is available. The test is repeated up to
+ * AP_MAX_RESET times.
+ * @qid: the ap queue number
+ * @queue_depth: pointer to queue depth value
+ * @device_type: pointer to device type value
+ */
+static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type)
+{
+	struct ap_queue_status status;
+	int t_depth, t_device_type, rc, i;
+
+	rc = -EBUSY;
+	for (i = 0; i < AP_MAX_RESET; i++) {
+		status = ap_test_queue(qid, &t_depth, &t_device_type);
+		switch (status.response_code) {
+		case AP_RESPONSE_NORMAL:
+			*queue_depth = t_depth + 1;
+			*device_type = t_device_type;
+			rc = 0;
+			break;
+		case AP_RESPONSE_Q_NOT_AVAIL:
+			rc = -ENODEV;
+			break;
+		case AP_RESPONSE_RESET_IN_PROGRESS:
+			break;
+		case AP_RESPONSE_DECONFIGURED:
+			rc = -ENODEV;
+			break;
+		case AP_RESPONSE_CHECKSTOPPED:
+			rc = -ENODEV;
+			break;
+		case AP_RESPONSE_BUSY:
+			break;
+		default:
+			BUG();
+		}
+		if (rc != -EBUSY)
+			break;
+		if (i < AP_MAX_RESET - 1)
+			udelay(5);
+	}
+	return rc;
+}
+
+/**
+ * Reset an AP queue and wait for it to become available again.
+ * @qid: the ap queue number
+ */
+static int ap_init_queue(ap_qid_t qid)
+{
+	struct ap_queue_status status;
+	int rc, dummy, i;
+
+	rc = -ENODEV;
+	status = ap_reset_queue(qid);
+	for (i = 0; i < AP_MAX_RESET; i++) {
+		switch (status.response_code) {
+		case AP_RESPONSE_NORMAL:
+			if (status.queue_empty)
+				rc = 0;
+			break;
+		case AP_RESPONSE_Q_NOT_AVAIL:
+		case AP_RESPONSE_DECONFIGURED:
+		case AP_RESPONSE_CHECKSTOPPED:
+			i = AP_MAX_RESET;	/* return with -ENODEV */
+			break;
+		case AP_RESPONSE_RESET_IN_PROGRESS:
+		case AP_RESPONSE_BUSY:
+		default:
+			break;
+		}
+		if (rc != -ENODEV)
+			break;
+		if (i < AP_MAX_RESET - 1) {
+			udelay(5);
+			status = ap_test_queue(qid, &dummy, &dummy);
+		}
+	}
+	return rc;
+}
+
+/**
+ * AP device related attributes.
+ */
+static ssize_t ap_hwtype_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->device_type);
+}
+static DEVICE_ATTR(hwtype, 0444, ap_hwtype_show, NULL);
+
+static ssize_t ap_depth_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->queue_depth);
+}
+static DEVICE_ATTR(depth, 0444, ap_depth_show, NULL);
+
+static ssize_t ap_request_count_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	int rc;
+
+	spin_lock_bh(&ap_dev->lock);
+	rc = snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->total_request_count);
+	spin_unlock_bh(&ap_dev->lock);
+	return rc;
+}
+
+static DEVICE_ATTR(request_count, 0444, ap_request_count_show, NULL);
+
+static ssize_t ap_modalias_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "ap:t%02X", to_ap_dev(dev)->device_type);
+}
+
+static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL);
+
+static struct attribute *ap_dev_attrs[] = {
+	&dev_attr_hwtype.attr,
+	&dev_attr_depth.attr,
+	&dev_attr_request_count.attr,
+	&dev_attr_modalias.attr,
+	NULL
+};
+static struct attribute_group ap_dev_attr_group = {
+	.attrs = ap_dev_attrs
+};
+
+/**
+ * AP bus driver registration/unregistration.
+ */
+static int ap_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	struct ap_driver *ap_drv = to_ap_drv(drv);
+	struct ap_device_id *id;
+
+	/**
+	 * Compare device type of the device with the list of
+	 * supported types of the device_driver.
+	 */
+	for (id = ap_drv->ids; id->match_flags; id++) {
+		if ((id->match_flags & AP_DEVICE_ID_MATCH_DEVICE_TYPE) &&
+		    (id->dev_type != ap_dev->device_type))
+			continue;
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * uevent function for AP devices. It sets up a single environment
+ * variable DEV_TYPE which contains the hardware device type.
+ */
+static int ap_uevent (struct device *dev, char **envp, int num_envp,
+		       char *buffer, int buffer_size)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	int length;
+
+	if (!ap_dev)
+		return -ENODEV;
+
+	/* Set up DEV_TYPE environment variable. */
+	envp[0] = buffer;
+	length = scnprintf(buffer, buffer_size, "DEV_TYPE=%04X",
+			   ap_dev->device_type);
+	if (buffer_size - length <= 0)
+		return -ENOMEM;
+	envp[1] = 0;
+	return 0;
+}
+
+static struct bus_type ap_bus_type = {
+	.name = "ap",
+	.match = &ap_bus_match,
+	.uevent = &ap_uevent,
+};
+
+static int ap_device_probe(struct device *dev)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	struct ap_driver *ap_drv = to_ap_drv(dev->driver);
+	int rc;
+
+	ap_dev->drv = ap_drv;
+	rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV;
+	if (rc)
+		ap_dev->unregistered = 1;
+	return rc;
+}
+
+/**
+ * Flush all requests from the request/pending queue of an AP device.
+ * @ap_dev: pointer to the AP device.
+ */
+static inline void __ap_flush_queue(struct ap_device *ap_dev)
+{
+	struct ap_message *ap_msg, *next;
+
+	list_for_each_entry_safe(ap_msg, next, &ap_dev->pendingq, list) {
+		list_del_init(&ap_msg->list);
+		ap_dev->pendingq_count--;
+		ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV));
+	}
+	list_for_each_entry_safe(ap_msg, next, &ap_dev->requestq, list) {
+		list_del_init(&ap_msg->list);
+		ap_dev->requestq_count--;
+		ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV));
+	}
+}
+
+void ap_flush_queue(struct ap_device *ap_dev)
+{
+	spin_lock_bh(&ap_dev->lock);
+	__ap_flush_queue(ap_dev);
+	spin_unlock_bh(&ap_dev->lock);
+}
+EXPORT_SYMBOL(ap_flush_queue);
+
+static int ap_device_remove(struct device *dev)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	struct ap_driver *ap_drv = ap_dev->drv;
+
+	spin_lock_bh(&ap_dev->lock);
+	__ap_flush_queue(ap_dev);
+	/**
+	 * set ->unregistered to 1 while holding the lock. This prevents
+	 * new messages from being put on the queue from now on.
+	 */
+	ap_dev->unregistered = 1;
+	spin_unlock_bh(&ap_dev->lock);
+	if (ap_drv->remove)
+		ap_drv->remove(ap_dev);
+	return 0;
+}
+
+int ap_driver_register(struct ap_driver *ap_drv, struct module *owner,
+		       char *name)
+{
+	struct device_driver *drv = &ap_drv->driver;
+
+	drv->bus = &ap_bus_type;
+	drv->probe = ap_device_probe;
+	drv->remove = ap_device_remove;
+	drv->owner = owner;
+	drv->name = name;
+	return driver_register(drv);
+}
+EXPORT_SYMBOL(ap_driver_register);
+
+void ap_driver_unregister(struct ap_driver *ap_drv)
+{
+	driver_unregister(&ap_drv->driver);
+}
+EXPORT_SYMBOL(ap_driver_unregister);
+
+/**
+ * AP bus attributes.
+ */
+static ssize_t ap_domain_show(struct bus_type *bus, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_domain_index);
+}
+
+static BUS_ATTR(ap_domain, 0444, ap_domain_show, NULL);
+
+static ssize_t ap_config_time_show(struct bus_type *bus, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_config_time);
+}
+
+static ssize_t ap_config_time_store(struct bus_type *bus,
+				    const char *buf, size_t count)
+{
+	int time;
+
+	if (sscanf(buf, "%d\n", &time) != 1 || time < 5 || time > 120)
+		return -EINVAL;
+	ap_config_time = time;
+	if (!timer_pending(&ap_config_timer) ||
+	    !mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ)) {
+		ap_config_timer.expires = jiffies + ap_config_time * HZ;
+		add_timer(&ap_config_timer);
+	}
+	return count;
+}
+
+static BUS_ATTR(config_time, 0644, ap_config_time_show, ap_config_time_store);
+
+static ssize_t ap_poll_thread_show(struct bus_type *bus, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", ap_poll_kthread ? 1 : 0);
+}
+
+static ssize_t ap_poll_thread_store(struct bus_type *bus,
+				    const char *buf, size_t count)
+{
+	int flag, rc;
+
+	if (sscanf(buf, "%d\n", &flag) != 1)
+		return -EINVAL;
+	if (flag) {
+		rc = ap_poll_thread_start();
+		if (rc)
+			return rc;
+	}
+	else
+		ap_poll_thread_stop();
+	return count;
+}
+
+static BUS_ATTR(poll_thread, 0644, ap_poll_thread_show, ap_poll_thread_store);
+
+static struct bus_attribute *const ap_bus_attrs[] = {
+	&bus_attr_ap_domain,
+	&bus_attr_config_time,
+	&bus_attr_poll_thread,
+	NULL
+};
+
+/**
+ * Pick one of the 16 ap domains.
+ */
+static inline int ap_select_domain(void)
+{
+	int queue_depth, device_type, count, max_count, best_domain;
+	int rc, i, j;
+
+	/**
+	 * We want to use a single domain. Either the one specified with
+	 * the "domain=" parameter or the domain with the maximum number
+	 * of devices.
+	 */
+	if (ap_domain_index >= 0 && ap_domain_index < AP_DOMAINS)
+		/* Domain has already been selected. */
+		return 0;
+	best_domain = -1;
+	max_count = 0;
+	for (i = 0; i < AP_DOMAINS; i++) {
+		count = 0;
+		for (j = 0; j < AP_DEVICES; j++) {
+			ap_qid_t qid = AP_MKQID(j, i);
+			rc = ap_query_queue(qid, &queue_depth, &device_type);
+			if (rc)
+				continue;
+			count++;
+		}
+		if (count > max_count) {
+			max_count = count;
+			best_domain = i;
+		}
+	}
+	if (best_domain >= 0){
+		ap_domain_index = best_domain;
+		return 0;
+	}
+	return -ENODEV;
+}
+
+/**
+ * Find the device type if query queue returned a device type of 0.
+ * @ap_dev: pointer to the AP device.
+ */
+static int ap_probe_device_type(struct ap_device *ap_dev)
+{
+	static unsigned char msg[] = {
+		0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x01,0x00,0x43,0x43,0x41,0x2d,0x41,0x50,
+		0x50,0x4c,0x20,0x20,0x20,0x01,0x01,0x01,
+		0x00,0x00,0x00,0x00,0x50,0x4b,0x00,0x00,
+		0x00,0x00,0x01,0x1c,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x05,0xb8,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x70,0x00,0x41,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x54,0x32,0x01,0x00,0xa0,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0xb8,0x05,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,
+		0x49,0x43,0x53,0x46,0x20,0x20,0x20,0x20,
+		0x50,0x4b,0x0a,0x00,0x50,0x4b,0x43,0x53,
+		0x2d,0x31,0x2e,0x32,0x37,0x00,0x11,0x22,
+		0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,
+		0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,
+		0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66,
+		0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,
+		0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,
+		0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,
+		0x11,0x22,0x33,0x5d,0x00,0x5b,0x00,0x77,
+		0x88,0x1e,0x00,0x00,0x57,0x00,0x00,0x00,
+		0x00,0x04,0x00,0x00,0x4f,0x00,0x00,0x00,
+		0x03,0x02,0x00,0x00,0x40,0x01,0x00,0x01,
+		0xce,0x02,0x68,0x2d,0x5f,0xa9,0xde,0x0c,
+		0xf6,0xd2,0x7b,0x58,0x4b,0xf9,0x28,0x68,
+		0x3d,0xb4,0xf4,0xef,0x78,0xd5,0xbe,0x66,
+		0x63,0x42,0xef,0xf8,0xfd,0xa4,0xf8,0xb0,
+		0x8e,0x29,0xc2,0xc9,0x2e,0xd8,0x45,0xb8,
+		0x53,0x8c,0x6f,0x4e,0x72,0x8f,0x6c,0x04,
+		0x9c,0x88,0xfc,0x1e,0xc5,0x83,0x55,0x57,
+		0xf7,0xdd,0xfd,0x4f,0x11,0x36,0x95,0x5d,
+	};
+	struct ap_queue_status status;
+	unsigned long long psmid;
+	char *reply;
+	int rc, i;
+
+	reply = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reply) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	status = __ap_send(ap_dev->qid, 0x0102030405060708ULL,
+			   msg, sizeof(msg));
+	if (status.response_code != AP_RESPONSE_NORMAL) {
+		rc = -ENODEV;
+		goto out_free;
+	}
+
+	/* Wait for the test message to complete. */
+	for (i = 0; i < 6; i++) {
+		mdelay(300);
+		status = __ap_recv(ap_dev->qid, &psmid, reply, 4096);
+		if (status.response_code == AP_RESPONSE_NORMAL &&
+		    psmid == 0x0102030405060708ULL)
+			break;
+	}
+	if (i < 6) {
+		/* Got an answer. */
+		if (reply[0] == 0x00 && reply[1] == 0x86)
+			ap_dev->device_type = AP_DEVICE_TYPE_PCICC;
+		else
+			ap_dev->device_type = AP_DEVICE_TYPE_PCICA;
+		rc = 0;
+	} else
+		rc = -ENODEV;
+
+out_free:
+	free_page((unsigned long) reply);
+out:
+	return rc;
+}
+
+/**
+ * Scan the ap bus for new devices.
+ */
+static int __ap_scan_bus(struct device *dev, void *data)
+{
+	return to_ap_dev(dev)->qid == (ap_qid_t)(unsigned long) data;
+}
+
+static void ap_device_release(struct device *dev)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+
+	kfree(ap_dev);
+}
+
+static void ap_scan_bus(void *data)
+{
+	struct ap_device *ap_dev;
+	struct device *dev;
+	ap_qid_t qid;
+	int queue_depth, device_type;
+	int rc, i;
+
+	if (ap_select_domain() != 0)
+		return;
+	for (i = 0; i < AP_DEVICES; i++) {
+		qid = AP_MKQID(i, ap_domain_index);
+		dev = bus_find_device(&ap_bus_type, NULL,
+				      (void *)(unsigned long)qid,
+				      __ap_scan_bus);
+		if (dev) {
+			put_device(dev);
+			continue;
+		}
+		rc = ap_query_queue(qid, &queue_depth, &device_type);
+		if (rc)
+			continue;
+		rc = ap_init_queue(qid);
+		if (rc)
+			continue;
+		ap_dev = kzalloc(sizeof(*ap_dev), GFP_KERNEL);
+		if (!ap_dev)
+			break;
+		ap_dev->qid = qid;
+		ap_dev->queue_depth = queue_depth;
+		spin_lock_init(&ap_dev->lock);
+		INIT_LIST_HEAD(&ap_dev->pendingq);
+		INIT_LIST_HEAD(&ap_dev->requestq);
+		if (device_type == 0)
+			ap_probe_device_type(ap_dev);
+		else
+			ap_dev->device_type = device_type;
+
+		ap_dev->device.bus = &ap_bus_type;
+		ap_dev->device.parent = ap_root_device;
+		snprintf(ap_dev->device.bus_id, BUS_ID_SIZE, "card%02x",
+			 AP_QID_DEVICE(ap_dev->qid));
+		ap_dev->device.release = ap_device_release;
+		rc = device_register(&ap_dev->device);
+		if (rc) {
+			kfree(ap_dev);
+			continue;
+		}
+		/* Add device attributes. */
+		rc = sysfs_create_group(&ap_dev->device.kobj,
+					&ap_dev_attr_group);
+		if (rc)
+			device_unregister(&ap_dev->device);
+	}
+}
+
+static void
+ap_config_timeout(unsigned long ptr)
+{
+	queue_work(ap_work_queue, &ap_config_work);
+	ap_config_timer.expires = jiffies + ap_config_time * HZ;
+	add_timer(&ap_config_timer);
+}
+
+/**
+ * Set up the timer to run the poll tasklet
+ */
+static inline void ap_schedule_poll_timer(void)
+{
+	if (timer_pending(&ap_poll_timer))
+		return;
+	mod_timer(&ap_poll_timer, jiffies + AP_POLL_TIME);
+}
+
+/**
+ * Receive pending reply messages from an AP device.
+ * @ap_dev: pointer to the AP device
+ * @flags: pointer to control flags, bit 2^0 is set if another poll is
+ *	   required, bit 2^1 is set if the poll timer needs to get armed
+ * Returns 0 if the device is still present, -ENODEV if not.
+ */
+static inline int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags)
+{
+	struct ap_queue_status status;
+	struct ap_message *ap_msg;
+
+	if (ap_dev->queue_count <= 0)
+		return 0;
+	status = __ap_recv(ap_dev->qid, &ap_dev->reply->psmid,
+			   ap_dev->reply->message, ap_dev->reply->length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		atomic_dec(&ap_poll_requests);
+		ap_dev->queue_count--;
+		list_for_each_entry(ap_msg, &ap_dev->pendingq, list) {
+			if (ap_msg->psmid != ap_dev->reply->psmid)
+				continue;
+			list_del_init(&ap_msg->list);
+			ap_dev->pendingq_count--;
+			ap_dev->drv->receive(ap_dev, ap_msg, ap_dev->reply);
+			break;
+		}
+		if (ap_dev->queue_count > 0)
+			*flags |= 1;
+		break;
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		if (status.queue_empty) {
+			/* The card shouldn't forget requests but who knows. */
+			ap_dev->queue_count = 0;
+			list_splice_init(&ap_dev->pendingq, &ap_dev->requestq);
+			ap_dev->requestq_count += ap_dev->pendingq_count;
+			ap_dev->pendingq_count = 0;
+		} else
+			*flags |= 2;
+		break;
+	default:
+		return -ENODEV;
+	}
+	return 0;
+}
+
+/**
+ * Send messages from the request queue to an AP device.
+ * @ap_dev: pointer to the AP device
+ * @flags: pointer to control flags, bit 2^0 is set if another poll is
+ *	   required, bit 2^1 is set if the poll timer needs to get armed
+ * Returns 0 if the device is present, -EINVAL (message too big) or -ENODEV.
+ */
+static inline int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
+{
+	struct ap_queue_status status;
+	struct ap_message *ap_msg;
+
+	if (ap_dev->requestq_count <= 0 ||
+	    ap_dev->queue_count >= ap_dev->queue_depth)
+		return 0;
+	/* Start the next request on the queue. */
+	ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list);
+	status = __ap_send(ap_dev->qid, ap_msg->psmid,
+			   ap_msg->message, ap_msg->length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		atomic_inc(&ap_poll_requests);
+		ap_dev->queue_count++;
+		list_move_tail(&ap_msg->list, &ap_dev->pendingq);
+		ap_dev->requestq_count--;
+		ap_dev->pendingq_count++;
+		if (ap_dev->queue_count < ap_dev->queue_depth &&
+		    ap_dev->requestq_count > 0)
+			*flags |= 1;
+		*flags |= 2;
+		break;
+	case AP_RESPONSE_Q_FULL:
+		*flags |= 2;
+		break;
+	case AP_RESPONSE_MESSAGE_TOO_BIG:
+		return -EINVAL;
+	default:
+		return -ENODEV;
+	}
+	return 0;
+}
+
+/**
+ * Poll AP device for pending replies and send new messages. If either
+ * ap_poll_read or ap_poll_write returns -ENODEV unregister the device.
+ * @ap_dev: pointer to the bus device
+ * @flags: pointer to control flags, bit 2^0 is set if another poll is
+ *	   required, bit 2^1 is set if the poll timer needs to get armed
+ * Returns 0, or the error code of ap_poll_read/ap_poll_write on failure.
+ */
+static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags)
+{
+	int rc;
+
+	rc = ap_poll_read(ap_dev, flags);
+	if (rc)
+		return rc;
+	return ap_poll_write(ap_dev, flags);
+}
+
+/**
+ * Queue a message. Returns 0 (sent), -EBUSY (backlogged), -EINVAL or -ENODEV.
+ * @ap_dev: pointer to the AP device
+ * @ap_msg: the message to be queued
+ */
+static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
+{
+	struct ap_queue_status status;
+
+	/* Send directly only if nothing is backlogged and the queue has room. */
+	if (!list_empty(&ap_dev->requestq) ||
+	    ap_dev->queue_count >= ap_dev->queue_depth)
+		goto backlog;
+
+	status = __ap_send(ap_dev->qid, ap_msg->psmid,
+			   ap_msg->message, ap_msg->length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		list_add_tail(&ap_msg->list, &ap_dev->pendingq);
+		atomic_inc(&ap_poll_requests);
+		ap_dev->pendingq_count++;
+		ap_dev->queue_count++;
+		ap_dev->total_request_count++;
+		ap_schedule_poll_timer();
+		return 0;
+	case AP_RESPONSE_Q_FULL:
+		break;
+	case AP_RESPONSE_MESSAGE_TOO_BIG:
+		ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-EINVAL));
+		return -EINVAL;
+	default:	/* Device is gone. */
+		ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV));
+		return -ENODEV;
+	}
+
+backlog:
+	/* Park the message on requestq, the list of messages yet to be sent. */
+	list_add_tail(&ap_msg->list, &ap_dev->requestq);
+	ap_dev->requestq_count++;
+	ap_dev->total_request_count++;
+	return -EBUSY;
+}
+
+void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
+{
+	unsigned long flags = 0;	/* ap_poll_write() ORs bits into it */
+	int rc;
+
+	spin_lock_bh(&ap_dev->lock);
+	if (!ap_dev->unregistered) {
+		/* Make room on the queue by polling for finished requests. */
+		rc = ap_poll_queue(ap_dev, &flags);
+		if (!rc)
+			rc = __ap_queue_message(ap_dev, ap_msg);
+		if (!rc)	/* message was sent: wake the poll thread */
+			wake_up(&ap_poll_wait);
+	} else {	/* device already unregistered: fail the request */
+		ap_dev->drv->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV));
+		rc = 0;
+	}
+	spin_unlock_bh(&ap_dev->lock);
+	if (rc == -ENODEV)	/* done without holding ap_dev->lock */
+		device_unregister(&ap_dev->device);
+}
+EXPORT_SYMBOL(ap_queue_message);
+
+/**
+ * Cancel a crypto request by unlinking it from the device pendingq or
+ * requestq list and adjusting the matching counter. Note that the
+ * request stays on the AP queue. When it finishes the message
+ * reply will be discarded because the psmid can't be found.
+ * @ap_dev: AP device that has the message queued
+ * @ap_msg: the message that is to be removed
+ */
+void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
+{
+	struct ap_message *entry;
+	int *count = &ap_dev->requestq_count;
+
+	spin_lock_bh(&ap_dev->lock);
+	if (!list_empty(&ap_msg->list)) {
+		list_for_each_entry(entry, &ap_dev->pendingq, list)
+			if (entry->psmid == ap_msg->psmid) {
+				count = &ap_dev->pendingq_count;
+				break;
+			}
+		(*count)--;	/* requestq unless the psmid was on pendingq */
+		list_del_init(&ap_msg->list);
+	}
+	spin_unlock_bh(&ap_dev->lock);
+}
+EXPORT_SYMBOL(ap_cancel_message);
+
+/**
+ * AP receive polling for finished AP requests: schedules ap_tasklet.
+ */
+static void ap_poll_timeout(unsigned long unused)
+{
+	tasklet_schedule(&ap_tasklet);
+}
+
+/**
+ * Poll all AP devices on the bus in a round robin fashion. ap_poll_all
+ * repeats the pass until bit 2^0 of the control flags is not set and arms
+ * the poll timer if bit 2^1 has been set. __ap_poll_all handles one device
+ * per call and unregisters it if polling returned an error.
+ */
+static int __ap_poll_all(struct device *dev, void *data)
+{
+	struct ap_device *ap_dev = to_ap_dev(dev);
+	int rc = 0;
+
+	spin_lock(&ap_dev->lock);
+	/* Devices already marked as unregistered are not polled. */
+	if (!ap_dev->unregistered)
+		rc = ap_poll_queue(ap_dev, (unsigned long *) data);
+
+	spin_unlock(&ap_dev->lock);
+	if (rc)
+		device_unregister(&ap_dev->device);
+	return 0;
+}
+
+static void ap_poll_all(unsigned long dummy)
+{
+	unsigned long flags;
+
+	do {
+		flags = 0;	/* each pass starts with clean control flags */
+		bus_for_each_dev(&ap_bus_type, NULL, &flags, __ap_poll_all);
+	} while (flags & 1);	/* bit 2^0: another poll is required */
+	if (flags & 2)		/* bit 2^1: the poll timer needs to get armed */
+		ap_schedule_poll_timer();
+}
+
+/**
+ * AP bus poll thread. The purpose of this thread is to poll for
+ * finished requests in a loop if there is a "free" cpu - that is
+ * a cpu that doesn't have anything better to do. The polling stops
+ * as soon as there is another task or if all messages have been
+ * delivered. Returns 0 once kthread_should_stop() reports true.
+ */
+static int ap_poll_thread(void *data)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	unsigned long flags;
+	int requests;
+
+	set_user_nice(current, 19);	/* lowest priority: only use spare cpu time */
+	while (1) {
+		if (need_resched()) {	/* give way to other tasks first */
+			schedule();
+			continue;
+		}
+		add_wait_queue(&ap_poll_wait, &wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
+		requests = atomic_read(&ap_poll_requests);
+		if (requests <= 0)	/* nothing outstanding: sleep until woken */
+			schedule();
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&ap_poll_wait, &wait);
+
+		local_bh_disable();	/* __ap_poll_all uses plain spin_lock() */
+		flags = 0;	/* the result bits are not evaluated in this thread */
+		bus_for_each_dev(&ap_bus_type, NULL, &flags, __ap_poll_all);
+		local_bh_enable();
+	}
+	set_current_state(TASK_RUNNING);	/* undo the setup done before break */
+	remove_wait_queue(&ap_poll_wait, &wait);
+	return 0;
+}
+
+static int ap_poll_thread_start(void)
+{
+	int rc = 0;
+
+	mutex_lock(&ap_poll_thread_mutex);
+	/* An already existing poll thread is left alone; 0 is returned. */
+	if (!ap_poll_kthread) {
+		ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll");
+		if (IS_ERR(ap_poll_kthread)) {
+			rc = PTR_ERR(ap_poll_kthread);
+			ap_poll_kthread = NULL;
+		}
+	}
+	mutex_unlock(&ap_poll_thread_mutex);
+	return rc;
+}
+
+static void ap_poll_thread_stop(void)
+{
+	mutex_lock(&ap_poll_thread_mutex);
+	if (ap_poll_kthread) {	/* nothing to do if no thread is recorded */
+		kthread_stop(ap_poll_kthread);
+		ap_poll_kthread = NULL;	/* lets ap_poll_thread_start() run again */
+	}
+	mutex_unlock(&ap_poll_thread_mutex);
+}
+
+/**
+ * The module initialization code. Failures unwind what was set up so far.
+ */
+int __init ap_module_init(void)
+{
+	int rc, i;
+
+	if (ap_domain_index < -1 || ap_domain_index >= AP_DOMAINS) {
+		printk(KERN_WARNING "Invalid param: domain = %d. "
+		       " Not loading.\n", ap_domain_index);
+		return -EINVAL;
+	}
+	if (ap_instructions_available() != 0) {
+		printk(KERN_WARNING "AP instructions not installed.\n");
+		return -ENODEV;
+	}
+
+	/* Create /sys/bus/ap and its NULL terminated list of attribute files. */
+	rc = bus_register(&ap_bus_type);
+	if (rc)
+		goto out;
+	for (i = 0; ap_bus_attrs[i]; i++) {
+		rc = bus_create_file(&ap_bus_type, ap_bus_attrs[i]);
+		if (rc)
+			goto out_bus;
+	}
+
+	/* Create /sys/devices/ap. */
+	ap_root_device = s390_root_dev_register("ap");
+	rc = IS_ERR(ap_root_device) ? PTR_ERR(ap_root_device) : 0;
+	if (rc)
+		goto out_bus;
+
+	ap_work_queue = create_singlethread_workqueue("kapwork");
+	if (!ap_work_queue) {
+		rc = -ENOMEM;
+		goto out_root;
+	}
+
+	if (ap_select_domain() == 0)	/* scan only if a domain was selected */
+		ap_scan_bus(NULL);
+
+	/* Setup the ap bus rescan timer, first expiry in ap_config_time seconds. */
+	init_timer(&ap_config_timer);
+	ap_config_timer.function = ap_config_timeout;
+	ap_config_timer.data = 0;
+	ap_config_timer.expires = jiffies + ap_config_time * HZ;
+	add_timer(&ap_config_timer);
+
+	/* Start the low priority AP bus poll thread if ap_thread_flag is set. */
+	if (ap_thread_flag) {
+		rc = ap_poll_thread_start();
+		if (rc)
+			goto out_work;
+	}
+
+	return 0;
+
+out_work:	/* NOTE(review): devices from ap_scan_bus() presumably stay registered - confirm */
+	del_timer_sync(&ap_config_timer);
+	del_timer_sync(&ap_poll_timer);
+	destroy_workqueue(ap_work_queue);
+out_root:
+	s390_root_dev_unregister(ap_root_device);
+out_bus:
+	while (i--)	/* removes only the attribute files created so far */
+		bus_remove_file(&ap_bus_type, ap_bus_attrs[i]);
+	bus_unregister(&ap_bus_type);
+out:
+	return rc;
+}
+
+static int __ap_match_all(struct device *dev, void *data)
+{
+	return 1;	/* match callback that accepts every device */
+}
+
+/**
+ * The module termination code: stops polling, then removes all devices.
+ */
+void ap_module_exit(void)
+{
+	struct device *dev;
+	int i;
+
+	ap_poll_thread_stop();
+	del_timer_sync(&ap_config_timer);
+	del_timer_sync(&ap_poll_timer);
+	destroy_workqueue(ap_work_queue);
+	s390_root_dev_unregister(ap_root_device);
+	/* Look up and unregister devices until the bus has none left. */
+	while ((dev = bus_find_device(&ap_bus_type, NULL, NULL,
+				      __ap_match_all))) {
+		device_unregister(dev);
+		put_device(dev);
+	}
+	for (i = 0; ap_bus_attrs[i]; i++)
+		bus_remove_file(&ap_bus_type, ap_bus_attrs[i]);
+	bus_unregister(&ap_bus_type);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(ap_module_init);
+module_exit(ap_module_exit);
+#endif /* !CONFIG_ZCRYPT_MONOLITHIC */
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
new file mode 100644
index 0000000..83b69c0
--- /dev/null
+++ b/drivers/s390/crypto/ap_bus.h
@@ -0,0 +1,158 @@
+/*
+ * linux/drivers/s390/crypto/ap_bus.h
+ *
+ * Copyright (C) 2006 IBM Corporation
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * Adjunct processor bus header file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _AP_BUS_H_
+#define _AP_BUS_H_
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/types.h>
+
+#define AP_DEVICES 64		/* Number of AP devices. */
+#define AP_DOMAINS 16		/* Number of AP domains. */
+#define AP_MAX_RESET 90		/* Maximum number of resets. */
+#define AP_CONFIG_TIME 30	/* Time in seconds between AP bus rescans. */
+#define AP_POLL_TIME 1		/* Time in ticks between receive polls. */
+
+extern int ap_domain_index;
+
+/**
+ * The ap_qid_t identifier of an ap queue. The 6 bit device index is
+ * kept in bits 8-13, the 4 bit queue index (domain) in bits 0-3.
+ */
+typedef unsigned int ap_qid_t;
+
+#define AP_MKQID(_device,_queue) ((((_device) & 63) << 8) | ((_queue) & 15))
+#define AP_QID_DEVICE(_qid) (((_qid) >> 8) & 63)
+#define AP_QID_QUEUE(_qid) ((_qid) & 15)
+
+/**
+ * The ap queue status word is returned by all three AP functions
+ * (PQAP, NQAP and DQAP).  There's a set of flags in the first
+ * byte, followed by a 1 byte response code.
+ */
+struct ap_queue_status {
+	unsigned int queue_empty	: 1;
+	unsigned int replies_waiting	: 1;
+	unsigned int queue_full		: 1;
+	unsigned int pad1		: 5;	/* rest of the flag byte */
+	unsigned int response_code	: 8;	/* one of AP_RESPONSE_* */
+	unsigned int pad2		: 16;
+};
+
+#define AP_RESPONSE_NORMAL		0x00
+#define AP_RESPONSE_Q_NOT_AVAIL		0x01
+#define AP_RESPONSE_RESET_IN_PROGRESS	0x02
+#define AP_RESPONSE_DECONFIGURED	0x03
+#define AP_RESPONSE_CHECKSTOPPED	0x04
+#define AP_RESPONSE_BUSY		0x05
+#define AP_RESPONSE_Q_FULL		0x10
+#define AP_RESPONSE_NO_PENDING_REPLY	0x10	/* same value as Q_FULL */
+#define AP_RESPONSE_INDEX_TOO_BIG	0x11
+#define AP_RESPONSE_NO_FIRST_PART	0x13
+#define AP_RESPONSE_MESSAGE_TOO_BIG	0x15
+
+/**
+ * Known device types
+ */
+#define AP_DEVICE_TYPE_PCICC	3
+#define AP_DEVICE_TYPE_PCICA	4
+#define AP_DEVICE_TYPE_PCIXCC	5
+#define AP_DEVICE_TYPE_CEX2A	6
+#define AP_DEVICE_TYPE_CEX2C	7
+
+struct ap_device;
+struct ap_message;
+
+struct ap_driver {
+	struct device_driver driver;	/* embedded object, see to_ap_drv() */
+	struct ap_device_id *ids;
+
+	int (*probe)(struct ap_device *);
+	void (*remove)(struct ap_device *);
+	/* receive is called from tasklet context or with bottom halves disabled */
+	void (*receive)(struct ap_device *, struct ap_message *,
+			struct ap_message *);	/* last argument may be an ERR_PTR() */
+};
+
+#define to_ap_drv(x) container_of((x), struct ap_driver, driver)
+
+int ap_driver_register(struct ap_driver *, struct module *, char *);
+void ap_driver_unregister(struct ap_driver *);
+
+struct ap_device {
+	struct device device;		/* Embedded object, see to_ap_dev(). */
+	struct ap_driver *drv;		/* Pointer to AP device driver. */
+	spinlock_t lock;		/* Per device lock. */
+
+	ap_qid_t qid;			/* AP queue id. */
+	int queue_depth;		/* AP queue depth. */
+	int device_type;		/* AP device type. */
+	int unregistered;		/* Marks AP device as unregistered. */
+
+	int queue_count;		/* # messages currently on AP queue. */
+
+	struct list_head pendingq;	/* List of messages sent to AP queue. */
+	int pendingq_count;		/* # requests on pendingq list. */
+	struct list_head requestq;	/* List of messages yet to be sent. */
+	int requestq_count;		/* # requests on requestq list. */
+	int total_request_count;	/* # requests ever for this AP device. */
+
+	struct ap_message *reply;	/* Per device reply message. */
+
+	void *private;			/* ap driver private pointer. */
+};
+
+#define to_ap_dev(x) container_of((x), struct ap_device, device)
+
+struct ap_message {
+	struct list_head list;		/* Links into pendingq or requestq. */
+	unsigned long long psmid;	/* Message id. */
+	void *message;			/* Pointer to message buffer. */
+	size_t length;			/* Message length. */
+
+	void *private;			/* ap driver private pointer. */
+};
+
+#define AP_DEVICE(dt)	/* NOTE(review): presumably for ap_device_id tables */ \
+	.dev_type=(dt),					\
+	.match_flags=AP_DEVICE_ID_MATCH_DEVICE_TYPE,
+
+/**
+ * Note: don't use ap_send/ap_recv after using ap_queue_message
+ * for the first time. Otherwise the ap message queue will get
+ * confused.
+ */
+int ap_send(ap_qid_t, unsigned long long, void *, size_t);
+int ap_recv(ap_qid_t, unsigned long long *, void *, size_t);
+
+void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg);
+void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg);
+void ap_flush_queue(struct ap_device *ap_dev);
+
+int ap_module_init(void);
+void ap_module_exit(void);
+
+#endif /* _AP_BUS_H_ */
diff --git a/drivers/s390/crypto/z90common.h b/drivers/s390/crypto/z90common.h
deleted file mode 100644
index dbbcda3..0000000
--- a/drivers/s390/crypto/z90common.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- *  linux/drivers/s390/crypto/z90common.h
- *
- *  z90crypt 1.3.3
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs (burrough@us.ibm.com)
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _Z90COMMON_H_
-#define _Z90COMMON_H_
-
-
-#define RESPBUFFSIZE 256
-#define PCI_FUNC_KEY_DECRYPT 0x5044
-#define PCI_FUNC_KEY_ENCRYPT 0x504B
-extern int ext_bitlens;
-
-enum devstat {
-	DEV_GONE,
-	DEV_ONLINE,
-	DEV_QUEUE_FULL,
-	DEV_EMPTY,
-	DEV_NO_WORK,
-	DEV_BAD_MESSAGE,
-	DEV_TSQ_EXCEPTION,
-	DEV_RSQ_EXCEPTION,
-	DEV_SEN_EXCEPTION,
-	DEV_REC_EXCEPTION
-};
-
-enum hdstat {
-	HD_NOT_THERE,
-	HD_BUSY,
-	HD_DECONFIGURED,
-	HD_CHECKSTOPPED,
-	HD_ONLINE,
-	HD_TSQ_EXCEPTION
-};
-
-#define Z90C_NO_DEVICES		1
-#define Z90C_AMBIGUOUS_DOMAIN	2
-#define Z90C_INCORRECT_DOMAIN	3
-#define ENOTINIT		4
-
-#define SEN_BUSY	 7
-#define SEN_USER_ERROR	 8
-#define SEN_QUEUE_FULL	11
-#define SEN_NOT_AVAIL	16
-#define SEN_PAD_ERROR	17
-#define SEN_RETRY	18
-#define SEN_RELEASED	24
-
-#define REC_EMPTY	 4
-#define REC_BUSY	 6
-#define REC_OPERAND_INV	 8
-#define REC_OPERAND_SIZE 9
-#define REC_EVEN_MOD	10
-#define REC_NO_WORK	11
-#define REC_HARDWAR_ERR	12
-#define REC_NO_RESPONSE	13
-#define REC_RETRY_DEV	14
-#define REC_USER_GONE	15
-#define REC_BAD_MESSAGE	16
-#define REC_INVALID_PAD	17
-#define REC_USE_PCICA	18
-
-#define WRONG_DEVICE_TYPE 20
-
-#define REC_FATAL_ERROR 32
-#define SEN_FATAL_ERROR 33
-#define TSQ_FATAL_ERROR 34
-#define RSQ_FATAL_ERROR 35
-
-#define Z90CRYPT_NUM_TYPES	6
-#define PCICA		0
-#define PCICC		1
-#define PCIXCC_MCL2	2
-#define PCIXCC_MCL3	3
-#define CEX2C		4
-#define CEX2A		5
-#define NILDEV		-1
-#define ANYDEV		-1
-#define PCIXCC_UNK	-2
-
-enum hdevice_type {
-	PCICC_HW  = 3,
-	PCICA_HW  = 4,
-	PCIXCC_HW = 5,
-	CEX2A_HW  = 6,
-	CEX2C_HW  = 7
-};
-
-struct CPRBX {
-	unsigned short cprb_len;
-	unsigned char  cprb_ver_id;
-	unsigned char  pad_000[3];
-	unsigned char  func_id[2];
-	unsigned char  cprb_flags[4];
-	unsigned int   req_parml;
-	unsigned int   req_datal;
-	unsigned int   rpl_msgbl;
-	unsigned int   rpld_parml;
-	unsigned int   rpl_datal;
-	unsigned int   rpld_datal;
-	unsigned int   req_extbl;
-	unsigned char  pad_001[4];
-	unsigned int   rpld_extbl;
-	unsigned char  req_parmb[16];
-	unsigned char  req_datab[16];
-	unsigned char  rpl_parmb[16];
-	unsigned char  rpl_datab[16];
-	unsigned char  req_extb[16];
-	unsigned char  rpl_extb[16];
-	unsigned short ccp_rtcode;
-	unsigned short ccp_rscode;
-	unsigned int   mac_data_len;
-	unsigned char  logon_id[8];
-	unsigned char  mac_value[8];
-	unsigned char  mac_content_flgs;
-	unsigned char  pad_002;
-	unsigned short domain;
-	unsigned char  pad_003[12];
-	unsigned char  pad_004[36];
-};
-
-#ifndef DEV_NAME
-#define DEV_NAME	"z90crypt"
-#endif
-#define PRINTK(fmt, args...) \
-	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
-#define PRINTKN(fmt, args...) \
-	printk(KERN_DEBUG DEV_NAME ": " fmt, ## args)
-#define PRINTKW(fmt, args...) \
-	printk(KERN_WARNING DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
-#define PRINTKC(fmt, args...) \
-	printk(KERN_CRIT DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
-
-#ifdef Z90CRYPT_DEBUG
-#define PDEBUG(fmt, args...) \
-	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
-#else
-#define PDEBUG(fmt, args...) do {} while (0)
-#endif
-
-#define UMIN(a,b) ((a) < (b) ? (a) : (b))
-#define IS_EVEN(x) ((x) == (2 * ((x) / 2)))
-
-#endif
diff --git a/drivers/s390/crypto/z90crypt.h b/drivers/s390/crypto/z90crypt.h
deleted file mode 100644
index 0ca1d12..0000000
--- a/drivers/s390/crypto/z90crypt.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- *  linux/drivers/s390/crypto/z90crypt.h
- *
- *  z90crypt 1.3.3 (kernel-private header)
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs (burrough@us.ibm.com)
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _Z90CRYPT_H_
-#define _Z90CRYPT_H_
-
-#include <asm/z90crypt.h>
-
-/**
- * local errno definitions
- */
-#define ENOBUFF	  129	// filp->private_data->...>work_elem_p->buffer is NULL
-#define EWORKPEND 130	// user issues ioctl while another pending
-#define ERELEASED 131	// user released while ioctl pending
-#define EQUIESCE  132	// z90crypt quiescing (no more work allowed)
-#define ETIMEOUT  133	// request timed out
-#define EUNKNOWN  134	// some unrecognized error occured (retry may succeed)
-#define EGETBUFF  135	// Error getting buffer or hardware lacks capability
-			// (retry in software)
-
-/**
- * DEPRECATED STRUCTURES
- */
-
-/**
- * This structure is DEPRECATED and the corresponding ioctl() has been
- * replaced with individual ioctl()s for each piece of data!
- * This structure will NOT survive past version 1.3.1, so switch to the
- * new ioctl()s.
- */
-#define MASK_LENGTH 64 // mask length
-struct ica_z90_status {
-	int totalcount;
-	int leedslitecount; // PCICA
-	int leeds2count;    // PCICC
-	// int PCIXCCCount; is not in struct for backward compatibility
-	int requestqWaitCount;
-	int pendingqWaitCount;
-	int totalOpenCount;
-	int cryptoDomain;
-	// status: 0=not there, 1=PCICA, 2=PCICC, 3=PCIXCC_MCL2, 4=PCIXCC_MCL3,
-	//         5=CEX2C
-	unsigned char status[MASK_LENGTH];
-	// qdepth: # work elements waiting for each device
-	unsigned char qdepth[MASK_LENGTH];
-};
-
-#endif /* _Z90CRYPT_H_ */
diff --git a/drivers/s390/crypto/z90hardware.c b/drivers/s390/crypto/z90hardware.c
deleted file mode 100644
index be60795..0000000
--- a/drivers/s390/crypto/z90hardware.c
+++ /dev/null
@@ -1,2531 +0,0 @@
-/*
- *  linux/drivers/s390/crypto/z90hardware.c
- *
- *  z90crypt 1.3.3
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs (burrough@us.ibm.com)
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <asm/uaccess.h>
-#include <linux/compiler.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include "z90crypt.h"
-#include "z90common.h"
-
-struct cca_token_hdr {
-	unsigned char  token_identifier;
-	unsigned char  version;
-	unsigned short token_length;
-	unsigned char  reserved[4];
-};
-
-#define CCA_TKN_HDR_ID_EXT 0x1E
-
-struct cca_private_ext_ME_sec {
-	unsigned char  section_identifier;
-	unsigned char  version;
-	unsigned short section_length;
-	unsigned char  private_key_hash[20];
-	unsigned char  reserved1[4];
-	unsigned char  key_format;
-	unsigned char  reserved2;
-	unsigned char  key_name_hash[20];
-	unsigned char  key_use_flags[4];
-	unsigned char  reserved3[6];
-	unsigned char  reserved4[24];
-	unsigned char  confounder[24];
-	unsigned char  exponent[128];
-	unsigned char  modulus[128];
-};
-
-#define CCA_PVT_USAGE_ALL 0x80
-
-struct cca_public_sec {
-	unsigned char  section_identifier;
-	unsigned char  version;
-	unsigned short section_length;
-	unsigned char  reserved[2];
-	unsigned short exponent_len;
-	unsigned short modulus_bit_len;
-	unsigned short modulus_byte_len;
-	unsigned char  exponent[3];
-};
-
-struct cca_private_ext_ME {
-	struct cca_token_hdr	      pvtMEHdr;
-	struct cca_private_ext_ME_sec pvtMESec;
-	struct cca_public_sec	      pubMESec;
-};
-
-struct cca_public_key {
-	struct cca_token_hdr  pubHdr;
-	struct cca_public_sec pubSec;
-};
-
-struct cca_pvt_ext_CRT_sec {
-	unsigned char  section_identifier;
-	unsigned char  version;
-	unsigned short section_length;
-	unsigned char  private_key_hash[20];
-	unsigned char  reserved1[4];
-	unsigned char  key_format;
-	unsigned char  reserved2;
-	unsigned char  key_name_hash[20];
-	unsigned char  key_use_flags[4];
-	unsigned short p_len;
-	unsigned short q_len;
-	unsigned short dp_len;
-	unsigned short dq_len;
-	unsigned short u_len;
-	unsigned short mod_len;
-	unsigned char  reserved3[4];
-	unsigned short pad_len;
-	unsigned char  reserved4[52];
-	unsigned char  confounder[8];
-};
-
-#define CCA_PVT_EXT_CRT_SEC_ID_PVT 0x08
-#define CCA_PVT_EXT_CRT_SEC_FMT_CL 0x40
-
-struct cca_private_ext_CRT {
-	struct cca_token_hdr	   pvtCrtHdr;
-	struct cca_pvt_ext_CRT_sec pvtCrtSec;
-	struct cca_public_sec	   pubCrtSec;
-};
-
-struct ap_status_word {
-	unsigned char q_stat_flags;
-	unsigned char response_code;
-	unsigned char reserved[2];
-};
-
-#define AP_Q_STATUS_EMPTY		0x80
-#define AP_Q_STATUS_REPLIES_WAITING	0x40
-#define AP_Q_STATUS_ARRAY_FULL		0x20
-
-#define AP_RESPONSE_NORMAL		0x00
-#define AP_RESPONSE_Q_NOT_AVAIL		0x01
-#define AP_RESPONSE_RESET_IN_PROGRESS	0x02
-#define AP_RESPONSE_DECONFIGURED	0x03
-#define AP_RESPONSE_CHECKSTOPPED	0x04
-#define AP_RESPONSE_BUSY		0x05
-#define AP_RESPONSE_Q_FULL		0x10
-#define AP_RESPONSE_NO_PENDING_REPLY	0x10
-#define AP_RESPONSE_INDEX_TOO_BIG	0x11
-#define AP_RESPONSE_NO_FIRST_PART	0x13
-#define AP_RESPONSE_MESSAGE_TOO_BIG	0x15
-
-#define AP_MAX_CDX_BITL		4
-#define AP_RQID_RESERVED_BITL	4
-#define SKIP_BITL		(AP_MAX_CDX_BITL + AP_RQID_RESERVED_BITL)
-
-struct type4_hdr {
-	unsigned char  reserved1;
-	unsigned char  msg_type_code;
-	unsigned short msg_len;
-	unsigned char  request_code;
-	unsigned char  msg_fmt;
-	unsigned short reserved2;
-};
-
-#define TYPE4_TYPE_CODE 0x04
-#define TYPE4_REQU_CODE 0x40
-
-#define TYPE4_SME_LEN 0x0188
-#define TYPE4_LME_LEN 0x0308
-#define TYPE4_SCR_LEN 0x01E0
-#define TYPE4_LCR_LEN 0x03A0
-
-#define TYPE4_SME_FMT 0x00
-#define TYPE4_LME_FMT 0x10
-#define TYPE4_SCR_FMT 0x40
-#define TYPE4_LCR_FMT 0x50
-
-struct type4_sme {
-	struct type4_hdr header;
-	unsigned char	 message[128];
-	unsigned char	 exponent[128];
-	unsigned char	 modulus[128];
-};
-
-struct type4_lme {
-	struct type4_hdr header;
-	unsigned char	 message[256];
-	unsigned char	 exponent[256];
-	unsigned char	 modulus[256];
-};
-
-struct type4_scr {
-	struct type4_hdr header;
-	unsigned char	 message[128];
-	unsigned char	 dp[72];
-	unsigned char	 dq[64];
-	unsigned char	 p[72];
-	unsigned char	 q[64];
-	unsigned char	 u[72];
-};
-
-struct type4_lcr {
-	struct type4_hdr header;
-	unsigned char	 message[256];
-	unsigned char	 dp[136];
-	unsigned char	 dq[128];
-	unsigned char	 p[136];
-	unsigned char	 q[128];
-	unsigned char	 u[136];
-};
-
-union type4_msg {
-	struct type4_sme sme;
-	struct type4_lme lme;
-	struct type4_scr scr;
-	struct type4_lcr lcr;
-};
-
-struct type84_hdr {
-	unsigned char  reserved1;
-	unsigned char  code;
-	unsigned short len;
-	unsigned char  reserved2[4];
-};
-
-#define TYPE84_RSP_CODE 0x84
-
-struct type6_hdr {
-	unsigned char reserved1;
-	unsigned char type;
-	unsigned char reserved2[2];
-	unsigned char right[4];
-	unsigned char reserved3[2];
-	unsigned char reserved4[2];
-	unsigned char apfs[4];
-	unsigned int  offset1;
-	unsigned int  offset2;
-	unsigned int  offset3;
-	unsigned int  offset4;
-	unsigned char agent_id[16];
-	unsigned char rqid[2];
-	unsigned char reserved5[2];
-	unsigned char function_code[2];
-	unsigned char reserved6[2];
-	unsigned int  ToCardLen1;
-	unsigned int  ToCardLen2;
-	unsigned int  ToCardLen3;
-	unsigned int  ToCardLen4;
-	unsigned int  FromCardLen1;
-	unsigned int  FromCardLen2;
-	unsigned int  FromCardLen3;
-	unsigned int  FromCardLen4;
-};
-
-struct CPRB {
-	unsigned char cprb_len[2];
-	unsigned char cprb_ver_id;
-	unsigned char pad_000;
-	unsigned char srpi_rtcode[4];
-	unsigned char srpi_verb;
-	unsigned char flags;
-	unsigned char func_id[2];
-	unsigned char checkpoint_flag;
-	unsigned char resv2;
-	unsigned char req_parml[2];
-	unsigned char req_parmp[4];
-	unsigned char req_datal[4];
-	unsigned char req_datap[4];
-	unsigned char rpl_parml[2];
-	unsigned char pad_001[2];
-	unsigned char rpl_parmp[4];
-	unsigned char rpl_datal[4];
-	unsigned char rpl_datap[4];
-	unsigned char ccp_rscode[2];
-	unsigned char ccp_rtcode[2];
-	unsigned char repd_parml[2];
-	unsigned char mac_data_len[2];
-	unsigned char repd_datal[4];
-	unsigned char req_pc[2];
-	unsigned char res_origin[8];
-	unsigned char mac_value[8];
-	unsigned char logon_id[8];
-	unsigned char usage_domain[2];
-	unsigned char resv3[18];
-	unsigned char svr_namel[2];
-	unsigned char svr_name[8];
-};
-
-struct type6_msg {
-	struct type6_hdr header;
-	struct CPRB	 CPRB;
-};
-
-struct type86_hdr {
-	unsigned char reserved1;
-	unsigned char type;
-	unsigned char format;
-	unsigned char reserved2;
-	unsigned char reply_code;
-	unsigned char reserved3[3];
-};
-
-#define TYPE86_RSP_CODE 0x86
-#define TYPE86_FMT2	0x02
-
-struct type86_fmt2_msg {
-	struct type86_hdr header;
-	unsigned char	  reserved[4];
-	unsigned char	  apfs[4];
-	unsigned int	  count1;
-	unsigned int	  offset1;
-	unsigned int	  count2;
-	unsigned int	  offset2;
-	unsigned int	  count3;
-	unsigned int	  offset3;
-	unsigned int	  count4;
-	unsigned int	  offset4;
-};
-
-static struct type6_hdr static_type6_hdr = {
-	0x00,
-	0x06,
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	0x00000058,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	{0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50,
-	 0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x50,0x44},
-	{0x00,0x00},
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000
-};
-
-static struct type6_hdr static_type6_hdrX = {
-	0x00,
-	0x06,
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	0x00000058,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	{0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x50,0x44},
-	{0x00,0x00},
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000
-};
-
-static struct CPRB static_cprb = {
-	{0x70,0x00},
-	0x41,
-	0x00,
-	{0x00,0x00,0x00,0x00},
-	0x00,
-	0x00,
-	{0x54,0x32},
-	0x01,
-	0x00,
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00},
-	{0x08,0x00},
-	{0x49,0x43,0x53,0x46,0x20,0x20,0x20,0x20}
-};
-
-struct function_and_rules_block {
-	unsigned char function_code[2];
-	unsigned char ulen[2];
-	unsigned char only_rule[8];
-};
-
-static struct function_and_rules_block static_pkd_function_and_rules = {
-	{0x50,0x44},
-	{0x0A,0x00},
-	{'P','K','C','S','-','1','.','2'}
-};
-
-static struct function_and_rules_block static_pke_function_and_rules = {
-	{0x50,0x4B},
-	{0x0A,0x00},
-	{'P','K','C','S','-','1','.','2'}
-};
-
-struct T6_keyBlock_hdr {
-	unsigned char blen[2];
-	unsigned char ulen[2];
-	unsigned char flags[2];
-};
-
-static struct T6_keyBlock_hdr static_T6_keyBlock_hdr = {
-	{0x89,0x01},
-	{0x87,0x01},
-	{0x00}
-};
-
-static struct CPRBX static_cprbx = {
-	0x00DC,
-	0x02,
-	{0x00,0x00,0x00},
-	{0x54,0x32},
-	{0x00,0x00,0x00,0x00},
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	{0x00,0x00,0x00,0x00},
-	0x00000000,
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	0x0000,
-	0x0000,
-	0x00000000,
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	0x00,
-	0x00,
-	0x0000,
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-	{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-	 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}
-};
-
-static struct function_and_rules_block static_pkd_function_and_rulesX_MCL2 = {
-	{0x50,0x44},
-	{0x00,0x0A},
-	{'P','K','C','S','-','1','.','2'}
-};
-
-static struct function_and_rules_block static_pke_function_and_rulesX_MCL2 = {
-	{0x50,0x4B},
-	{0x00,0x0A},
-	{'Z','E','R','O','-','P','A','D'}
-};
-
-static struct function_and_rules_block static_pkd_function_and_rulesX = {
-	{0x50,0x44},
-	{0x00,0x0A},
-	{'Z','E','R','O','-','P','A','D'}
-};
-
-static struct function_and_rules_block static_pke_function_and_rulesX = {
-	{0x50,0x4B},
-	{0x00,0x0A},
-	{'M','R','P',' ',' ',' ',' ',' '}
-};
-
-static unsigned char static_PKE_function_code[2] = {0x50, 0x4B};
-
-struct T6_keyBlock_hdrX {
-	unsigned short blen;
-	unsigned short ulen;
-	unsigned char flags[2];
-};
-
-static unsigned char static_pad[256] = {
-0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD,0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57,
-0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B,0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39,
-0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5,0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D,
-0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB,0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F,
-0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9,0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45,
-0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9,0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F,
-0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD,0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D,
-0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD,0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9,
-0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B,0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B,
-0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B,0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD,
-0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7,0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1,
-0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3,0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23,
-0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55,0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43,
-0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F,0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F,
-0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5,0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD,
-0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41,0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09
-};
-
-static struct cca_private_ext_ME static_pvt_me_key = {
-	{
-		0x1E,
-		0x00,
-		0x0183,
-		{0x00,0x00,0x00,0x00}
-	},
-
-	{
-		0x02,
-		0x00,
-		0x016C,
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00},
-		0x00,
-		0x00,
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00},
-		{0x80,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00},
-		{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}
-	},
-
-	{
-		0x04,
-		0x00,
-		0x000F,
-		{0x00,0x00},
-		0x0003,
-		0x0000,
-		0x0000,
-		{0x01,0x00,0x01}
-	}
-};
-
-static struct cca_public_key static_public_key = {
-	{
-		0x1E,
-		0x00,
-		0x0000,
-		{0x00,0x00,0x00,0x00}
-	},
-
-	{
-		0x04,
-		0x00,
-		0x0000,
-		{0x00,0x00},
-		0x0000,
-		0x0000,
-		0x0000,
-		{0x01,0x00,0x01}
-	}
-};
-
-#define FIXED_TYPE6_ME_LEN 0x0000025F
-
-#define FIXED_TYPE6_ME_EN_LEN 0x000000F0
-
-#define FIXED_TYPE6_ME_LENX 0x000002CB
-
-#define FIXED_TYPE6_ME_EN_LENX 0x0000015C
-
-static struct cca_public_sec static_cca_pub_sec = {
-	0x04,
-	0x00,
-	0x000f,
-	{0x00,0x00},
-	0x0003,
-	0x0000,
-	0x0000,
-	{0x01,0x00,0x01}
-};
-
-#define FIXED_TYPE6_CR_LEN 0x00000177
-
-#define FIXED_TYPE6_CR_LENX 0x000001E3
-
-#define MAX_RESPONSE_SIZE 0x00000710
-
-#define MAX_RESPONSEX_SIZE 0x0000077C
-
-#define RESPONSE_CPRB_SIZE  0x000006B8
-#define RESPONSE_CPRBX_SIZE 0x00000724
-
-struct type50_hdr {
-	u8    reserved1;
-	u8    msg_type_code;
-	u16   msg_len;
-	u8    reserved2;
-	u8    ignored;
-	u16   reserved3;
-};
-
-#define TYPE50_TYPE_CODE 0x50
-
-#define TYPE50_MEB1_LEN (sizeof(struct type50_meb1_msg))
-#define TYPE50_MEB2_LEN (sizeof(struct type50_meb2_msg))
-#define TYPE50_CRB1_LEN (sizeof(struct type50_crb1_msg))
-#define TYPE50_CRB2_LEN (sizeof(struct type50_crb2_msg))
-
-#define TYPE50_MEB1_FMT 0x0001
-#define TYPE50_MEB2_FMT 0x0002
-#define TYPE50_CRB1_FMT 0x0011
-#define TYPE50_CRB2_FMT 0x0012
-
-struct type50_meb1_msg {
-	struct type50_hdr	header;
-	u16			keyblock_type;
-	u8			reserved[6];
-	u8			exponent[128];
-	u8			modulus[128];
-	u8			message[128];
-};
-
-struct type50_meb2_msg {
-	struct type50_hdr	header;
-	u16			keyblock_type;
-	u8			reserved[6];
-	u8			exponent[256];
-	u8			modulus[256];
-	u8			message[256];
-};
-
-struct type50_crb1_msg {
-	struct type50_hdr	header;
-	u16			keyblock_type;
-	u8			reserved[6];
-	u8			p[64];
-	u8			q[64];
-	u8			dp[64];
-	u8			dq[64];
-	u8			u[64];
-	u8			message[128];
-};
-
-struct type50_crb2_msg {
-	struct type50_hdr	header;
-	u16			keyblock_type;
-	u8			reserved[6];
-	u8			p[128];
-	u8			q[128];
-	u8			dp[128];
-	u8			dq[128];
-	u8			u[128];
-	u8			message[256];
-};
-
-union type50_msg {
-	struct type50_meb1_msg meb1;
-	struct type50_meb2_msg meb2;
-	struct type50_crb1_msg crb1;
-	struct type50_crb2_msg crb2;
-};
-
-struct type80_hdr {
-	u8	reserved1;
-	u8	type;
-	u16	len;
-	u8	code;
-	u8	reserved2[3];
-	u8	reserved3[8];
-};
-
-#define TYPE80_RSP_CODE 0x80
-
-struct error_hdr {
-	unsigned char reserved1;
-	unsigned char type;
-	unsigned char reserved2[2];
-	unsigned char reply_code;
-	unsigned char reserved3[3];
-};
-
-#define TYPE82_RSP_CODE 0x82
-#define TYPE88_RSP_CODE 0x88
-
-#define REP82_ERROR_MACHINE_FAILURE  0x10
-#define REP82_ERROR_PREEMPT_FAILURE  0x12
-#define REP82_ERROR_CHECKPT_FAILURE  0x14
-#define REP82_ERROR_MESSAGE_TYPE     0x20
-#define REP82_ERROR_INVALID_COMM_CD  0x21
-#define REP82_ERROR_INVALID_MSG_LEN  0x23
-#define REP82_ERROR_RESERVD_FIELD    0x24
-#define REP82_ERROR_FORMAT_FIELD     0x29
-#define REP82_ERROR_INVALID_COMMAND  0x30
-#define REP82_ERROR_MALFORMED_MSG    0x40
-#define REP82_ERROR_RESERVED_FIELDO  0x50
-#define REP82_ERROR_WORD_ALIGNMENT   0x60
-#define REP82_ERROR_MESSAGE_LENGTH   0x80
-#define REP82_ERROR_OPERAND_INVALID  0x82
-#define REP82_ERROR_OPERAND_SIZE     0x84
-#define REP82_ERROR_EVEN_MOD_IN_OPND 0x85
-#define REP82_ERROR_RESERVED_FIELD   0x88
-#define REP82_ERROR_TRANSPORT_FAIL   0x90
-#define REP82_ERROR_PACKET_TRUNCATED 0xA0
-#define REP82_ERROR_ZERO_BUFFER_LEN  0xB0
-
-#define REP88_ERROR_MODULE_FAILURE   0x10
-#define REP88_ERROR_MODULE_TIMEOUT   0x11
-#define REP88_ERROR_MODULE_NOTINIT   0x13
-#define REP88_ERROR_MODULE_NOTAVAIL  0x14
-#define REP88_ERROR_MODULE_DISABLED  0x15
-#define REP88_ERROR_MODULE_IN_DIAGN  0x17
-#define REP88_ERROR_FASTPATH_DISABLD 0x19
-#define REP88_ERROR_MESSAGE_TYPE     0x20
-#define REP88_ERROR_MESSAGE_MALFORMD 0x22
-#define REP88_ERROR_MESSAGE_LENGTH   0x23
-#define REP88_ERROR_RESERVED_FIELD   0x24
-#define REP88_ERROR_KEY_TYPE         0x34
-#define REP88_ERROR_INVALID_KEY      0x82
-#define REP88_ERROR_OPERAND          0x84
-#define REP88_ERROR_OPERAND_EVEN_MOD 0x85
-
-#define CALLER_HEADER 12
-
-static inline int
-testq(int q_nr, int *q_depth, int *dev_type, struct ap_status_word *stat)
-{
-	int ccode;
-
-	asm volatile
-#ifdef CONFIG_64BIT
-	("	llgfr	0,%4		\n"
-	 "	slgr	1,1		\n"
-	 "	lgr	2,1		\n"
-	 "0:	.long	0xb2af0000	\n"
-	 "1:	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	iihh	%0,0		\n"
-	 "	iihl	%0,0		\n"
-	 "	lgr	%1,1		\n"
-	 "	lgr	%3,2		\n"
-	 "	srl	%3,24		\n"
-	 "	sll	2,24		\n"
-	 "	srl	2,24		\n"
-	 "	lgr	%2,2		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h5		\n"
-	 "	jg	2b		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	8		\n"
-	 "	.quad	0b,3b		\n"
-	 "	.quad	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat),"=d" (*q_depth), "=d" (*dev_type)
-	 :"d" (q_nr), "K" (DEV_TSQ_EXCEPTION)
-	 :"cc","0","1","2","memory");
-#else
-	("	lr	0,%4		\n"
-	 "	slr	1,1		\n"
-	 "	lr	2,1		\n"
-	 "0:	.long	0xb2af0000	\n"
-	 "1:	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	lr	%1,1		\n"
-	 "	lr	%3,2		\n"
-	 "	srl	%3,24		\n"
-	 "	sll	2,24		\n"
-	 "	srl	2,24		\n"
-	 "	lr	%2,2		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h5		\n"
-	 "	bras	1,4f		\n"
-	 "	.long	2b		\n"
-	 "4:				\n"
-	 "	l	1,0(1)		\n"
-	 "	br	1		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	4		\n"
-	 "	.long	0b,3b		\n"
-	 "	.long	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat),"=d" (*q_depth), "=d" (*dev_type)
-	 :"d" (q_nr), "K" (DEV_TSQ_EXCEPTION)
-	 :"cc","0","1","2","memory");
-#endif
-	return ccode;
-}
-
-static inline int
-resetq(int q_nr, struct ap_status_word *stat_p)
-{
-	int ccode;
-
-	asm volatile
-#ifdef CONFIG_64BIT
-	("	llgfr	0,%2		\n"
-	 "	lghi	1,1		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	slgr	1,1		\n"
-	 "	lgr	2,1		\n"
-	 "0:	.long	0xb2af0000	\n"
-	 "1:	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	iihh	%0,0		\n"
-	 "	iihl	%0,0		\n"
-	 "	lgr	%1,1		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h3		\n"
-	 "	jg	2b		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	8		\n"
-	 "	.quad	0b,3b		\n"
-	 "	.quad	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat_p)
-	 :"d" (q_nr), "K" (DEV_RSQ_EXCEPTION)
-	 :"cc","0","1","2","memory");
-#else
-	("	lr	0,%2		\n"
-	 "	lhi	1,1		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	slr	1,1		\n"
-	 "	lr	2,1		\n"
-	 "0:	.long	0xb2af0000	\n"
-	 "1:	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	lr	%1,1		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h3		\n"
-	 "	bras	1,4f		\n"
-	 "	.long	2b		\n"
-	 "4:				\n"
-	 "	l	1,0(1)		\n"
-	 "	br	1		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	4		\n"
-	 "	.long	0b,3b		\n"
-	 "	.long	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat_p)
-	 :"d" (q_nr), "K" (DEV_RSQ_EXCEPTION)
-	 :"cc","0","1","2","memory");
-#endif
-	return ccode;
-}
-
-static inline int
-sen(int msg_len, unsigned char *msg_ext, struct ap_status_word *stat)
-{
-	int ccode;
-
-	asm volatile
-#ifdef CONFIG_64BIT
-	("	lgr	6,%3		\n"
-	 "	llgfr	7,%2		\n"
-	 "	llgt	0,0(6)		\n"
-	 "	lghi	1,64		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	la	6,4(6)		\n"
-	 "	llgt	2,0(6)		\n"
-	 "	llgt	3,4(6)		\n"
-	 "	la	6,8(6)		\n"
-	 "	slr	1,1		\n"
-	 "0:	.long	0xb2ad0026	\n"
-	 "1:	brc	2,0b		\n"
-	 "	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	iihh	%0,0		\n"
-	 "	iihl	%0,0		\n"
-	 "	lgr	%1,1		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h4		\n"
-	 "	jg	2b		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	8		\n"
-	 "	.quad	0b,3b		\n"
-	 "	.quad	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat)
-	 :"d" (msg_len),"a" (msg_ext), "K" (DEV_SEN_EXCEPTION)
-	 :"cc","0","1","2","3","6","7","memory");
-#else
-	("	lr	6,%3		\n"
-	 "	lr	7,%2		\n"
-	 "	l	0,0(6)		\n"
-	 "	lhi	1,64		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	la	6,4(6)		\n"
-	 "	l	2,0(6)		\n"
-	 "	l	3,4(6)		\n"
-	 "	la	6,8(6)		\n"
-	 "	slr	1,1		\n"
-	 "0:	.long	0xb2ad0026	\n"
-	 "1:	brc	2,0b		\n"
-	 "	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	lr	%1,1		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi	%0,%h4		\n"
-	 "	bras	1,4f		\n"
-	 "	.long	2b		\n"
-	 "4:				\n"
-	 "	l	1,0(1)		\n"
-	 "	br	1		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "	.align	4		\n"
-	 "	.long	0b,3b		\n"
-	 "	.long	1b,3b		\n"
-	 ".previous"
-	 :"=d" (ccode),"=d" (*stat)
-	 :"d" (msg_len),"a" (msg_ext), "K" (DEV_SEN_EXCEPTION)
-	 :"cc","0","1","2","3","6","7","memory");
-#endif
-	return ccode;
-}
-
-static inline int
-rec(int q_nr, int buff_l, unsigned char *rsp, unsigned char *id,
-    struct ap_status_word *st)
-{
-	int ccode;
-
-	asm volatile
-#ifdef CONFIG_64BIT
-	("	llgfr	0,%2		\n"
-	 "	lgr	3,%4		\n"
-	 "	lgr	6,%3		\n"
-	 "	llgfr	7,%5		\n"
-	 "	lghi	1,128		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	slgr	1,1		\n"
-	 "	lgr	2,1		\n"
-	 "	lgr	4,1		\n"
-	 "	lgr	5,1		\n"
-	 "0:	.long	0xb2ae0046	\n"
-	 "1:	brc	2,0b		\n"
-	 "	brc	4,0b		\n"
-	 "	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	iihh	%0,0		\n"
-	 "	iihl	%0,0		\n"
-	 "	lgr	%1,1		\n"
-	 "	st	4,0(3)		\n"
-	 "	st	5,4(3)		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi   %0,%h6		\n"
-	 "	jg    2b		\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "   .align	8		\n"
-	 "   .quad	0b,3b		\n"
-	 "   .quad	1b,3b		\n"
-	 ".previous"
-	 :"=d"(ccode),"=d"(*st)
-	 :"d" (q_nr), "d" (rsp), "d" (id), "d" (buff_l), "K" (DEV_REC_EXCEPTION)
-	 :"cc","0","1","2","3","4","5","6","7","memory");
-#else
-	("	lr	0,%2		\n"
-	 "	lr	3,%4		\n"
-	 "	lr	6,%3		\n"
-	 "	lr	7,%5		\n"
-	 "	lhi	1,128		\n"
-	 "	sll	1,24		\n"
-	 "	or	0,1		\n"
-	 "	slr	1,1		\n"
-	 "	lr	2,1		\n"
-	 "	lr	4,1		\n"
-	 "	lr	5,1		\n"
-	 "0:	.long	0xb2ae0046	\n"
-	 "1:	brc	2,0b		\n"
-	 "	brc	4,0b		\n"
-	 "	ipm	%0		\n"
-	 "	srl	%0,28		\n"
-	 "	lr	%1,1		\n"
-	 "	st	4,0(3)		\n"
-	 "	st	5,4(3)		\n"
-	 "2:				\n"
-	 ".section .fixup,\"ax\"	\n"
-	 "3:				\n"
-	 "	lhi   %0,%h6		\n"
-	 "	bras  1,4f		\n"
-	 "	.long 2b		\n"
-	 "4:				\n"
-	 "	l     1,0(1)		\n"
-	 "	br    1			\n"
-	 ".previous			\n"
-	 ".section __ex_table,\"a\"	\n"
-	 "   .align	4		\n"
-	 "   .long	0b,3b		\n"
-	 "   .long	1b,3b		\n"
-	 ".previous"
-	 :"=d"(ccode),"=d"(*st)
-	 :"d" (q_nr), "d" (rsp), "d" (id), "d" (buff_l), "K" (DEV_REC_EXCEPTION)
-	 :"cc","0","1","2","3","4","5","6","7","memory");
-#endif
-	return ccode;
-}
-
-static inline void
-itoLe2(int *i_p, unsigned char *lechars)
-{
-	*lechars       = *((unsigned char *) i_p + sizeof(int) - 1);
-	*(lechars + 1) = *((unsigned char *) i_p + sizeof(int) - 2);
-}
-
-static inline void
-le2toI(unsigned char *lechars, int *i_p)
-{
-	unsigned char *ic_p;
-	*i_p = 0;
-	ic_p = (unsigned char *) i_p;
-	*(ic_p + 2) = *(lechars + 1);
-	*(ic_p + 3) = *(lechars);
-}
-
-static inline int
-is_empty(unsigned char *ptr, int len)
-{
-	return !memcmp(ptr, (unsigned char *) &static_pvt_me_key+60, len);
-}
-
-enum hdstat
-query_online(int deviceNr, int cdx, int resetNr, int *q_depth, int *dev_type)
-{
-	int q_nr, i, t_depth, t_dev_type;
-	enum devstat ccode;
-	struct ap_status_word stat_word;
-	enum hdstat stat;
-	int break_out;
-
-	q_nr = (deviceNr << SKIP_BITL) + cdx;
-	stat = HD_BUSY;
-	ccode = testq(q_nr, &t_depth, &t_dev_type, &stat_word);
-	PDEBUG("ccode %d response_code %02X\n", ccode, stat_word.response_code);
-	break_out = 0;
-	for (i = 0; i < resetNr; i++) {
-		if (ccode > 3) {
-			PRINTKC("Exception testing device %d\n", i);
-			return HD_TSQ_EXCEPTION;
-		}
-		switch (ccode) {
-		case 0:
-			PDEBUG("t_dev_type %d\n", t_dev_type);
-			break_out = 1;
-			stat = HD_ONLINE;
-			*q_depth = t_depth + 1;
-			switch (t_dev_type) {
-			case PCICA_HW:
-				*dev_type = PCICA;
-				break;
-			case PCICC_HW:
-				*dev_type = PCICC;
-				break;
-			case PCIXCC_HW:
-				*dev_type = PCIXCC_UNK;
-				break;
-			case CEX2C_HW:
-				*dev_type = CEX2C;
-				break;
-			case CEX2A_HW:
-				*dev_type = CEX2A;
-				break;
-			default:
-				*dev_type = NILDEV;
-				break;
-			}
-			PDEBUG("available device %d: Q depth = %d, dev "
-			       "type = %d, stat = %02X%02X%02X%02X\n",
-			       deviceNr, *q_depth, *dev_type,
-			       stat_word.q_stat_flags,
-			       stat_word.response_code,
-			       stat_word.reserved[0],
-			       stat_word.reserved[1]);
-			break;
-		case 3:
-			switch (stat_word.response_code) {
-			case AP_RESPONSE_NORMAL:
-				stat = HD_ONLINE;
-				break_out = 1;
-				*q_depth = t_depth + 1;
-				*dev_type = t_dev_type;
-				PDEBUG("cc3, available device "
-				       "%d: Q depth = %d, dev "
-				       "type = %d, stat = "
-				       "%02X%02X%02X%02X\n",
-				       deviceNr, *q_depth,
-				       *dev_type,
-				       stat_word.q_stat_flags,
-				       stat_word.response_code,
-				       stat_word.reserved[0],
-				       stat_word.reserved[1]);
-				break;
-			case AP_RESPONSE_Q_NOT_AVAIL:
-				stat = HD_NOT_THERE;
-				break_out = 1;
-				break;
-			case AP_RESPONSE_RESET_IN_PROGRESS:
-				PDEBUG("device %d in reset\n",
-				       deviceNr);
-				break;
-			case AP_RESPONSE_DECONFIGURED:
-				stat = HD_DECONFIGURED;
-				break_out = 1;
-				break;
-			case AP_RESPONSE_CHECKSTOPPED:
-				stat = HD_CHECKSTOPPED;
-				break_out = 1;
-				break;
-			case AP_RESPONSE_BUSY:
-				PDEBUG("device %d busy\n",
-				       deviceNr);
-				break;
-			default:
-				break;
-			}
-			break;
-		default:
-			stat = HD_NOT_THERE;
-			break_out = 1;
-			break;
-		}
-		if (break_out)
-			break;
-
-		udelay(5);
-
-		ccode = testq(q_nr, &t_depth, &t_dev_type, &stat_word);
-	}
-	return stat;
-}
-
-enum devstat
-reset_device(int deviceNr, int cdx, int resetNr)
-{
-	int q_nr, ccode = 0, dummy_qdepth, dummy_devType, i;
-	struct ap_status_word stat_word;
-	enum devstat stat;
-	int break_out;
-
-	q_nr = (deviceNr << SKIP_BITL) + cdx;
-	stat = DEV_GONE;
-	ccode = resetq(q_nr, &stat_word);
-	if (ccode > 3)
-		return DEV_RSQ_EXCEPTION;
-
-	break_out = 0;
-	for (i = 0; i < resetNr; i++) {
-		switch (ccode) {
-		case 0:
-			stat = DEV_ONLINE;
-			if (stat_word.q_stat_flags & AP_Q_STATUS_EMPTY)
-				break_out = 1;
-			break;
-		case 3:
-			switch (stat_word.response_code) {
-			case AP_RESPONSE_NORMAL:
-				stat = DEV_ONLINE;
-				if (stat_word.q_stat_flags & AP_Q_STATUS_EMPTY)
-					break_out = 1;
-				break;
-			case AP_RESPONSE_Q_NOT_AVAIL:
-			case AP_RESPONSE_DECONFIGURED:
-			case AP_RESPONSE_CHECKSTOPPED:
-				stat = DEV_GONE;
-				break_out = 1;
-				break;
-			case AP_RESPONSE_RESET_IN_PROGRESS:
-			case AP_RESPONSE_BUSY:
-			default:
-				break;
-			}
-			break;
-		default:
-			stat = DEV_GONE;
-			break_out = 1;
-			break;
-		}
-		if (break_out == 1)
-			break;
-		udelay(5);
-
-		ccode = testq(q_nr, &dummy_qdepth, &dummy_devType, &stat_word);
-		if (ccode > 3) {
-			stat = DEV_TSQ_EXCEPTION;
-			break;
-		}
-	}
-	PDEBUG("Number of testq's needed for reset: %d\n", i);
-
-	if (i >= resetNr) {
-	  stat = DEV_GONE;
-	}
-
-	return stat;
-}
-
-#ifdef DEBUG_HYDRA_MSGS
-static inline void
-print_buffer(unsigned char *buffer, int bufflen)
-{
-	int i;
-	for (i = 0; i < bufflen; i += 16) {
-		PRINTK("%04X: %02X%02X%02X%02X %02X%02X%02X%02X "
-		       "%02X%02X%02X%02X %02X%02X%02X%02X\n", i,
-		       buffer[i+0], buffer[i+1], buffer[i+2], buffer[i+3],
-		       buffer[i+4], buffer[i+5], buffer[i+6], buffer[i+7],
-		       buffer[i+8], buffer[i+9], buffer[i+10], buffer[i+11],
-		       buffer[i+12], buffer[i+13], buffer[i+14], buffer[i+15]);
-	}
-}
-#endif
-
-enum devstat
-send_to_AP(int dev_nr, int cdx, int msg_len, unsigned char *msg_ext)
-{
-	struct ap_status_word stat_word;
-	enum devstat stat;
-	int ccode;
-	u32 *q_nr_p = (u32 *)msg_ext;
-
-	*q_nr_p = (dev_nr << SKIP_BITL) + cdx;
-	PDEBUG("msg_len passed to sen: %d\n", msg_len);
-	PDEBUG("q number passed to sen: %02x%02x%02x%02x\n",
-	       msg_ext[0], msg_ext[1], msg_ext[2], msg_ext[3]);
-	stat = DEV_GONE;
-
-#ifdef DEBUG_HYDRA_MSGS
-	PRINTK("Request header: %02X%02X%02X%02X %02X%02X%02X%02X "
-	       "%02X%02X%02X%02X\n",
-	       msg_ext[0], msg_ext[1], msg_ext[2], msg_ext[3],
-	       msg_ext[4], msg_ext[5], msg_ext[6], msg_ext[7],
-	       msg_ext[8], msg_ext[9], msg_ext[10], msg_ext[11]);
-	print_buffer(msg_ext+CALLER_HEADER, msg_len);
-#endif
-
-	ccode = sen(msg_len, msg_ext, &stat_word);
-	if (ccode > 3)
-		return DEV_SEN_EXCEPTION;
-
-	PDEBUG("nq cc: %u, st: %02x%02x%02x%02x\n",
-	       ccode, stat_word.q_stat_flags, stat_word.response_code,
-	       stat_word.reserved[0], stat_word.reserved[1]);
-	switch (ccode) {
-	case 0:
-		stat = DEV_ONLINE;
-		break;
-	case 1:
-		stat = DEV_GONE;
-		break;
-	case 3:
-		switch (stat_word.response_code) {
-		case AP_RESPONSE_NORMAL:
-			stat = DEV_ONLINE;
-			break;
-		case AP_RESPONSE_Q_FULL:
-			stat = DEV_QUEUE_FULL;
-			break;
-		default:
-			stat = DEV_GONE;
-			break;
-		}
-		break;
-	default:
-		stat = DEV_GONE;
-		break;
-	}
-
-	return stat;
-}
-
-enum devstat
-receive_from_AP(int dev_nr, int cdx, int resplen, unsigned char *resp,
-		unsigned char *psmid)
-{
-	int ccode;
-	struct ap_status_word stat_word;
-	enum devstat stat;
-
-	memset(resp, 0x00, 8);
-
-	ccode = rec((dev_nr << SKIP_BITL) + cdx, resplen, resp, psmid,
-		    &stat_word);
-	if (ccode > 3)
-		return DEV_REC_EXCEPTION;
-
-	PDEBUG("dq cc: %u, st: %02x%02x%02x%02x\n",
-	       ccode, stat_word.q_stat_flags, stat_word.response_code,
-	       stat_word.reserved[0], stat_word.reserved[1]);
-
-	stat = DEV_GONE;
-	switch (ccode) {
-	case 0:
-		stat = DEV_ONLINE;
-#ifdef DEBUG_HYDRA_MSGS
-		print_buffer(resp, resplen);
-#endif
-		break;
-	case 3:
-		switch (stat_word.response_code) {
-		case AP_RESPONSE_NORMAL:
-			stat = DEV_ONLINE;
-			break;
-		case AP_RESPONSE_NO_PENDING_REPLY:
-			if (stat_word.q_stat_flags & AP_Q_STATUS_EMPTY)
-				stat = DEV_EMPTY;
-			else
-				stat = DEV_NO_WORK;
-			break;
-		case AP_RESPONSE_INDEX_TOO_BIG:
-		case AP_RESPONSE_NO_FIRST_PART:
-		case AP_RESPONSE_MESSAGE_TOO_BIG:
-			stat = DEV_BAD_MESSAGE;
-			break;
-		default:
-			break;
-		}
-		break;
-	default:
-		break;
-	}
-
-	return stat;
-}
-
-static inline int
-pad_msg(unsigned char *buffer, int  totalLength, int msgLength)
-{
-	int pad_len;
-
-	for (pad_len = 0; pad_len < (totalLength - msgLength); pad_len++)
-		if (buffer[pad_len] != 0x00)
-			break;
-	pad_len -= 3;
-	if (pad_len < 8)
-		return SEN_PAD_ERROR;
-
-	buffer[0] = 0x00;
-	buffer[1] = 0x02;
-
-	memcpy(buffer+2, static_pad, pad_len);
-
-	buffer[pad_len + 2] = 0x00;
-
-	return 0;
-}
-
-static inline int
-is_common_public_key(unsigned char *key, int len)
-{
-	int i;
-
-	for (i = 0; i < len; i++)
-		if (key[i])
-			break;
-	key += i;
-	len -= i;
-	if (((len == 1) && (key[0] == 3)) ||
-	    ((len == 3) && (key[0] == 1) && (key[1] == 0) && (key[2] == 1)))
-		return 1;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type4MEX_msg(struct ica_rsa_modexpo *icaMex_p, int *z90cMsg_l_p,
-			   union type4_msg *z90cMsg_p)
-{
-	int mod_len, msg_size, mod_tgt_len, exp_tgt_len, inp_tgt_len;
-	unsigned char *mod_tgt, *exp_tgt, *inp_tgt;
-	union type4_msg *tmp_type4_msg;
-
-	mod_len = icaMex_p->inputdatalength;
-
-	msg_size = ((mod_len <= 128) ? TYPE4_SME_LEN : TYPE4_LME_LEN) +
-		    CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, msg_size);
-
-	tmp_type4_msg = (union type4_msg *)
-		((unsigned char *) z90cMsg_p + CALLER_HEADER);
-
-	tmp_type4_msg->sme.header.msg_type_code = TYPE4_TYPE_CODE;
-	tmp_type4_msg->sme.header.request_code = TYPE4_REQU_CODE;
-
-	if (mod_len <= 128) {
-		tmp_type4_msg->sme.header.msg_fmt = TYPE4_SME_FMT;
-		tmp_type4_msg->sme.header.msg_len = TYPE4_SME_LEN;
-		mod_tgt = tmp_type4_msg->sme.modulus;
-		mod_tgt_len = sizeof(tmp_type4_msg->sme.modulus);
-		exp_tgt = tmp_type4_msg->sme.exponent;
-		exp_tgt_len = sizeof(tmp_type4_msg->sme.exponent);
-		inp_tgt = tmp_type4_msg->sme.message;
-		inp_tgt_len = sizeof(tmp_type4_msg->sme.message);
-	} else {
-		tmp_type4_msg->lme.header.msg_fmt = TYPE4_LME_FMT;
-		tmp_type4_msg->lme.header.msg_len = TYPE4_LME_LEN;
-		mod_tgt = tmp_type4_msg->lme.modulus;
-		mod_tgt_len = sizeof(tmp_type4_msg->lme.modulus);
-		exp_tgt = tmp_type4_msg->lme.exponent;
-		exp_tgt_len = sizeof(tmp_type4_msg->lme.exponent);
-		inp_tgt = tmp_type4_msg->lme.message;
-		inp_tgt_len = sizeof(tmp_type4_msg->lme.message);
-	}
-
-	mod_tgt += (mod_tgt_len - mod_len);
-	if (copy_from_user(mod_tgt, icaMex_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(mod_tgt, mod_len))
-		return SEN_USER_ERROR;
-	exp_tgt += (exp_tgt_len - mod_len);
-	if (copy_from_user(exp_tgt, icaMex_p->b_key, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(exp_tgt, mod_len))
-		return SEN_USER_ERROR;
-	inp_tgt += (inp_tgt_len - mod_len);
-	if (copy_from_user(inp_tgt, icaMex_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(inp_tgt, mod_len))
-		return SEN_USER_ERROR;
-
-	*z90cMsg_l_p = msg_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICACRT_msg_to_type4CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p,
-			   int *z90cMsg_l_p, union type4_msg *z90cMsg_p)
-{
-	int mod_len, short_len, long_len, tmp_size, p_tgt_len, q_tgt_len,
-	    dp_tgt_len, dq_tgt_len, u_tgt_len, inp_tgt_len;
-	unsigned char *p_tgt, *q_tgt, *dp_tgt, *dq_tgt, *u_tgt, *inp_tgt;
-	union type4_msg *tmp_type4_msg;
-
-	mod_len = icaMsg_p->inputdatalength;
-	short_len = mod_len / 2;
-	long_len = mod_len / 2 + 8;
-
-	tmp_size = ((mod_len <= 128) ? TYPE4_SCR_LEN : TYPE4_LCR_LEN) +
-		    CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, tmp_size);
-
-	tmp_type4_msg = (union type4_msg *)
-		((unsigned char *) z90cMsg_p + CALLER_HEADER);
-
-	tmp_type4_msg->scr.header.msg_type_code = TYPE4_TYPE_CODE;
-	tmp_type4_msg->scr.header.request_code = TYPE4_REQU_CODE;
-	if (mod_len <= 128) {
-		tmp_type4_msg->scr.header.msg_fmt = TYPE4_SCR_FMT;
-		tmp_type4_msg->scr.header.msg_len = TYPE4_SCR_LEN;
-		p_tgt = tmp_type4_msg->scr.p;
-		p_tgt_len = sizeof(tmp_type4_msg->scr.p);
-		q_tgt = tmp_type4_msg->scr.q;
-		q_tgt_len = sizeof(tmp_type4_msg->scr.q);
-		dp_tgt = tmp_type4_msg->scr.dp;
-		dp_tgt_len = sizeof(tmp_type4_msg->scr.dp);
-		dq_tgt = tmp_type4_msg->scr.dq;
-		dq_tgt_len = sizeof(tmp_type4_msg->scr.dq);
-		u_tgt = tmp_type4_msg->scr.u;
-		u_tgt_len = sizeof(tmp_type4_msg->scr.u);
-		inp_tgt = tmp_type4_msg->scr.message;
-		inp_tgt_len = sizeof(tmp_type4_msg->scr.message);
-	} else {
-		tmp_type4_msg->lcr.header.msg_fmt = TYPE4_LCR_FMT;
-		tmp_type4_msg->lcr.header.msg_len = TYPE4_LCR_LEN;
-		p_tgt = tmp_type4_msg->lcr.p;
-		p_tgt_len = sizeof(tmp_type4_msg->lcr.p);
-		q_tgt = tmp_type4_msg->lcr.q;
-		q_tgt_len = sizeof(tmp_type4_msg->lcr.q);
-		dp_tgt = tmp_type4_msg->lcr.dp;
-		dp_tgt_len = sizeof(tmp_type4_msg->lcr.dp);
-		dq_tgt = tmp_type4_msg->lcr.dq;
-		dq_tgt_len = sizeof(tmp_type4_msg->lcr.dq);
-		u_tgt = tmp_type4_msg->lcr.u;
-		u_tgt_len = sizeof(tmp_type4_msg->lcr.u);
-		inp_tgt = tmp_type4_msg->lcr.message;
-		inp_tgt_len = sizeof(tmp_type4_msg->lcr.message);
-	}
-
-	p_tgt += (p_tgt_len - long_len);
-	if (copy_from_user(p_tgt, icaMsg_p->np_prime, long_len))
-		return SEN_RELEASED;
-	if (is_empty(p_tgt, long_len))
-		return SEN_USER_ERROR;
-	q_tgt += (q_tgt_len - short_len);
-	if (copy_from_user(q_tgt, icaMsg_p->nq_prime, short_len))
-		return SEN_RELEASED;
-	if (is_empty(q_tgt, short_len))
-		return SEN_USER_ERROR;
-	dp_tgt += (dp_tgt_len - long_len);
-	if (copy_from_user(dp_tgt, icaMsg_p->bp_key, long_len))
-		return SEN_RELEASED;
-	if (is_empty(dp_tgt, long_len))
-		return SEN_USER_ERROR;
-	dq_tgt += (dq_tgt_len - short_len);
-	if (copy_from_user(dq_tgt, icaMsg_p->bq_key, short_len))
-		return SEN_RELEASED;
-	if (is_empty(dq_tgt, short_len))
-		return SEN_USER_ERROR;
-	u_tgt += (u_tgt_len - long_len);
-	if (copy_from_user(u_tgt, icaMsg_p->u_mult_inv, long_len))
-		return SEN_RELEASED;
-	if (is_empty(u_tgt, long_len))
-		return SEN_USER_ERROR;
-	inp_tgt += (inp_tgt_len - mod_len);
-	if (copy_from_user(inp_tgt, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(inp_tgt, mod_len))
-		return SEN_USER_ERROR;
-
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type6MEX_de_msg(struct ica_rsa_modexpo *icaMsg_p, int cdx,
-			      int *z90cMsg_l_p, struct type6_msg *z90cMsg_p)
-{
-	int mod_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l;
-	unsigned char *temp;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRB *cprb_p;
-	struct cca_private_ext_ME *key_p;
-	static int deprecated_msg_count = 0;
-
-	mod_len = icaMsg_p->inputdatalength;
-	tmp_size = FIXED_TYPE6_ME_LEN + mod_len;
-	total_CPRB_len = tmp_size - sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRB);
-	tmp_size = 4*((tmp_size + 3)/4) + CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, tmp_size);
-
-	temp = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(temp, &static_type6_hdr, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)temp;
-	tp6Hdr_p->ToCardLen1 = 4*((total_CPRB_len+3)/4);
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRB_SIZE;
-
-	temp += sizeof(struct type6_hdr);
-	memcpy(temp, &static_cprb, sizeof(struct CPRB));
-	cprb_p = (struct CPRB *) temp;
-	cprb_p->usage_domain[0]= (unsigned char)cdx;
-	itoLe2(&parmBlock_l, cprb_p->req_parml);
-	itoLe2((int *)&(tp6Hdr_p->FromCardLen1), cprb_p->rpl_parml);
-
-	temp += sizeof(struct CPRB);
-	memcpy(temp, &static_pkd_function_and_rules,
-	       sizeof(struct function_and_rules_block));
-
-	temp += sizeof(struct function_and_rules_block);
-	vud_len = 2 + icaMsg_p->inputdatalength;
-	itoLe2(&vud_len, temp);
-
-	temp += 2;
-	if (copy_from_user(temp, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-
-	temp += mod_len;
-	memcpy(temp, &static_T6_keyBlock_hdr, sizeof(struct T6_keyBlock_hdr));
-
-	temp += sizeof(struct T6_keyBlock_hdr);
-	memcpy(temp, &static_pvt_me_key, sizeof(struct cca_private_ext_ME));
-	key_p = (struct cca_private_ext_ME *)temp;
-	temp = key_p->pvtMESec.exponent + sizeof(key_p->pvtMESec.exponent)
-	       - mod_len;
-	if (copy_from_user(temp, icaMsg_p->b_key, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-
-	if (is_common_public_key(temp, mod_len)) {
-		if (deprecated_msg_count < 20) {
-			PRINTK("Common public key used for modex decrypt\n");
-			deprecated_msg_count++;
-			if (deprecated_msg_count == 20)
-				PRINTK("No longer issuing messages about common"
-				       " public key for modex decrypt.\n");
-		}
-		return SEN_NOT_AVAIL;
-	}
-
-	temp = key_p->pvtMESec.modulus + sizeof(key_p->pvtMESec.modulus)
-	       - mod_len;
-	if (copy_from_user(temp, icaMsg_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-
-	key_p->pubMESec.modulus_bit_len = 8 * mod_len;
-
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type6MEX_en_msg(struct ica_rsa_modexpo *icaMsg_p, int cdx,
-			      int *z90cMsg_l_p, struct type6_msg *z90cMsg_p)
-{
-	int mod_len, vud_len, exp_len, key_len;
-	int pad_len, tmp_size, total_CPRB_len, parmBlock_l, i;
-	unsigned char *temp_exp, *exp_p, *temp;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRB *cprb_p;
-	struct cca_public_key *key_p;
-	struct T6_keyBlock_hdr *keyb_p;
-
-	temp_exp = kmalloc(256, GFP_KERNEL);
-	if (!temp_exp)
-		return EGETBUFF;
-	mod_len = icaMsg_p->inputdatalength;
-	if (copy_from_user(temp_exp, icaMsg_p->b_key, mod_len)) {
-		kfree(temp_exp);
-		return SEN_RELEASED;
-	}
-	if (is_empty(temp_exp, mod_len)) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-
-	exp_p = temp_exp;
-	for (i = 0; i < mod_len; i++)
-		if (exp_p[i])
-			break;
-	if (i >= mod_len) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-
-	exp_len = mod_len - i;
-	exp_p += i;
-
-	PDEBUG("exp_len after computation: %08x\n", exp_len);
-	tmp_size = FIXED_TYPE6_ME_EN_LEN + 2 * mod_len + exp_len;
-	total_CPRB_len = tmp_size - sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRB);
-	tmp_size = 4*((tmp_size + 3)/4) + CALLER_HEADER;
-
-	vud_len = 2 + mod_len;
-	memset(z90cMsg_p, 0, tmp_size);
-
-	temp = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(temp, &static_type6_hdr, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)temp;
-	tp6Hdr_p->ToCardLen1 = 4*((total_CPRB_len+3)/4);
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRB_SIZE;
-	memcpy(tp6Hdr_p->function_code, static_PKE_function_code,
-	       sizeof(static_PKE_function_code));
-	temp += sizeof(struct type6_hdr);
-	memcpy(temp, &static_cprb, sizeof(struct CPRB));
-	cprb_p = (struct CPRB *) temp;
-	cprb_p->usage_domain[0]= (unsigned char)cdx;
-	itoLe2((int *)&(tp6Hdr_p->FromCardLen1), cprb_p->rpl_parml);
-	temp += sizeof(struct CPRB);
-	memcpy(temp, &static_pke_function_and_rules,
-		 sizeof(struct function_and_rules_block));
-	temp += sizeof(struct function_and_rules_block);
-	temp += 2;
-	if (copy_from_user(temp, icaMsg_p->inputdata, mod_len)) {
-		kfree(temp_exp);
-		return SEN_RELEASED;
-	}
-	if (is_empty(temp, mod_len)) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-	if ((temp[0] != 0x00) || (temp[1] != 0x02)) {
-		kfree(temp_exp);
-		return SEN_NOT_AVAIL;
-	}
-	for (i = 2; i < mod_len; i++)
-		if (temp[i] == 0x00)
-			break;
-	if ((i < 9) || (i > (mod_len - 2))) {
-		kfree(temp_exp);
-		return SEN_NOT_AVAIL;
-	}
-	pad_len = i + 1;
-	vud_len = mod_len - pad_len;
-	memmove(temp, temp+pad_len, vud_len);
-	temp -= 2;
-	vud_len += 2;
-	itoLe2(&vud_len, temp);
-	temp += (vud_len);
-	keyb_p = (struct T6_keyBlock_hdr *)temp;
-	temp += sizeof(struct T6_keyBlock_hdr);
-	memcpy(temp, &static_public_key, sizeof(static_public_key));
-	key_p = (struct cca_public_key *)temp;
-	temp = key_p->pubSec.exponent;
-	memcpy(temp, exp_p, exp_len);
-	kfree(temp_exp);
-	temp += exp_len;
-	if (copy_from_user(temp, icaMsg_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-	key_p->pubSec.modulus_bit_len = 8 * mod_len;
-	key_p->pubSec.modulus_byte_len = mod_len;
-	key_p->pubSec.exponent_len = exp_len;
-	key_p->pubSec.section_length = CALLER_HEADER + mod_len + exp_len;
-	key_len = key_p->pubSec.section_length + sizeof(struct cca_token_hdr);
-	key_p->pubHdr.token_length = key_len;
-	key_len += 4;
-	itoLe2(&key_len, keyb_p->ulen);
-	key_len += 2;
-	itoLe2(&key_len, keyb_p->blen);
-	parmBlock_l -= pad_len;
-	itoLe2(&parmBlock_l, cprb_p->req_parml);
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICACRT_msg_to_type6CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p, int cdx,
-			   int *z90cMsg_l_p, struct type6_msg *z90cMsg_p)
-{
-	int mod_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l, short_len;
-	int long_len, pad_len, keyPartsLen, tmp_l;
-	unsigned char *tgt_p, *temp;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRB *cprb_p;
-	struct cca_token_hdr *keyHdr_p;
-	struct cca_pvt_ext_CRT_sec *pvtSec_p;
-	struct cca_public_sec *pubSec_p;
-
-	mod_len = icaMsg_p->inputdatalength;
-	short_len = mod_len / 2;
-	long_len = 8 + short_len;
-	keyPartsLen = 3 * long_len + 2 * short_len;
-	pad_len = (8 - (keyPartsLen % 8)) % 8;
-	keyPartsLen += pad_len + mod_len;
-	tmp_size = FIXED_TYPE6_CR_LEN + keyPartsLen + mod_len;
-	total_CPRB_len = tmp_size -  sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRB);
-	vud_len = 2 + mod_len;
-	tmp_size = 4*((tmp_size + 3)/4) + CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, tmp_size);
-	tgt_p = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(tgt_p, &static_type6_hdr, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)tgt_p;
-	tp6Hdr_p->ToCardLen1 = 4*((total_CPRB_len+3)/4);
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRB_SIZE;
-	tgt_p += sizeof(struct type6_hdr);
-	cprb_p = (struct CPRB *) tgt_p;
-	memcpy(tgt_p, &static_cprb, sizeof(struct CPRB));
-	cprb_p->usage_domain[0]= *((unsigned char *)(&(cdx))+3);
-	itoLe2(&parmBlock_l, cprb_p->req_parml);
-	memcpy(cprb_p->rpl_parml, cprb_p->req_parml,
-	       sizeof(cprb_p->req_parml));
-	tgt_p += sizeof(struct CPRB);
-	memcpy(tgt_p, &static_pkd_function_and_rules,
-	       sizeof(struct function_and_rules_block));
-	tgt_p += sizeof(struct function_and_rules_block);
-	itoLe2(&vud_len, tgt_p);
-	tgt_p += 2;
-	if (copy_from_user(tgt_p, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, mod_len))
-		return SEN_USER_ERROR;
-	tgt_p += mod_len;
-	tmp_l = sizeof(struct T6_keyBlock_hdr) + sizeof(struct cca_token_hdr) +
-		sizeof(struct cca_pvt_ext_CRT_sec) + 0x0F + keyPartsLen;
-	itoLe2(&tmp_l, tgt_p);
-	temp = tgt_p + 2;
-	tmp_l -= 2;
-	itoLe2(&tmp_l, temp);
-	tgt_p += sizeof(struct T6_keyBlock_hdr);
-	keyHdr_p = (struct cca_token_hdr *)tgt_p;
-	keyHdr_p->token_identifier = CCA_TKN_HDR_ID_EXT;
-	tmp_l -= 4;
-	keyHdr_p->token_length = tmp_l;
-	tgt_p += sizeof(struct cca_token_hdr);
-	pvtSec_p = (struct cca_pvt_ext_CRT_sec *)tgt_p;
-	pvtSec_p->section_identifier = CCA_PVT_EXT_CRT_SEC_ID_PVT;
-	pvtSec_p->section_length =
-		sizeof(struct cca_pvt_ext_CRT_sec) + keyPartsLen;
-	pvtSec_p->key_format = CCA_PVT_EXT_CRT_SEC_FMT_CL;
-	pvtSec_p->key_use_flags[0] = CCA_PVT_USAGE_ALL;
-	pvtSec_p->p_len = long_len;
-	pvtSec_p->q_len = short_len;
-	pvtSec_p->dp_len = long_len;
-	pvtSec_p->dq_len = short_len;
-	pvtSec_p->u_len = long_len;
-	pvtSec_p->mod_len = mod_len;
-	pvtSec_p->pad_len = pad_len;
-	tgt_p += sizeof(struct cca_pvt_ext_CRT_sec);
-	if (copy_from_user(tgt_p, icaMsg_p->np_prime, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	if (copy_from_user(tgt_p, icaMsg_p->nq_prime, short_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, short_len))
-		return SEN_USER_ERROR;
-	tgt_p += short_len;
-	if (copy_from_user(tgt_p, icaMsg_p->bp_key, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	if (copy_from_user(tgt_p, icaMsg_p->bq_key, short_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, short_len))
-		return SEN_USER_ERROR;
-	tgt_p += short_len;
-	if (copy_from_user(tgt_p, icaMsg_p->u_mult_inv, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	tgt_p += pad_len;
-	memset(tgt_p, 0xFF, mod_len);
-	tgt_p += mod_len;
-	memcpy(tgt_p, &static_cca_pub_sec, sizeof(struct cca_public_sec));
-	pubSec_p = (struct cca_public_sec *) tgt_p;
-	pubSec_p->modulus_bit_len = 8 * mod_len;
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type6MEX_msgX(struct ica_rsa_modexpo *icaMsg_p, int cdx,
-			    int *z90cMsg_l_p, struct type6_msg *z90cMsg_p,
-			    int dev_type)
-{
-	int mod_len, exp_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l;
-	int key_len, i;
-	unsigned char *temp_exp, *tgt_p, *temp, *exp_p;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRBX *cprbx_p;
-	struct cca_public_key *key_p;
-	struct T6_keyBlock_hdrX *keyb_p;
-
-	temp_exp = kmalloc(256, GFP_KERNEL);
-	if (!temp_exp)
-		return EGETBUFF;
-	mod_len = icaMsg_p->inputdatalength;
-	if (copy_from_user(temp_exp, icaMsg_p->b_key, mod_len)) {
-		kfree(temp_exp);
-		return SEN_RELEASED;
-	}
-	if (is_empty(temp_exp, mod_len)) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-	exp_p = temp_exp;
-	for (i = 0; i < mod_len; i++)
-		if (exp_p[i])
-			break;
-	if (i >= mod_len) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-	exp_len = mod_len - i;
-	exp_p += i;
-	PDEBUG("exp_len after computation: %08x\n", exp_len);
-	tmp_size = FIXED_TYPE6_ME_EN_LENX + 2 * mod_len + exp_len;
-	total_CPRB_len = tmp_size - sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRBX);
-	tmp_size = tmp_size + CALLER_HEADER;
-	vud_len = 2 + mod_len;
-	memset(z90cMsg_p, 0, tmp_size);
-	tgt_p = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(tgt_p, &static_type6_hdrX, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)tgt_p;
-	tp6Hdr_p->ToCardLen1 = total_CPRB_len;
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRBX_SIZE;
-	memcpy(tp6Hdr_p->function_code, static_PKE_function_code,
-	       sizeof(static_PKE_function_code));
-	tgt_p += sizeof(struct type6_hdr);
-	memcpy(tgt_p, &static_cprbx, sizeof(struct CPRBX));
-	cprbx_p = (struct CPRBX *) tgt_p;
-	cprbx_p->domain = (unsigned short)cdx;
-	cprbx_p->rpl_msgbl = RESPONSE_CPRBX_SIZE;
-	tgt_p += sizeof(struct CPRBX);
-	if (dev_type == PCIXCC_MCL2)
-		memcpy(tgt_p, &static_pke_function_and_rulesX_MCL2,
-		       sizeof(struct function_and_rules_block));
-	else
-		memcpy(tgt_p, &static_pke_function_and_rulesX,
-		       sizeof(struct function_and_rules_block));
-	tgt_p += sizeof(struct function_and_rules_block);
-
-	tgt_p += 2;
-	if (copy_from_user(tgt_p, icaMsg_p->inputdata, mod_len)) {
-		kfree(temp_exp);
-		return SEN_RELEASED;
-	}
-	if (is_empty(tgt_p, mod_len)) {
-		kfree(temp_exp);
-		return SEN_USER_ERROR;
-	}
-	tgt_p -= 2;
-	*((short *)tgt_p) = (short) vud_len;
-	tgt_p += vud_len;
-	keyb_p = (struct T6_keyBlock_hdrX *)tgt_p;
-	tgt_p += sizeof(struct T6_keyBlock_hdrX);
-	memcpy(tgt_p, &static_public_key, sizeof(static_public_key));
-	key_p = (struct cca_public_key *)tgt_p;
-	temp = key_p->pubSec.exponent;
-	memcpy(temp, exp_p, exp_len);
-	kfree(temp_exp);
-	temp += exp_len;
-	if (copy_from_user(temp, icaMsg_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(temp, mod_len))
-		return SEN_USER_ERROR;
-	key_p->pubSec.modulus_bit_len = 8 * mod_len;
-	key_p->pubSec.modulus_byte_len = mod_len;
-	key_p->pubSec.exponent_len = exp_len;
-	key_p->pubSec.section_length = CALLER_HEADER + mod_len + exp_len;
-	key_len = key_p->pubSec.section_length + sizeof(struct cca_token_hdr);
-	key_p->pubHdr.token_length = key_len;
-	key_len += 4;
-	keyb_p->ulen = (unsigned short)key_len;
-	key_len += 2;
-	keyb_p->blen = (unsigned short)key_len;
-	cprbx_p->req_parml = parmBlock_l;
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICACRT_msg_to_type6CRT_msgX(struct ica_rsa_modexpo_crt *icaMsg_p, int cdx,
-			    int *z90cMsg_l_p, struct type6_msg *z90cMsg_p,
-			    int dev_type)
-{
-	int mod_len, vud_len, tmp_size, total_CPRB_len, parmBlock_l, short_len;
-	int long_len, pad_len, keyPartsLen, tmp_l;
-	unsigned char *tgt_p, *temp;
-	struct type6_hdr *tp6Hdr_p;
-	struct CPRBX *cprbx_p;
-	struct cca_token_hdr *keyHdr_p;
-	struct cca_pvt_ext_CRT_sec *pvtSec_p;
-	struct cca_public_sec *pubSec_p;
-
-	mod_len = icaMsg_p->inputdatalength;
-	short_len = mod_len / 2;
-	long_len = 8 + short_len;
-	keyPartsLen = 3 * long_len + 2 * short_len;
-	pad_len = (8 - (keyPartsLen % 8)) % 8;
-	keyPartsLen += pad_len + mod_len;
-	tmp_size = FIXED_TYPE6_CR_LENX + keyPartsLen + mod_len;
-	total_CPRB_len = tmp_size -  sizeof(struct type6_hdr);
-	parmBlock_l = total_CPRB_len - sizeof(struct CPRBX);
-	vud_len = 2 + mod_len;
-	tmp_size = tmp_size + CALLER_HEADER;
-	memset(z90cMsg_p, 0, tmp_size);
-	tgt_p = (unsigned char *)z90cMsg_p + CALLER_HEADER;
-	memcpy(tgt_p, &static_type6_hdrX, sizeof(struct type6_hdr));
-	tp6Hdr_p = (struct type6_hdr *)tgt_p;
-	tp6Hdr_p->ToCardLen1 = total_CPRB_len;
-	tp6Hdr_p->FromCardLen1 = RESPONSE_CPRBX_SIZE;
-	tgt_p += sizeof(struct type6_hdr);
-	cprbx_p = (struct CPRBX *) tgt_p;
-	memcpy(tgt_p, &static_cprbx, sizeof(struct CPRBX));
-	cprbx_p->domain = (unsigned short)cdx;
-	cprbx_p->req_parml = parmBlock_l;
-	cprbx_p->rpl_msgbl = parmBlock_l;
-	tgt_p += sizeof(struct CPRBX);
-	if (dev_type == PCIXCC_MCL2)
-		memcpy(tgt_p, &static_pkd_function_and_rulesX_MCL2,
-		       sizeof(struct function_and_rules_block));
-	else
-		memcpy(tgt_p, &static_pkd_function_and_rulesX,
-		       sizeof(struct function_and_rules_block));
-	tgt_p += sizeof(struct function_and_rules_block);
-	*((short *)tgt_p) = (short) vud_len;
-	tgt_p += 2;
-	if (copy_from_user(tgt_p, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, mod_len))
-		return SEN_USER_ERROR;
-	tgt_p += mod_len;
-	tmp_l = sizeof(struct T6_keyBlock_hdr) + sizeof(struct cca_token_hdr) +
-		sizeof(struct cca_pvt_ext_CRT_sec) + 0x0F + keyPartsLen;
-	*((short *)tgt_p) = (short) tmp_l;
-	temp = tgt_p + 2;
-	tmp_l -= 2;
-	*((short *)temp) = (short) tmp_l;
-	tgt_p += sizeof(struct T6_keyBlock_hdr);
-	keyHdr_p = (struct cca_token_hdr *)tgt_p;
-	keyHdr_p->token_identifier = CCA_TKN_HDR_ID_EXT;
-	tmp_l -= 4;
-	keyHdr_p->token_length = tmp_l;
-	tgt_p += sizeof(struct cca_token_hdr);
-	pvtSec_p = (struct cca_pvt_ext_CRT_sec *)tgt_p;
-	pvtSec_p->section_identifier = CCA_PVT_EXT_CRT_SEC_ID_PVT;
-	pvtSec_p->section_length =
-		sizeof(struct cca_pvt_ext_CRT_sec) + keyPartsLen;
-	pvtSec_p->key_format = CCA_PVT_EXT_CRT_SEC_FMT_CL;
-	pvtSec_p->key_use_flags[0] = CCA_PVT_USAGE_ALL;
-	pvtSec_p->p_len = long_len;
-	pvtSec_p->q_len = short_len;
-	pvtSec_p->dp_len = long_len;
-	pvtSec_p->dq_len = short_len;
-	pvtSec_p->u_len = long_len;
-	pvtSec_p->mod_len = mod_len;
-	pvtSec_p->pad_len = pad_len;
-	tgt_p += sizeof(struct cca_pvt_ext_CRT_sec);
-	if (copy_from_user(tgt_p, icaMsg_p->np_prime, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	if (copy_from_user(tgt_p, icaMsg_p->nq_prime, short_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, short_len))
-		return SEN_USER_ERROR;
-	tgt_p += short_len;
-	if (copy_from_user(tgt_p, icaMsg_p->bp_key, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	if (copy_from_user(tgt_p, icaMsg_p->bq_key, short_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, short_len))
-		return SEN_USER_ERROR;
-	tgt_p += short_len;
-	if (copy_from_user(tgt_p, icaMsg_p->u_mult_inv, long_len))
-		return SEN_RELEASED;
-	if (is_empty(tgt_p, long_len))
-		return SEN_USER_ERROR;
-	tgt_p += long_len;
-	tgt_p += pad_len;
-	memset(tgt_p, 0xFF, mod_len);
-	tgt_p += mod_len;
-	memcpy(tgt_p, &static_cca_pub_sec, sizeof(struct cca_public_sec));
-	pubSec_p = (struct cca_public_sec *) tgt_p;
-	pubSec_p->modulus_bit_len = 8 * mod_len;
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICAMEX_msg_to_type50MEX_msg(struct ica_rsa_modexpo *icaMex_p, int *z90cMsg_l_p,
-			    union type50_msg *z90cMsg_p)
-{
-	int mod_len, msg_size, mod_tgt_len, exp_tgt_len, inp_tgt_len;
-	unsigned char *mod_tgt, *exp_tgt, *inp_tgt;
-	union type50_msg *tmp_type50_msg;
-
-	mod_len = icaMex_p->inputdatalength;
-
-	msg_size = ((mod_len <= 128) ? TYPE50_MEB1_LEN : TYPE50_MEB2_LEN) +
-		    CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, msg_size);
-
-	tmp_type50_msg = (union type50_msg *)
-		((unsigned char *) z90cMsg_p + CALLER_HEADER);
-
-	tmp_type50_msg->meb1.header.msg_type_code = TYPE50_TYPE_CODE;
-
-	if (mod_len <= 128) {
-		tmp_type50_msg->meb1.header.msg_len = TYPE50_MEB1_LEN;
-		tmp_type50_msg->meb1.keyblock_type = TYPE50_MEB1_FMT;
-		mod_tgt = tmp_type50_msg->meb1.modulus;
-		mod_tgt_len = sizeof(tmp_type50_msg->meb1.modulus);
-		exp_tgt = tmp_type50_msg->meb1.exponent;
-		exp_tgt_len = sizeof(tmp_type50_msg->meb1.exponent);
-		inp_tgt = tmp_type50_msg->meb1.message;
-		inp_tgt_len = sizeof(tmp_type50_msg->meb1.message);
-	} else {
-		tmp_type50_msg->meb2.header.msg_len = TYPE50_MEB2_LEN;
-		tmp_type50_msg->meb2.keyblock_type = TYPE50_MEB2_FMT;
-		mod_tgt = tmp_type50_msg->meb2.modulus;
-		mod_tgt_len = sizeof(tmp_type50_msg->meb2.modulus);
-		exp_tgt = tmp_type50_msg->meb2.exponent;
-		exp_tgt_len = sizeof(tmp_type50_msg->meb2.exponent);
-		inp_tgt = tmp_type50_msg->meb2.message;
-		inp_tgt_len = sizeof(tmp_type50_msg->meb2.message);
-	}
-
-	mod_tgt += (mod_tgt_len - mod_len);
-	if (copy_from_user(mod_tgt, icaMex_p->n_modulus, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(mod_tgt, mod_len))
-		return SEN_USER_ERROR;
-	exp_tgt += (exp_tgt_len - mod_len);
-	if (copy_from_user(exp_tgt, icaMex_p->b_key, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(exp_tgt, mod_len))
-		return SEN_USER_ERROR;
-	inp_tgt += (inp_tgt_len - mod_len);
-	if (copy_from_user(inp_tgt, icaMex_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(inp_tgt, mod_len))
-		return SEN_USER_ERROR;
-
-	*z90cMsg_l_p = msg_size - CALLER_HEADER;
-
-	return 0;
-}
-
-static int
-ICACRT_msg_to_type50CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p,
-			    int *z90cMsg_l_p, union type50_msg *z90cMsg_p)
-{
-	int mod_len, short_len, long_len, tmp_size, p_tgt_len, q_tgt_len,
-	    dp_tgt_len, dq_tgt_len, u_tgt_len, inp_tgt_len, long_offset;
-	unsigned char *p_tgt, *q_tgt, *dp_tgt, *dq_tgt, *u_tgt, *inp_tgt,
-		      temp[8];
-	union type50_msg *tmp_type50_msg;
-
-	mod_len = icaMsg_p->inputdatalength;
-	short_len = mod_len / 2;
-	long_len = mod_len / 2 + 8;
-	long_offset = 0;
-
-	if (long_len > 128) {
-		memset(temp, 0x00, sizeof(temp));
-		if (copy_from_user(temp, icaMsg_p->np_prime, long_len-128))
-			return SEN_RELEASED;
-		if (!is_empty(temp, 8))
-			return SEN_NOT_AVAIL;
-		if (copy_from_user(temp, icaMsg_p->bp_key, long_len-128))
-			return SEN_RELEASED;
-		if (!is_empty(temp, 8))
-			return SEN_NOT_AVAIL;
-		if (copy_from_user(temp, icaMsg_p->u_mult_inv, long_len-128))
-			return SEN_RELEASED;
-		if (!is_empty(temp, 8))
-			return SEN_NOT_AVAIL;
-		long_offset = long_len - 128;
-		long_len = 128;
-	}
-
-	tmp_size = ((long_len <= 64) ? TYPE50_CRB1_LEN : TYPE50_CRB2_LEN) +
-		    CALLER_HEADER;
-
-	memset(z90cMsg_p, 0, tmp_size);
-
-	tmp_type50_msg = (union type50_msg *)
-		((unsigned char *) z90cMsg_p + CALLER_HEADER);
-
-	tmp_type50_msg->crb1.header.msg_type_code = TYPE50_TYPE_CODE;
-	if (long_len <= 64) {
-		tmp_type50_msg->crb1.header.msg_len = TYPE50_CRB1_LEN;
-		tmp_type50_msg->crb1.keyblock_type = TYPE50_CRB1_FMT;
-		p_tgt = tmp_type50_msg->crb1.p;
-		p_tgt_len = sizeof(tmp_type50_msg->crb1.p);
-		q_tgt = tmp_type50_msg->crb1.q;
-		q_tgt_len = sizeof(tmp_type50_msg->crb1.q);
-		dp_tgt = tmp_type50_msg->crb1.dp;
-		dp_tgt_len = sizeof(tmp_type50_msg->crb1.dp);
-		dq_tgt = tmp_type50_msg->crb1.dq;
-		dq_tgt_len = sizeof(tmp_type50_msg->crb1.dq);
-		u_tgt = tmp_type50_msg->crb1.u;
-		u_tgt_len = sizeof(tmp_type50_msg->crb1.u);
-		inp_tgt = tmp_type50_msg->crb1.message;
-		inp_tgt_len = sizeof(tmp_type50_msg->crb1.message);
-	} else {
-		tmp_type50_msg->crb2.header.msg_len = TYPE50_CRB2_LEN;
-		tmp_type50_msg->crb2.keyblock_type = TYPE50_CRB2_FMT;
-		p_tgt = tmp_type50_msg->crb2.p;
-		p_tgt_len = sizeof(tmp_type50_msg->crb2.p);
-		q_tgt = tmp_type50_msg->crb2.q;
-		q_tgt_len = sizeof(tmp_type50_msg->crb2.q);
-		dp_tgt = tmp_type50_msg->crb2.dp;
-		dp_tgt_len = sizeof(tmp_type50_msg->crb2.dp);
-		dq_tgt = tmp_type50_msg->crb2.dq;
-		dq_tgt_len = sizeof(tmp_type50_msg->crb2.dq);
-		u_tgt = tmp_type50_msg->crb2.u;
-		u_tgt_len = sizeof(tmp_type50_msg->crb2.u);
-		inp_tgt = tmp_type50_msg->crb2.message;
-		inp_tgt_len = sizeof(tmp_type50_msg->crb2.message);
-	}
-
-	p_tgt += (p_tgt_len - long_len);
-	if (copy_from_user(p_tgt, icaMsg_p->np_prime + long_offset, long_len))
-		return SEN_RELEASED;
-	if (is_empty(p_tgt, long_len))
-		return SEN_USER_ERROR;
-	q_tgt += (q_tgt_len - short_len);
-	if (copy_from_user(q_tgt, icaMsg_p->nq_prime, short_len))
-		return SEN_RELEASED;
-	if (is_empty(q_tgt, short_len))
-		return SEN_USER_ERROR;
-	dp_tgt += (dp_tgt_len - long_len);
-	if (copy_from_user(dp_tgt, icaMsg_p->bp_key + long_offset, long_len))
-		return SEN_RELEASED;
-	if (is_empty(dp_tgt, long_len))
-		return SEN_USER_ERROR;
-	dq_tgt += (dq_tgt_len - short_len);
-	if (copy_from_user(dq_tgt, icaMsg_p->bq_key, short_len))
-		return SEN_RELEASED;
-	if (is_empty(dq_tgt, short_len))
-		return SEN_USER_ERROR;
-	u_tgt += (u_tgt_len - long_len);
-	if (copy_from_user(u_tgt, icaMsg_p->u_mult_inv + long_offset, long_len))
-		return SEN_RELEASED;
-	if (is_empty(u_tgt, long_len))
-		return SEN_USER_ERROR;
-	inp_tgt += (inp_tgt_len - mod_len);
-	if (copy_from_user(inp_tgt, icaMsg_p->inputdata, mod_len))
-		return SEN_RELEASED;
-	if (is_empty(inp_tgt, mod_len))
-		return SEN_USER_ERROR;
-
-	*z90cMsg_l_p = tmp_size - CALLER_HEADER;
-
-	return 0;
-}
-
-int
-convert_request(unsigned char *buffer, int func, unsigned short function,
-		int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p)
-{
-	if (dev_type == PCICA) {
-		if (func == ICARSACRT)
-			return ICACRT_msg_to_type4CRT_msg(
-				(struct ica_rsa_modexpo_crt *) buffer,
-				msg_l_p, (union type4_msg *) msg_p);
-		else
-			return ICAMEX_msg_to_type4MEX_msg(
-				(struct ica_rsa_modexpo *) buffer,
-				msg_l_p, (union type4_msg *) msg_p);
-	}
-	if (dev_type == PCICC) {
-		if (func == ICARSACRT)
-			return ICACRT_msg_to_type6CRT_msg(
-				(struct ica_rsa_modexpo_crt *) buffer,
-				cdx, msg_l_p, (struct type6_msg *)msg_p);
-		if (function == PCI_FUNC_KEY_ENCRYPT)
-			return ICAMEX_msg_to_type6MEX_en_msg(
-				(struct ica_rsa_modexpo *) buffer,
-				cdx, msg_l_p, (struct type6_msg *) msg_p);
-		else
-			return ICAMEX_msg_to_type6MEX_de_msg(
-				(struct ica_rsa_modexpo *) buffer,
-				cdx, msg_l_p, (struct type6_msg *) msg_p);
-	}
-	if ((dev_type == PCIXCC_MCL2) ||
-	    (dev_type == PCIXCC_MCL3) ||
-	    (dev_type == CEX2C)) {
-		if (func == ICARSACRT)
-			return ICACRT_msg_to_type6CRT_msgX(
-				(struct ica_rsa_modexpo_crt *) buffer,
-				cdx, msg_l_p, (struct type6_msg *) msg_p,
-				dev_type);
-		else
-			return ICAMEX_msg_to_type6MEX_msgX(
-				(struct ica_rsa_modexpo *) buffer,
-				cdx, msg_l_p, (struct type6_msg *) msg_p,
-				dev_type);
-	}
-	if (dev_type == CEX2A) {
-		if (func == ICARSACRT)
-			return ICACRT_msg_to_type50CRT_msg(
-				(struct ica_rsa_modexpo_crt *) buffer,
-				msg_l_p, (union type50_msg *) msg_p);
-		else
-			return ICAMEX_msg_to_type50MEX_msg(
-				(struct ica_rsa_modexpo *) buffer,
-				msg_l_p, (union type50_msg *) msg_p);
-	}
-
-	return 0;
-}
-
-int ext_bitlens_msg_count = 0;
-static inline void
-unset_ext_bitlens(void)
-{
-	if (!ext_bitlens_msg_count) {
-		PRINTK("Unable to use coprocessors for extended bitlengths. "
-		       "Using PCICAs/CEX2As (if present) for extended "
-		       "bitlengths. This is not an error.\n");
-		ext_bitlens_msg_count++;
-	}
-	ext_bitlens = 0;
-}
-
-int
-convert_response(unsigned char *response, unsigned char *buffer,
-		 int *respbufflen_p, unsigned char *resp_buff)
-{
-	struct ica_rsa_modexpo *icaMsg_p = (struct ica_rsa_modexpo *) buffer;
-	struct error_hdr *errh_p = (struct error_hdr *) response;
-	struct type80_hdr *t80h_p = (struct type80_hdr *) response;
-	struct type84_hdr *t84h_p = (struct type84_hdr *) response;
-	struct type86_fmt2_msg *t86m_p =  (struct type86_fmt2_msg *) response;
-	int reply_code, service_rc, service_rs, src_l;
-	unsigned char *src_p, *tgt_p;
-	struct CPRB *cprb_p;
-	struct CPRBX *cprbx_p;
-
-	src_p = 0;
-	reply_code = 0;
-	service_rc = 0;
-	service_rs = 0;
-	src_l = 0;
-	switch (errh_p->type) {
-	case TYPE82_RSP_CODE:
-	case TYPE88_RSP_CODE:
-		reply_code = errh_p->reply_code;
-		src_p = (unsigned char *)errh_p;
-		PRINTK("Hardware error: Type %02X Message Header: "
-		       "%02x%02x%02x%02x%02x%02x%02x%02x\n",
-		       errh_p->type,
-		       src_p[0], src_p[1], src_p[2], src_p[3],
-		       src_p[4], src_p[5], src_p[6], src_p[7]);
-		break;
-	case TYPE80_RSP_CODE:
-		src_l = icaMsg_p->outputdatalength;
-		src_p = response + (int)t80h_p->len - src_l;
-		break;
-	case TYPE84_RSP_CODE:
-		src_l = icaMsg_p->outputdatalength;
-		src_p = response + (int)t84h_p->len - src_l;
-		break;
-	case TYPE86_RSP_CODE:
-		reply_code = t86m_p->header.reply_code;
-		if (reply_code != 0)
-			break;
-		cprb_p = (struct CPRB *)
-			(response + sizeof(struct type86_fmt2_msg));
-		cprbx_p = (struct CPRBX *) cprb_p;
-		if (cprb_p->cprb_ver_id != 0x02) {
-			le2toI(cprb_p->ccp_rtcode, &service_rc);
-			if (service_rc != 0) {
-				le2toI(cprb_p->ccp_rscode, &service_rs);
-				if ((service_rc == 8) && (service_rs == 66))
-					PDEBUG("Bad block format on PCICC\n");
-				else if ((service_rc == 8) && (service_rs == 65))
-					PDEBUG("Probably an even modulus on "
-					       "PCICC\n");
-				else if ((service_rc == 8) && (service_rs == 770)) {
-					PDEBUG("Invalid key length on PCICC\n");
-					unset_ext_bitlens();
-					return REC_USE_PCICA;
-				}
-				else if ((service_rc == 8) && (service_rs == 783)) {
-					PDEBUG("Extended bitlengths not enabled"
-					       "on PCICC\n");
-					unset_ext_bitlens();
-					return REC_USE_PCICA;
-				}
-				else
-					PRINTK("service rc/rs (PCICC): %d/%d\n",
-					       service_rc, service_rs);
-				return REC_OPERAND_INV;
-			}
-			src_p = (unsigned char *)cprb_p + sizeof(struct CPRB);
-			src_p += 4;
-			le2toI(src_p, &src_l);
-			src_l -= 2;
-			src_p += 2;
-		} else {
-			service_rc = (int)cprbx_p->ccp_rtcode;
-			if (service_rc != 0) {
-				service_rs = (int) cprbx_p->ccp_rscode;
-				if ((service_rc == 8) && (service_rs == 66))
-					PDEBUG("Bad block format on PCIXCC\n");
-				else if ((service_rc == 8) && (service_rs == 65))
-					PDEBUG("Probably an even modulus on "
-					       "PCIXCC\n");
-				else if ((service_rc == 8) && (service_rs == 770)) {
-					PDEBUG("Invalid key length on PCIXCC\n");
-					unset_ext_bitlens();
-					return REC_USE_PCICA;
-				}
-				else if ((service_rc == 8) && (service_rs == 783)) {
-					PDEBUG("Extended bitlengths not enabled"
-					       "on PCIXCC\n");
-					unset_ext_bitlens();
-					return REC_USE_PCICA;
-				}
-				else
-					PRINTK("service rc/rs (PCIXCC): %d/%d\n",
-					       service_rc, service_rs);
-				return REC_OPERAND_INV;
-			}
-			src_p = (unsigned char *)
-				cprbx_p + sizeof(struct CPRBX);
-			src_p += 4;
-			src_l = (int)(*((short *) src_p));
-			src_l -= 2;
-			src_p += 2;
-		}
-		break;
-	default:
-		src_p = (unsigned char *)errh_p;
-		PRINTK("Unrecognized Message Header: "
-		       "%02x%02x%02x%02x%02x%02x%02x%02x\n",
-		       src_p[0], src_p[1], src_p[2], src_p[3],
-		       src_p[4], src_p[5], src_p[6], src_p[7]);
-		return REC_BAD_MESSAGE;
-	}
-
-	if (reply_code)
-		switch (reply_code) {
-		case REP82_ERROR_MACHINE_FAILURE:
-			if (errh_p->type == TYPE82_RSP_CODE)
-				PRINTKW("Machine check failure\n");
-			else
-				PRINTKW("Module failure\n");
-			return REC_HARDWAR_ERR;
-		case REP82_ERROR_OPERAND_INVALID:
-			return REC_OPERAND_INV;
-		case REP88_ERROR_MESSAGE_MALFORMD:
-			PRINTKW("Message malformed\n");
-			return REC_OPERAND_INV;
-		case REP82_ERROR_OPERAND_SIZE:
-			return REC_OPERAND_SIZE;
-		case REP82_ERROR_EVEN_MOD_IN_OPND:
-			return REC_EVEN_MOD;
-		case REP82_ERROR_MESSAGE_TYPE:
-			return WRONG_DEVICE_TYPE;
-		case REP82_ERROR_TRANSPORT_FAIL:
-			PRINTKW("Transport failed (APFS = %02X%02X%02X%02X)\n",
-				t86m_p->apfs[0], t86m_p->apfs[1],
-				t86m_p->apfs[2], t86m_p->apfs[3]);
-			return REC_HARDWAR_ERR;
-		default:
-			PRINTKW("reply code = %d\n", reply_code);
-			return REC_HARDWAR_ERR;
-		}
-
-	if (service_rc != 0)
-		return REC_OPERAND_INV;
-
-	if ((src_l > icaMsg_p->outputdatalength) ||
-	    (src_l > RESPBUFFSIZE) ||
-	    (src_l <= 0))
-		return REC_OPERAND_SIZE;
-
-	PDEBUG("Length returned = %d\n", src_l);
-	tgt_p = resp_buff + icaMsg_p->outputdatalength - src_l;
-	memcpy(tgt_p, src_p, src_l);
-	if ((errh_p->type == TYPE86_RSP_CODE) && (resp_buff < tgt_p)) {
-		memset(resp_buff, 0, icaMsg_p->outputdatalength - src_l);
-		if (pad_msg(resp_buff, icaMsg_p->outputdatalength, src_l))
-			return REC_INVALID_PAD;
-	}
-	*respbufflen_p = icaMsg_p->outputdatalength;
-	if (*respbufflen_p == 0)
-		PRINTK("Zero *respbufflen_p\n");
-
-	return 0;
-}
-
diff --git a/drivers/s390/crypto/z90main.c b/drivers/s390/crypto/z90main.c
deleted file mode 100644
index b2f20ab..0000000
--- a/drivers/s390/crypto/z90main.c
+++ /dev/null
@@ -1,3379 +0,0 @@
-/*
- *  linux/drivers/s390/crypto/z90main.c
- *
- *  z90crypt 1.3.3
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs (burrough@us.ibm.com)
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <asm/uaccess.h>       // copy_(from|to)_user
-#include <linux/compat.h>
-#include <linux/compiler.h>
-#include <linux/delay.h>       // mdelay
-#include <linux/init.h>
-#include <linux/interrupt.h>   // for tasklets
-#include <linux/miscdevice.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/proc_fs.h>
-#include <linux/syscalls.h>
-#include "z90crypt.h"
-#include "z90common.h"
-
-/**
- * Defaults that may be modified.
- */
-
-/**
- * You can specify a different minor at compile time.
- */
-#ifndef Z90CRYPT_MINOR
-#define Z90CRYPT_MINOR	MISC_DYNAMIC_MINOR
-#endif
-
-/**
- * You can specify a different domain at compile time or on the insmod
- * command line.
- */
-#ifndef DOMAIN_INDEX
-#define DOMAIN_INDEX	-1
-#endif
-
-/**
- * This is the name under which the device is registered in /proc/modules.
- */
-#define REG_NAME	"z90crypt"
-
-/**
- * Cleanup should run every CLEANUPTIME seconds and should clean up requests
- * older than CLEANUPTIME seconds in the past.
- */
-#ifndef CLEANUPTIME
-#define CLEANUPTIME 15
-#endif
-
-/**
- * Config should run every CONFIGTIME seconds
- */
-#ifndef CONFIGTIME
-#define CONFIGTIME 30
-#endif
-
-/**
- * The first execution of the config task should take place
- * immediately after initialization
- */
-#ifndef INITIAL_CONFIGTIME
-#define INITIAL_CONFIGTIME 1
-#endif
-
-/**
- * Reader should run every READERTIME milliseconds
- * With the 100Hz patch for s390, z90crypt can lock the system solid while
- * under heavy load. We'll try to avoid that.
- */
-#ifndef READERTIME
-#if HZ > 1000
-#define READERTIME 2
-#else
-#define READERTIME 10
-#endif
-#endif
-
-/**
- * turn long device array index into device pointer
- */
-#define LONG2DEVPTR(ndx) (z90crypt.device_p[(ndx)])
-
-/**
- * turn short device array index into long device array index
- */
-#define SHRT2LONG(ndx) (z90crypt.overall_device_x.device_index[(ndx)])
-
-/**
- * turn short device array index into device pointer
- */
-#define SHRT2DEVPTR(ndx) LONG2DEVPTR(SHRT2LONG(ndx))
-
-/**
- * Status for a work-element
- */
-#define STAT_DEFAULT	0x00 // request has not been processed
-
-#define STAT_ROUTED	0x80 // bit 7: requests get routed to specific device
-			     //	       else, device is determined each write
-#define STAT_FAILED	0x40 // bit 6: this bit is set if the request failed
-			     //	       before being sent to the hardware.
-#define STAT_WRITTEN	0x30 // bits 5-4: work to be done, not sent to device
-//			0x20 // UNUSED state
-#define STAT_READPEND	0x10 // bits 5-4: work done, we're returning data now
-#define STAT_NOWORK	0x00 // bits off: no work on any queue
-#define STAT_RDWRMASK	0x30 // mask for bits 5-4
-
-/**
- * Macros to check the status RDWRMASK
- */
-#define CHK_RDWRMASK(statbyte) ((statbyte) & STAT_RDWRMASK)
-#define SET_RDWRMASK(statbyte, newval) \
-	{(statbyte) &= ~STAT_RDWRMASK; (statbyte) |= newval;}
-
-/**
- * Audit Trail.	 Progress of a Work element
- * audit[0]: Unless noted otherwise, these bits are all set by the process
- */
-#define FP_COPYFROM	0x80 // Caller's buffer has been copied to work element
-#define FP_BUFFREQ	0x40 // Low Level buffer requested
-#define FP_BUFFGOT	0x20 // Low Level buffer obtained
-#define FP_SENT		0x10 // Work element sent to a crypto device
-			     // (may be set by process or by reader task)
-#define FP_PENDING	0x08 // Work element placed on pending queue
-			     // (may be set by process or by reader task)
-#define FP_REQUEST	0x04 // Work element placed on request queue
-#define FP_ASLEEP	0x02 // Work element about to sleep
-#define FP_AWAKE	0x01 // Work element has been awakened
-
-/**
- * audit[1]: These bits are set by the reader task and/or the cleanup task
- */
-#define FP_NOTPENDING	  0x80 // Work element removed from pending queue
-#define FP_AWAKENING	  0x40 // Caller about to be awakened
-#define FP_TIMEDOUT	  0x20 // Caller timed out
-#define FP_RESPSIZESET	  0x10 // Response size copied to work element
-#define FP_RESPADDRCOPIED 0x08 // Response address copied to work element
-#define FP_RESPBUFFCOPIED 0x04 // Response buffer copied to work element
-#define FP_REMREQUEST	  0x02 // Work element removed from request queue
-#define FP_SIGNALED	  0x01 // Work element was awakened by a signal
-
-/**
- * audit[2]: unused
- */
-
-/**
- * state of the file handle in private_data.status
- */
-#define STAT_OPEN 0
-#define STAT_CLOSED 1
-
-/**
- * PID() expands to the process ID of the current process
- */
-#define PID() (current->pid)
-
-/**
- * Selected Constants.	The number of APs and the number of devices
- */
-#ifndef Z90CRYPT_NUM_APS
-#define Z90CRYPT_NUM_APS 64
-#endif
-#ifndef Z90CRYPT_NUM_DEVS
-#define Z90CRYPT_NUM_DEVS Z90CRYPT_NUM_APS
-#endif
-
-/**
- * Buffer size for receiving responses. The maximum Response Size
- * is actually the maximum request size, since in an error condition
- * the request itself may be returned unchanged.
- */
-#define MAX_RESPONSE_SIZE 0x0000077C
-
-/**
- * A count and status-byte mask
- */
-struct status {
-	int	      st_count;		    // # of enabled devices
-	int	      disabled_count;	    // # of disabled devices
-	int	      user_disabled_count;  // # of devices disabled via proc fs
-	unsigned char st_mask[Z90CRYPT_NUM_APS]; // current status mask
-};
-
-/**
- * The array of device indexes is a mechanism for fast indexing into
- * a long (and sparse) array.  For instance, if APs 3, 9 and 47 are
- * installed, z90CDeviceIndex[0] is 3, z90CDeviceIndex[1] is 9, and
- * z90CDeviceIndex[2] is 47.
- */
-struct device_x {
-	int device_index[Z90CRYPT_NUM_DEVS];
-};
-
-/**
- * All devices are arranged in a single array: 64 APs
- */
-struct device {
-	int		 dev_type;	    // PCICA, PCICC, PCIXCC_MCL2,
-					    // PCIXCC_MCL3, CEX2C, CEX2A
-	enum devstat	 dev_stat;	    // current device status
-	int		 dev_self_x;	    // Index in array
-	int		 disabled;	    // Set when device is in error
-	int		 user_disabled;	    // Set when device is disabled by user
-	int		 dev_q_depth;	    // q depth
-	unsigned char *	 dev_resp_p;	    // Response buffer address
-	int		 dev_resp_l;	    // Response Buffer length
-	int		 dev_caller_count;  // Number of callers
-	int		 dev_total_req_cnt; // # requests for device since load
-	struct list_head dev_caller_list;   // List of callers
-};
-
-/**
- * There's a struct status and a struct device_x for each device type.
- */
-struct hdware_block {
-	struct status	hdware_mask;
-	struct status	type_mask[Z90CRYPT_NUM_TYPES];
-	struct device_x type_x_addr[Z90CRYPT_NUM_TYPES];
-	unsigned char	device_type_array[Z90CRYPT_NUM_APS];
-};
-
-/**
- * z90crypt is the topmost data structure in the hierarchy.
- */
-struct z90crypt {
-	int		     max_count;		// Nr of possible crypto devices
-	struct status	     mask;
-	int		     q_depth_array[Z90CRYPT_NUM_DEVS];
-	int		     dev_type_array[Z90CRYPT_NUM_DEVS];
-	struct device_x	     overall_device_x;	// array device indexes
-	struct device *	     device_p[Z90CRYPT_NUM_DEVS];
-	int		     terminating;
-	int		     domain_established;// TRUE:  domain has been found
-	int		     cdx;		// Crypto Domain Index
-	int		     len;		// Length of this data structure
-	struct hdware_block *hdware_info;
-};
-
-/**
- * An array of these structures is pointed to from dev_caller
- * The length of the array depends on the device type. For APs,
- * there are 8.
- *
- * The caller buffer is allocated to the user at OPEN. At WRITE,
- * it contains the request; at READ, the response. The function
- * send_to_crypto_device converts the request to device-dependent
- * form and use the caller's OPEN-allocated buffer for the response.
- *
- * For the contents of caller_dev_dep_req and caller_dev_dep_req_p
- * because that points to it, see the discussion in z90hardware.c.
- * Search for "extended request message block".
- */
-struct caller {
-	int		 caller_buf_l;		 // length of original request
-	unsigned char *	 caller_buf_p;		 // Original request on WRITE
-	int		 caller_dev_dep_req_l;	 // len device dependent request
-	unsigned char *	 caller_dev_dep_req_p;	 // Device dependent form
-	unsigned char	 caller_id[8];		 // caller-supplied message id
-	struct list_head caller_liste;
-	unsigned char	 caller_dev_dep_req[MAX_RESPONSE_SIZE];
-};
-
-/**
- * Function prototypes from z90hardware.c
- */
-enum hdstat query_online(int deviceNr, int cdx, int resetNr, int *q_depth,
-			 int *dev_type);
-enum devstat reset_device(int deviceNr, int cdx, int resetNr);
-enum devstat send_to_AP(int dev_nr, int cdx, int msg_len, unsigned char *msg_ext);
-enum devstat receive_from_AP(int dev_nr, int cdx, int resplen,
-			     unsigned char *resp, unsigned char *psmid);
-int convert_request(unsigned char *buffer, int func, unsigned short function,
-		    int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p);
-int convert_response(unsigned char *response, unsigned char *buffer,
-		     int *respbufflen_p, unsigned char *resp_buff);
-
-/**
- * Low level function prototypes
- */
-static int create_z90crypt(int *cdx_p);
-static int refresh_z90crypt(int *cdx_p);
-static int find_crypto_devices(struct status *deviceMask);
-static int create_crypto_device(int index);
-static int destroy_crypto_device(int index);
-static void destroy_z90crypt(void);
-static int refresh_index_array(struct status *status_str,
-			       struct device_x *index_array);
-static int probe_device_type(struct device *devPtr);
-static int probe_PCIXCC_type(struct device *devPtr);
-
-/**
- * proc fs definitions
- */
-static struct proc_dir_entry *z90crypt_entry;
-
-/**
- * data structures
- */
-
-/**
- * work_element.opener points back to this structure
- */
-struct priv_data {
-	pid_t	opener_pid;
-	unsigned char	status;		// 0: open  1: closed
-};
-
-/**
- * A work element is allocated for each request
- */
-struct work_element {
-	struct priv_data *priv_data;
-	pid_t		  pid;
-	int		  devindex;	  // index of device processing this w_e
-					  // (If request did not specify device,
-					  // -1 until placed onto a queue)
-	int		  devtype;
-	struct list_head  liste;	  // used for requestq and pendingq
-	char		  buffer[128];	  // local copy of user request
-	int		  buff_size;	  // size of the buffer for the request
-	char		  resp_buff[RESPBUFFSIZE];
-	int		  resp_buff_size;
-	char __user *	  resp_addr;	  // address of response in user space
-	unsigned int	  funccode;	  // function code of request
-	wait_queue_head_t waitq;
-	unsigned long	  requestsent;	  // time at which the request was sent
-	atomic_t	  alarmrung;	  // wake-up signal
-	unsigned char	  caller_id[8];	  // pid + counter, for this w_e
-	unsigned char	  status[1];	  // bits to mark status of the request
-	unsigned char	  audit[3];	  // record of work element's progress
-	unsigned char *	  requestptr;	  // address of request buffer
-	int		  retcode;	  // return code of request
-};
-
-/**
- * High level function prototypes
- */
-static int z90crypt_open(struct inode *, struct file *);
-static int z90crypt_release(struct inode *, struct file *);
-static ssize_t z90crypt_read(struct file *, char __user *, size_t, loff_t *);
-static ssize_t z90crypt_write(struct file *, const char __user *,
-							size_t, loff_t *);
-static long z90crypt_unlocked_ioctl(struct file *, unsigned int, unsigned long);
-static long z90crypt_compat_ioctl(struct file *, unsigned int, unsigned long);
-
-static void z90crypt_reader_task(unsigned long);
-static void z90crypt_schedule_reader_task(unsigned long);
-static void z90crypt_config_task(unsigned long);
-static void z90crypt_cleanup_task(unsigned long);
-
-static int z90crypt_status(char *, char **, off_t, int, int *, void *);
-static int z90crypt_status_write(struct file *, const char __user *,
-				 unsigned long, void *);
-
-/**
- * Storage allocated at initialization and used throughout the life of
- * this insmod
- */
-static int domain = DOMAIN_INDEX;
-static struct z90crypt z90crypt;
-static int quiesce_z90crypt;
-static spinlock_t queuespinlock;
-static struct list_head request_list;
-static int requestq_count;
-static struct list_head pending_list;
-static int pendingq_count;
-
-static struct tasklet_struct reader_tasklet;
-static struct timer_list reader_timer;
-static struct timer_list config_timer;
-static struct timer_list cleanup_timer;
-static atomic_t total_open;
-static atomic_t z90crypt_step;
-
-static struct file_operations z90crypt_fops = {
-	.owner		= THIS_MODULE,
-	.read		= z90crypt_read,
-	.write		= z90crypt_write,
-	.unlocked_ioctl	= z90crypt_unlocked_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= z90crypt_compat_ioctl,
-#endif
-	.open		= z90crypt_open,
-	.release	= z90crypt_release
-};
-
-static struct miscdevice z90crypt_misc_device = {
-	.minor	    = Z90CRYPT_MINOR,
-	.name	    = DEV_NAME,
-	.fops	    = &z90crypt_fops,
-};
-
-/**
- * Documentation values.
- */
-MODULE_AUTHOR("zSeries Linux Crypto Team: Robert H. Burroughs, Eric D. Rossman"
-	      "and Jochen Roehrig");
-MODULE_DESCRIPTION("zSeries Linux Cryptographic Coprocessor device driver, "
-		   "Copyright 2001, 2005 IBM Corporation");
-MODULE_LICENSE("GPL");
-module_param(domain, int, 0);
-MODULE_PARM_DESC(domain, "domain index for device");
-
-#ifdef CONFIG_COMPAT
-/**
- * ioctl32 conversion routines
- */
-struct ica_rsa_modexpo_32 { // For 32-bit callers
-	compat_uptr_t	inputdata;
-	unsigned int	inputdatalength;
-	compat_uptr_t	outputdata;
-	unsigned int	outputdatalength;
-	compat_uptr_t	b_key;
-	compat_uptr_t	n_modulus;
-};
-
-static long
-trans_modexpo32(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	struct ica_rsa_modexpo_32 __user *mex32u = compat_ptr(arg);
-	struct ica_rsa_modexpo_32  mex32k;
-	struct ica_rsa_modexpo __user *mex64;
-	long ret = 0;
-	unsigned int i;
-
-	if (!access_ok(VERIFY_WRITE, mex32u, sizeof(struct ica_rsa_modexpo_32)))
-		return -EFAULT;
-	mex64 = compat_alloc_user_space(sizeof(struct ica_rsa_modexpo));
-	if (!access_ok(VERIFY_WRITE, mex64, sizeof(struct ica_rsa_modexpo)))
-		return -EFAULT;
-	if (copy_from_user(&mex32k, mex32u, sizeof(struct ica_rsa_modexpo_32)))
-		return -EFAULT;
-	if (__put_user(compat_ptr(mex32k.inputdata), &mex64->inputdata)   ||
-	    __put_user(mex32k.inputdatalength, &mex64->inputdatalength)   ||
-	    __put_user(compat_ptr(mex32k.outputdata), &mex64->outputdata) ||
-	    __put_user(mex32k.outputdatalength, &mex64->outputdatalength) ||
-	    __put_user(compat_ptr(mex32k.b_key), &mex64->b_key)           ||
-	    __put_user(compat_ptr(mex32k.n_modulus), &mex64->n_modulus))
-		return -EFAULT;
-	ret = z90crypt_unlocked_ioctl(filp, cmd, (unsigned long)mex64);
-	if (!ret)
-		if (__get_user(i, &mex64->outputdatalength) ||
-		    __put_user(i, &mex32u->outputdatalength))
-			ret = -EFAULT;
-	return ret;
-}
-
-struct ica_rsa_modexpo_crt_32 { // For 32-bit callers
-	compat_uptr_t	inputdata;
-	unsigned int	inputdatalength;
-	compat_uptr_t	outputdata;
-	unsigned int	outputdatalength;
-	compat_uptr_t	bp_key;
-	compat_uptr_t	bq_key;
-	compat_uptr_t	np_prime;
-	compat_uptr_t	nq_prime;
-	compat_uptr_t	u_mult_inv;
-};
-
-static long
-trans_modexpo_crt32(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	struct ica_rsa_modexpo_crt_32 __user *crt32u = compat_ptr(arg);
-	struct ica_rsa_modexpo_crt_32  crt32k;
-	struct ica_rsa_modexpo_crt __user *crt64;
-	long ret = 0;
-	unsigned int i;
-
-	if (!access_ok(VERIFY_WRITE, crt32u,
-		       sizeof(struct ica_rsa_modexpo_crt_32)))
-		return -EFAULT;
-	crt64 = compat_alloc_user_space(sizeof(struct ica_rsa_modexpo_crt));
-	if (!access_ok(VERIFY_WRITE, crt64, sizeof(struct ica_rsa_modexpo_crt)))
-		return -EFAULT;
-	if (copy_from_user(&crt32k, crt32u,
-			   sizeof(struct ica_rsa_modexpo_crt_32)))
-		return -EFAULT;
-	if (__put_user(compat_ptr(crt32k.inputdata), &crt64->inputdata)   ||
-	    __put_user(crt32k.inputdatalength, &crt64->inputdatalength)   ||
-	    __put_user(compat_ptr(crt32k.outputdata), &crt64->outputdata) ||
-	    __put_user(crt32k.outputdatalength, &crt64->outputdatalength) ||
-	    __put_user(compat_ptr(crt32k.bp_key), &crt64->bp_key)         ||
-	    __put_user(compat_ptr(crt32k.bq_key), &crt64->bq_key)         ||
-	    __put_user(compat_ptr(crt32k.np_prime), &crt64->np_prime)     ||
-	    __put_user(compat_ptr(crt32k.nq_prime), &crt64->nq_prime)     ||
-	    __put_user(compat_ptr(crt32k.u_mult_inv), &crt64->u_mult_inv))
-		return -EFAULT;
-	ret = z90crypt_unlocked_ioctl(filp, cmd, (unsigned long)crt64);
-	if (!ret)
-		if (__get_user(i, &crt64->outputdatalength) ||
-		    __put_user(i, &crt32u->outputdatalength))
-			ret = -EFAULT;
-	return ret;
-}
-
-static long
-z90crypt_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case ICAZ90STATUS:
-	case Z90QUIESCE:
-	case Z90STAT_TOTALCOUNT:
-	case Z90STAT_PCICACOUNT:
-	case Z90STAT_PCICCCOUNT:
-	case Z90STAT_PCIXCCCOUNT:
-	case Z90STAT_PCIXCCMCL2COUNT:
-	case Z90STAT_PCIXCCMCL3COUNT:
-	case Z90STAT_CEX2CCOUNT:
-	case Z90STAT_REQUESTQ_COUNT:
-	case Z90STAT_PENDINGQ_COUNT:
-	case Z90STAT_TOTALOPEN_COUNT:
-	case Z90STAT_DOMAIN_INDEX:
-	case Z90STAT_STATUS_MASK:
-	case Z90STAT_QDEPTH_MASK:
-	case Z90STAT_PERDEV_REQCNT:
-		return z90crypt_unlocked_ioctl(filp, cmd, arg);
-	case ICARSAMODEXPO:
-		return trans_modexpo32(filp, cmd, arg);
-	case ICARSACRT:
-		return trans_modexpo_crt32(filp, cmd, arg);
-	default:
-		return -ENOIOCTLCMD;
-  	}
-}
-#endif
-
-/**
- * The module initialization code.
- */
-static int __init
-z90crypt_init_module(void)
-{
-	int result, nresult;
-	struct proc_dir_entry *entry;
-
-	PDEBUG("PID %d\n", PID());
-
-	if ((domain < -1) || (domain > 15)) {
-		PRINTKW("Invalid param: domain = %d.  Not loading.\n", domain);
-		return -EINVAL;
-	}
-
-	/* Register as misc device with given minor (or get a dynamic one). */
-	result = misc_register(&z90crypt_misc_device);
-	if (result < 0) {
-		PRINTKW(KERN_ERR "misc_register (minor %d) failed with %d\n",
-			z90crypt_misc_device.minor, result);
-		return result;
-	}
-
-	PDEBUG("Registered " DEV_NAME " with result %d\n", result);
-
-	result = create_z90crypt(&domain);
-	if (result != 0) {
-		PRINTKW("create_z90crypt (domain index %d) failed with %d.\n",
-			domain, result);
-		result = -ENOMEM;
-		goto init_module_cleanup;
-	}
-
-	if (result == 0) {
-		PRINTKN("Version %d.%d.%d loaded, built on %s %s\n",
-			z90crypt_VERSION, z90crypt_RELEASE, z90crypt_VARIANT,
-			__DATE__, __TIME__);
-		PDEBUG("create_z90crypt (domain index %d) successful.\n",
-		       domain);
-	} else
-		PRINTK("No devices at startup\n");
-
-	/* Initialize globals. */
-	spin_lock_init(&queuespinlock);
-
-	INIT_LIST_HEAD(&pending_list);
-	pendingq_count = 0;
-
-	INIT_LIST_HEAD(&request_list);
-	requestq_count = 0;
-
-	quiesce_z90crypt = 0;
-
-	atomic_set(&total_open, 0);
-	atomic_set(&z90crypt_step, 0);
-
-	/* Set up the cleanup task. */
-	init_timer(&cleanup_timer);
-	cleanup_timer.function = z90crypt_cleanup_task;
-	cleanup_timer.data = 0;
-	cleanup_timer.expires = jiffies + (CLEANUPTIME * HZ);
-	add_timer(&cleanup_timer);
-
-	/* Set up the proc file system */
-	entry = create_proc_entry("driver/z90crypt", 0644, 0);
-	if (entry) {
-		entry->nlink = 1;
-		entry->data = 0;
-		entry->read_proc = z90crypt_status;
-		entry->write_proc = z90crypt_status_write;
-	}
-	else
-		PRINTK("Couldn't create z90crypt proc entry\n");
-	z90crypt_entry = entry;
-
-	/* Set up the configuration task. */
-	init_timer(&config_timer);
-	config_timer.function = z90crypt_config_task;
-	config_timer.data = 0;
-	config_timer.expires = jiffies + (INITIAL_CONFIGTIME * HZ);
-	add_timer(&config_timer);
-
-	/* Set up the reader task */
-	tasklet_init(&reader_tasklet, z90crypt_reader_task, 0);
-	init_timer(&reader_timer);
-	reader_timer.function = z90crypt_schedule_reader_task;
-	reader_timer.data = 0;
-	reader_timer.expires = jiffies + (READERTIME * HZ / 1000);
-	add_timer(&reader_timer);
-
-	return 0; // success
-
-init_module_cleanup:
-	if ((nresult = misc_deregister(&z90crypt_misc_device)))
-		PRINTK("misc_deregister failed with %d.\n", nresult);
-	else
-		PDEBUG("misc_deregister successful.\n");
-
-	return result; // failure
-}
-
-/**
- * The module termination code
- */
-static void __exit
-z90crypt_cleanup_module(void)
-{
-	int nresult;
-
-	PDEBUG("PID %d\n", PID());
-
-	remove_proc_entry("driver/z90crypt", 0);
-
-	if ((nresult = misc_deregister(&z90crypt_misc_device)))
-		PRINTK("misc_deregister failed with %d.\n", nresult);
-	else
-		PDEBUG("misc_deregister successful.\n");
-
-	/* Remove the tasks */
-	tasklet_kill(&reader_tasklet);
-	del_timer(&reader_timer);
-	del_timer(&config_timer);
-	del_timer(&cleanup_timer);
-
-	destroy_z90crypt();
-
-	PRINTKN("Unloaded.\n");
-}
-
-/**
- * Functions running under a process id
- *
- * The I/O functions:
- *     z90crypt_open
- *     z90crypt_release
- *     z90crypt_read
- *     z90crypt_write
- *     z90crypt_unlocked_ioctl
- *     z90crypt_status
- *     z90crypt_status_write
- *	 disable_card
- *	 enable_card
- *
- * Helper functions:
- *     z90crypt_rsa
- *	 z90crypt_prepare
- *	 z90crypt_send
- *	 z90crypt_process_results
- *
- */
-static int
-z90crypt_open(struct inode *inode, struct file *filp)
-{
-	struct priv_data *private_data_p;
-
-	if (quiesce_z90crypt)
-		return -EQUIESCE;
-
-	private_data_p = kzalloc(sizeof(struct priv_data), GFP_KERNEL);
-	if (!private_data_p) {
-		PRINTK("Memory allocate failed\n");
-		return -ENOMEM;
-	}
-
-	private_data_p->status = STAT_OPEN;
-	private_data_p->opener_pid = PID();
-	filp->private_data = private_data_p;
-	atomic_inc(&total_open);
-
-	return 0;
-}
-
-static int
-z90crypt_release(struct inode *inode, struct file *filp)
-{
-	struct priv_data *private_data_p = filp->private_data;
-
-	PDEBUG("PID %d (filp %p)\n", PID(), filp);
-
-	private_data_p->status = STAT_CLOSED;
-	memset(private_data_p, 0, sizeof(struct priv_data));
-	kfree(private_data_p);
-	atomic_dec(&total_open);
-
-	return 0;
-}
-
-/*
- * there are two read functions, of which compile options will choose one
- * without USE_GET_RANDOM_BYTES
- *   => read() always returns -EPERM;
- * otherwise
- *   => read() uses get_random_bytes() kernel function
- */
-#ifndef USE_GET_RANDOM_BYTES
-/**
- * z90crypt_read will not be supported beyond z90crypt 1.3.1
- */
-static ssize_t
-z90crypt_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
-{
-	PDEBUG("filp %p (PID %d)\n", filp, PID());
-	return -EPERM;
-}
-#else // we want to use get_random_bytes
-/**
- * read() just returns a string of random bytes.  Since we have no way
- * to generate these cryptographically, we just execute get_random_bytes
- * for the length specified.
- */
-#include <linux/random.h>
-static ssize_t
-z90crypt_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
-{
-	unsigned char *temp_buff;
-
-	PDEBUG("filp %p (PID %d)\n", filp, PID());
-
-	if (quiesce_z90crypt)
-		return -EQUIESCE;
-	if (count < 0) {
-		PRINTK("Requested random byte count negative: %ld\n", count);
-		return -EINVAL;
-	}
-	if (count > RESPBUFFSIZE) {
-		PDEBUG("count[%d] > RESPBUFFSIZE", count);
-		return -EINVAL;
-	}
-	if (count == 0)
-		return 0;
-	temp_buff = kmalloc(RESPBUFFSIZE, GFP_KERNEL);
-	if (!temp_buff) {
-		PRINTK("Memory allocate failed\n");
-		return -ENOMEM;
-	}
-	get_random_bytes(temp_buff, count);
-
-	if (copy_to_user(buf, temp_buff, count) != 0) {
-		kfree(temp_buff);
-		return -EFAULT;
-	}
-	kfree(temp_buff);
-	return count;
-}
-#endif
-
-/**
- * Write is is not allowed
- */
-static ssize_t
-z90crypt_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos)
-{
-	PDEBUG("filp %p (PID %d)\n", filp, PID());
-	return -EPERM;
-}
-
-/**
- * New status functions
- */
-static inline int
-get_status_totalcount(void)
-{
-	return z90crypt.hdware_info->hdware_mask.st_count;
-}
-
-static inline int
-get_status_PCICAcount(void)
-{
-	return z90crypt.hdware_info->type_mask[PCICA].st_count;
-}
-
-static inline int
-get_status_PCICCcount(void)
-{
-	return z90crypt.hdware_info->type_mask[PCICC].st_count;
-}
-
-static inline int
-get_status_PCIXCCcount(void)
-{
-	return z90crypt.hdware_info->type_mask[PCIXCC_MCL2].st_count +
-	       z90crypt.hdware_info->type_mask[PCIXCC_MCL3].st_count;
-}
-
-static inline int
-get_status_PCIXCCMCL2count(void)
-{
-	return z90crypt.hdware_info->type_mask[PCIXCC_MCL2].st_count;
-}
-
-static inline int
-get_status_PCIXCCMCL3count(void)
-{
-	return z90crypt.hdware_info->type_mask[PCIXCC_MCL3].st_count;
-}
-
-static inline int
-get_status_CEX2Ccount(void)
-{
-	return z90crypt.hdware_info->type_mask[CEX2C].st_count;
-}
-
-static inline int
-get_status_CEX2Acount(void)
-{
-	return z90crypt.hdware_info->type_mask[CEX2A].st_count;
-}
-
-static inline int
-get_status_requestq_count(void)
-{
-	return requestq_count;
-}
-
-static inline int
-get_status_pendingq_count(void)
-{
-	return pendingq_count;
-}
-
-static inline int
-get_status_totalopen_count(void)
-{
-	return atomic_read(&total_open);
-}
-
-static inline int
-get_status_domain_index(void)
-{
-	return z90crypt.cdx;
-}
-
-static inline unsigned char *
-get_status_status_mask(unsigned char status[Z90CRYPT_NUM_APS])
-{
-	int i, ix;
-
-	memcpy(status, z90crypt.hdware_info->device_type_array,
-	       Z90CRYPT_NUM_APS);
-
-	for (i = 0; i < get_status_totalcount(); i++) {
-		ix = SHRT2LONG(i);
-		if (LONG2DEVPTR(ix)->user_disabled)
-			status[ix] = 0x0d;
-	}
-
-	return status;
-}
-
-static inline unsigned char *
-get_status_qdepth_mask(unsigned char qdepth[Z90CRYPT_NUM_APS])
-{
-	int i, ix;
-
-	memset(qdepth, 0, Z90CRYPT_NUM_APS);
-
-	for (i = 0; i < get_status_totalcount(); i++) {
-		ix = SHRT2LONG(i);
-		qdepth[ix] = LONG2DEVPTR(ix)->dev_caller_count;
-	}
-
-	return qdepth;
-}
-
-static inline unsigned int *
-get_status_perdevice_reqcnt(unsigned int reqcnt[Z90CRYPT_NUM_APS])
-{
-	int i, ix;
-
-	memset(reqcnt, 0, Z90CRYPT_NUM_APS * sizeof(int));
-
-	for (i = 0; i < get_status_totalcount(); i++) {
-		ix = SHRT2LONG(i);
-		reqcnt[ix] = LONG2DEVPTR(ix)->dev_total_req_cnt;
-	}
-
-	return reqcnt;
-}
-
-static inline void
-init_work_element(struct work_element *we_p,
-		  struct priv_data *priv_data, pid_t pid)
-{
-	int step;
-
-	we_p->requestptr = (unsigned char *)we_p + sizeof(struct work_element);
-	/* Come up with a unique id for this caller. */
-	step = atomic_inc_return(&z90crypt_step);
-	memcpy(we_p->caller_id+0, (void *) &pid, sizeof(pid));
-	memcpy(we_p->caller_id+4, (void *) &step, sizeof(step));
-	we_p->pid = pid;
-	we_p->priv_data = priv_data;
-	we_p->status[0] = STAT_DEFAULT;
-	we_p->audit[0] = 0x00;
-	we_p->audit[1] = 0x00;
-	we_p->audit[2] = 0x00;
-	we_p->resp_buff_size = 0;
-	we_p->retcode = 0;
-	we_p->devindex = -1;
-	we_p->devtype = -1;
-	atomic_set(&we_p->alarmrung, 0);
-	init_waitqueue_head(&we_p->waitq);
-	INIT_LIST_HEAD(&(we_p->liste));
-}
-
-static inline int
-allocate_work_element(struct work_element **we_pp,
-		      struct priv_data *priv_data_p, pid_t pid)
-{
-	struct work_element *we_p;
-
-	we_p = (struct work_element *) get_zeroed_page(GFP_KERNEL);
-	if (!we_p)
-		return -ENOMEM;
-	init_work_element(we_p, priv_data_p, pid);
-	*we_pp = we_p;
-	return 0;
-}
-
-static inline void
-remove_device(struct device *device_p)
-{
-	if (!device_p || (device_p->disabled != 0))
-		return;
-	device_p->disabled = 1;
-	z90crypt.hdware_info->type_mask[device_p->dev_type].disabled_count++;
-	z90crypt.hdware_info->hdware_mask.disabled_count++;
-}
-
-/**
- * Bitlength limits for each card
- *
- * There are new MCLs which allow more bitlengths. See the table for details.
- * The MCL must be applied and the newer bitlengths enabled for these to work.
- *
- * Card Type    Old limit    New limit
- * PCICA          ??-2048     same (the lower limit is less than 128 bit...)
- * PCICC         512-1024     512-2048
- * PCIXCC_MCL2   512-2048     ----- (applying any GA LIC will make an MCL3 card)
- * PCIXCC_MCL3   -----        128-2048
- * CEX2C         512-2048     128-2048
- * CEX2A          ??-2048     same (the lower limit is less than 128 bit...)
- *
- * ext_bitlens (extended bitlengths) is a global, since you should not apply an
- * MCL to just one card in a machine. We assume, at first, that all cards have
- * these capabilities.
- */
-int ext_bitlens = 1; // This is global
-#define PCIXCC_MIN_MOD_SIZE	 16	//  128 bits
-#define OLD_PCIXCC_MIN_MOD_SIZE	 64	//  512 bits
-#define PCICC_MIN_MOD_SIZE	 64	//  512 bits
-#define OLD_PCICC_MAX_MOD_SIZE	128	// 1024 bits
-#define MAX_MOD_SIZE		256	// 2048 bits
-
-static inline int
-select_device_type(int *dev_type_p, int bytelength)
-{
-	static int count = 0;
-	int PCICA_avail, PCIXCC_MCL3_avail, CEX2C_avail, CEX2A_avail,
-	    index_to_use;
-	struct status *stat;
-	if ((*dev_type_p != PCICC) && (*dev_type_p != PCICA) &&
-	    (*dev_type_p != PCIXCC_MCL2) && (*dev_type_p != PCIXCC_MCL3) &&
-	    (*dev_type_p != CEX2C) && (*dev_type_p != CEX2A) &&
-	    (*dev_type_p != ANYDEV))
-		return -1;
-	if (*dev_type_p != ANYDEV) {
-		stat = &z90crypt.hdware_info->type_mask[*dev_type_p];
-		if (stat->st_count >
-		    (stat->disabled_count + stat->user_disabled_count))
-			return 0;
-		return -1;
-	}
-
-	/**
-	 * Assumption: PCICA, PCIXCC_MCL3, CEX2C, and CEX2A are all similar in
-	 * speed.
-	 *
-	 * PCICA and CEX2A do NOT co-exist, so it would be either one or the
-	 * other present.
-	 */
-	stat = &z90crypt.hdware_info->type_mask[PCICA];
-	PCICA_avail = stat->st_count -
-			(stat->disabled_count + stat->user_disabled_count);
-	stat = &z90crypt.hdware_info->type_mask[PCIXCC_MCL3];
-	PCIXCC_MCL3_avail = stat->st_count -
-			(stat->disabled_count + stat->user_disabled_count);
-	stat = &z90crypt.hdware_info->type_mask[CEX2C];
-	CEX2C_avail = stat->st_count -
-			(stat->disabled_count + stat->user_disabled_count);
-	stat = &z90crypt.hdware_info->type_mask[CEX2A];
-	CEX2A_avail = stat->st_count -
-			(stat->disabled_count + stat->user_disabled_count);
-	if (PCICA_avail || PCIXCC_MCL3_avail || CEX2C_avail || CEX2A_avail) {
-		/**
-		 * bitlength is a factor, PCICA or CEX2A are the most capable,
-		 * even with the new MCL for PCIXCC.
-		 */
-		if ((bytelength < PCIXCC_MIN_MOD_SIZE) ||
-		    (!ext_bitlens && (bytelength < OLD_PCIXCC_MIN_MOD_SIZE))) {
-			if (PCICA_avail) {
-				*dev_type_p = PCICA;
-				return 0;
-			}
-			if (CEX2A_avail) {
-				*dev_type_p = CEX2A;
-				return 0;
-			}
-			return -1;
-		}
-
-		index_to_use = count % (PCICA_avail + PCIXCC_MCL3_avail +
-					CEX2C_avail + CEX2A_avail);
-		if (index_to_use < PCICA_avail)
-			*dev_type_p = PCICA;
-		else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail))
-			*dev_type_p = PCIXCC_MCL3;
-		else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail +
-					 CEX2C_avail))
-			*dev_type_p = CEX2C;
-		else
-			*dev_type_p = CEX2A;
-		count++;
-		return 0;
-	}
-
-	/* Less than OLD_PCIXCC_MIN_MOD_SIZE cannot go to a PCIXCC_MCL2 */
-	if (bytelength < OLD_PCIXCC_MIN_MOD_SIZE)
-		return -1;
-	stat = &z90crypt.hdware_info->type_mask[PCIXCC_MCL2];
-	if (stat->st_count >
-	    (stat->disabled_count + stat->user_disabled_count)) {
-		*dev_type_p = PCIXCC_MCL2;
-		return 0;
-	}
-
-	/**
-	 * Less than PCICC_MIN_MOD_SIZE or more than OLD_PCICC_MAX_MOD_SIZE
-	 * (if we don't have the MCL applied and the newer bitlengths enabled)
-	 * cannot go to a PCICC
-	 */
-	if ((bytelength < PCICC_MIN_MOD_SIZE) ||
-	    (!ext_bitlens && (bytelength > OLD_PCICC_MAX_MOD_SIZE))) {
-		return -1;
-	}
-	stat = &z90crypt.hdware_info->type_mask[PCICC];
-	if (stat->st_count >
-	    (stat->disabled_count + stat->user_disabled_count)) {
-		*dev_type_p = PCICC;
-		return 0;
-	}
-
-	return -1;
-}
-
-/**
- * Try the selected number, then the selected type (can be ANYDEV)
- */
-static inline int
-select_device(int *dev_type_p, int *device_nr_p, int bytelength)
-{
-	int i, indx, devTp, low_count, low_indx;
-	struct device_x *index_p;
-	struct device *dev_ptr;
-
-	PDEBUG("device type = %d, index = %d\n", *dev_type_p, *device_nr_p);
-	if ((*device_nr_p >= 0) && (*device_nr_p < Z90CRYPT_NUM_DEVS)) {
-		PDEBUG("trying index = %d\n", *device_nr_p);
-		dev_ptr = z90crypt.device_p[*device_nr_p];
-
-		if (dev_ptr &&
-		    (dev_ptr->dev_stat != DEV_GONE) &&
-		    (dev_ptr->disabled == 0) &&
-		    (dev_ptr->user_disabled == 0)) {
-			PDEBUG("selected by number, index = %d\n",
-			       *device_nr_p);
-			*dev_type_p = dev_ptr->dev_type;
-			return *device_nr_p;
-		}
-	}
-	*device_nr_p = -1;
-	PDEBUG("trying type = %d\n", *dev_type_p);
-	devTp = *dev_type_p;
-	if (select_device_type(&devTp, bytelength) == -1) {
-		PDEBUG("failed to select by type\n");
-		return -1;
-	}
-	PDEBUG("selected type = %d\n", devTp);
-	index_p = &z90crypt.hdware_info->type_x_addr[devTp];
-	low_count = 0x0000FFFF;
-	low_indx = -1;
-	for (i = 0; i < z90crypt.hdware_info->type_mask[devTp].st_count; i++) {
-		indx = index_p->device_index[i];
-		dev_ptr = z90crypt.device_p[indx];
-		if (dev_ptr &&
-		    (dev_ptr->dev_stat != DEV_GONE) &&
-		    (dev_ptr->disabled == 0) &&
-		    (dev_ptr->user_disabled == 0) &&
-		    (devTp == dev_ptr->dev_type) &&
-		    (low_count > dev_ptr->dev_caller_count)) {
-			low_count = dev_ptr->dev_caller_count;
-			low_indx = indx;
-		}
-	}
-	*device_nr_p = low_indx;
-	return low_indx;
-}
-
-static inline int
-send_to_crypto_device(struct work_element *we_p)
-{
-	struct caller *caller_p;
-	struct device *device_p;
-	int dev_nr;
-	int bytelen = ((struct ica_rsa_modexpo *)we_p->buffer)->inputdatalength;
-
-	if (!we_p->requestptr)
-		return SEN_FATAL_ERROR;
-	caller_p = (struct caller *)we_p->requestptr;
-	dev_nr = we_p->devindex;
-	if (select_device(&we_p->devtype, &dev_nr, bytelen) == -1) {
-		if (z90crypt.hdware_info->hdware_mask.st_count != 0)
-			return SEN_RETRY;
-		else
-			return SEN_NOT_AVAIL;
-	}
-	we_p->devindex = dev_nr;
-	device_p = z90crypt.device_p[dev_nr];
-	if (!device_p)
-		return SEN_NOT_AVAIL;
-	if (device_p->dev_type != we_p->devtype)
-		return SEN_RETRY;
-	if (device_p->dev_caller_count >= device_p->dev_q_depth)
-		return SEN_QUEUE_FULL;
-	PDEBUG("device number prior to send: %d\n", dev_nr);
-	switch (send_to_AP(dev_nr, z90crypt.cdx,
-			   caller_p->caller_dev_dep_req_l,
-			   caller_p->caller_dev_dep_req_p)) {
-	case DEV_SEN_EXCEPTION:
-		PRINTKC("Exception during send to device %d\n", dev_nr);
-		z90crypt.terminating = 1;
-		return SEN_FATAL_ERROR;
-	case DEV_GONE:
-		PRINTK("Device %d not available\n", dev_nr);
-		remove_device(device_p);
-		return SEN_NOT_AVAIL;
-	case DEV_EMPTY:
-		return SEN_NOT_AVAIL;
-	case DEV_NO_WORK:
-		return SEN_FATAL_ERROR;
-	case DEV_BAD_MESSAGE:
-		return SEN_USER_ERROR;
-	case DEV_QUEUE_FULL:
-		return SEN_QUEUE_FULL;
-	default:
-	case DEV_ONLINE:
-		break;
-	}
-	list_add_tail(&(caller_p->caller_liste), &(device_p->dev_caller_list));
-	device_p->dev_caller_count++;
-	return 0;
-}
-
-/**
- * Send puts the user's work on one of two queues:
- *   the pending queue if the send was successful
- *   the request queue if the send failed because device full or busy
- */
-static inline int
-z90crypt_send(struct work_element *we_p, const char *buf)
-{
-	int rv;
-
-	PDEBUG("PID %d\n", PID());
-
-	if (CHK_RDWRMASK(we_p->status[0]) != STAT_NOWORK) {
-		PDEBUG("PID %d tried to send more work but has outstanding "
-		       "work.\n", PID());
-		return -EWORKPEND;
-	}
-	we_p->devindex = -1; // Reset device number
-	spin_lock_irq(&queuespinlock);
-	rv = send_to_crypto_device(we_p);
-	switch (rv) {
-	case 0:
-		we_p->requestsent = jiffies;
-		we_p->audit[0] |= FP_SENT;
-		list_add_tail(&we_p->liste, &pending_list);
-		++pendingq_count;
-		we_p->audit[0] |= FP_PENDING;
-		break;
-	case SEN_BUSY:
-	case SEN_QUEUE_FULL:
-		rv = 0;
-		we_p->devindex = -1; // any device will do
-		we_p->requestsent = jiffies;
-		list_add_tail(&we_p->liste, &request_list);
-		++requestq_count;
-		we_p->audit[0] |= FP_REQUEST;
-		break;
-	case SEN_RETRY:
-		rv = -ERESTARTSYS;
-		break;
-	case SEN_NOT_AVAIL:
-		PRINTK("*** No devices available.\n");
-		rv = we_p->retcode = -ENODEV;
-		we_p->status[0] |= STAT_FAILED;
-		break;
-	case REC_OPERAND_INV:
-	case REC_OPERAND_SIZE:
-	case REC_EVEN_MOD:
-	case REC_INVALID_PAD:
-		rv = we_p->retcode = -EINVAL;
-		we_p->status[0] |= STAT_FAILED;
-		break;
-	default:
-		we_p->retcode = rv;
-		we_p->status[0] |= STAT_FAILED;
-		break;
-	}
-	if (rv != -ERESTARTSYS)
-		SET_RDWRMASK(we_p->status[0], STAT_WRITTEN);
-	spin_unlock_irq(&queuespinlock);
-	if (rv == 0)
-		tasklet_schedule(&reader_tasklet);
-	return rv;
-}
-
-/**
- * process_results copies the user's work from kernel space.
- */
-static inline int
-z90crypt_process_results(struct work_element *we_p, char __user *buf)
-{
-	int rv;
-
-	PDEBUG("we_p %p (PID %d)\n", we_p, PID());
-
-	LONG2DEVPTR(we_p->devindex)->dev_total_req_cnt++;
-	SET_RDWRMASK(we_p->status[0], STAT_READPEND);
-
-	rv = 0;
-	if (!we_p->buffer) {
-		PRINTK("we_p %p PID %d in STAT_READPEND: buffer NULL.\n",
-			we_p, PID());
-		rv = -ENOBUFF;
-	}
-
-	if (!rv)
-		if ((rv = copy_to_user(buf, we_p->buffer, we_p->buff_size))) {
-			PDEBUG("copy_to_user failed: rv = %d\n", rv);
-			rv = -EFAULT;
-		}
-
-	if (!rv)
-		rv = we_p->retcode;
-	if (!rv)
-		if (we_p->resp_buff_size
-		    &&	copy_to_user(we_p->resp_addr, we_p->resp_buff,
-				     we_p->resp_buff_size))
-			rv = -EFAULT;
-
-	SET_RDWRMASK(we_p->status[0], STAT_NOWORK);
-	return rv;
-}
-
-static unsigned char NULL_psmid[8] =
-{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-
-/**
- * Used in device configuration functions
- */
-#define MAX_RESET 90
-
-/**
- * This is used only for PCICC support
- */
-static inline int
-is_PKCS11_padded(unsigned char *buffer, int length)
-{
-	int i;
-	if ((buffer[0] != 0x00) || (buffer[1] != 0x01))
-		return 0;
-	for (i = 2; i < length; i++)
-		if (buffer[i] != 0xFF)
-			break;
-	if ((i < 10) || (i == length))
-		return 0;
-	if (buffer[i] != 0x00)
-		return 0;
-	return 1;
-}
-
-/**
- * This is used only for PCICC support
- */
-static inline int
-is_PKCS12_padded(unsigned char *buffer, int length)
-{
-	int i;
-	if ((buffer[0] != 0x00) || (buffer[1] != 0x02))
-		return 0;
-	for (i = 2; i < length; i++)
-		if (buffer[i] == 0x00)
-			break;
-	if ((i < 10) || (i == length))
-		return 0;
-	if (buffer[i] != 0x00)
-		return 0;
-	return 1;
-}
-
-/**
- * builds struct caller and converts message from generic format to
- * device-dependent format
- * func is ICARSAMODEXPO or ICARSACRT
- * function is PCI_FUNC_KEY_ENCRYPT or PCI_FUNC_KEY_DECRYPT
- */
-static inline int
-build_caller(struct work_element *we_p, short function)
-{
-	int rv;
-	struct caller *caller_p = (struct caller *)we_p->requestptr;
-
-	if ((we_p->devtype != PCICC) && (we_p->devtype != PCICA) &&
-	    (we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) &&
-	    (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A))
-		return SEN_NOT_AVAIL;
-
-	memcpy(caller_p->caller_id, we_p->caller_id,
-	       sizeof(caller_p->caller_id));
-	caller_p->caller_dev_dep_req_p = caller_p->caller_dev_dep_req;
-	caller_p->caller_dev_dep_req_l = MAX_RESPONSE_SIZE;
-	caller_p->caller_buf_p = we_p->buffer;
-	INIT_LIST_HEAD(&(caller_p->caller_liste));
-
-	rv = convert_request(we_p->buffer, we_p->funccode, function,
-			     z90crypt.cdx, we_p->devtype,
-			     &caller_p->caller_dev_dep_req_l,
-			     caller_p->caller_dev_dep_req_p);
-	if (rv) {
-		if (rv == SEN_NOT_AVAIL)
-			PDEBUG("request can't be processed on hdwr avail\n");
-		else
-			PRINTK("Error from convert_request: %d\n", rv);
-	}
-	else
-		memcpy(&(caller_p->caller_dev_dep_req_p[4]), we_p->caller_id,8);
-	return rv;
-}
-
-static inline void
-unbuild_caller(struct device *device_p, struct caller *caller_p)
-{
-	if (!caller_p)
-		return;
-	if (caller_p->caller_liste.next && caller_p->caller_liste.prev)
-		if (!list_empty(&caller_p->caller_liste)) {
-			list_del_init(&caller_p->caller_liste);
-			device_p->dev_caller_count--;
-		}
-	memset(caller_p->caller_id, 0, sizeof(caller_p->caller_id));
-}
-
-static inline int
-get_crypto_request_buffer(struct work_element *we_p)
-{
-	struct ica_rsa_modexpo *mex_p;
-	struct ica_rsa_modexpo_crt *crt_p;
-	unsigned char *temp_buffer;
-	short function;
-	int rv;
-
-	mex_p =	(struct ica_rsa_modexpo *) we_p->buffer;
-	crt_p = (struct ica_rsa_modexpo_crt *) we_p->buffer;
-
-	PDEBUG("device type input = %d\n", we_p->devtype);
-
-	if (z90crypt.terminating)
-		return REC_NO_RESPONSE;
-	if (memcmp(we_p->caller_id, NULL_psmid, 8) == 0) {
-		PRINTK("psmid zeroes\n");
-		return SEN_FATAL_ERROR;
-	}
-	if (!we_p->buffer) {
-		PRINTK("buffer pointer NULL\n");
-		return SEN_USER_ERROR;
-	}
-	if (!we_p->requestptr) {
-		PRINTK("caller pointer NULL\n");
-		return SEN_USER_ERROR;
-	}
-
-	if ((we_p->devtype != PCICA) && (we_p->devtype != PCICC) &&
-	    (we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) &&
-	    (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A) &&
-	    (we_p->devtype != ANYDEV)) {
-		PRINTK("invalid device type\n");
-		return SEN_USER_ERROR;
-	}
-
-	if ((mex_p->inputdatalength < 1) ||
-	    (mex_p->inputdatalength > MAX_MOD_SIZE)) {
-		PRINTK("inputdatalength[%d] is not valid\n",
-		       mex_p->inputdatalength);
-		return SEN_USER_ERROR;
-	}
-
-	if (mex_p->outputdatalength < mex_p->inputdatalength) {
-		PRINTK("outputdatalength[%d] < inputdatalength[%d]\n",
-		       mex_p->outputdatalength, mex_p->inputdatalength);
-		return SEN_USER_ERROR;
-	}
-
-	if (!mex_p->inputdata || !mex_p->outputdata) {
-		PRINTK("inputdata[%p] or outputdata[%p] is NULL\n",
-		       mex_p->outputdata, mex_p->inputdata);
-		return SEN_USER_ERROR;
-	}
-
-	/**
-	 * As long as outputdatalength is big enough, we can set the
-	 * outputdatalength equal to the inputdatalength, since that is the
-	 * number of bytes we will copy in any case
-	 */
-	mex_p->outputdatalength = mex_p->inputdatalength;
-
-	rv = 0;
-	switch (we_p->funccode) {
-	case ICARSAMODEXPO:
-		if (!mex_p->b_key || !mex_p->n_modulus)
-			rv = SEN_USER_ERROR;
-		break;
-	case ICARSACRT:
-		if (!IS_EVEN(crt_p->inputdatalength)) {
-			PRINTK("inputdatalength[%d] is odd, CRT form\n",
-			       crt_p->inputdatalength);
-			rv = SEN_USER_ERROR;
-			break;
-		}
-		if (!crt_p->bp_key ||
-		    !crt_p->bq_key ||
-		    !crt_p->np_prime ||
-		    !crt_p->nq_prime ||
-		    !crt_p->u_mult_inv) {
-			PRINTK("CRT form, bad data: %p/%p/%p/%p/%p\n",
-			       crt_p->bp_key, crt_p->bq_key,
-			       crt_p->np_prime, crt_p->nq_prime,
-			       crt_p->u_mult_inv);
-			rv = SEN_USER_ERROR;
-		}
-		break;
-	default:
-		PRINTK("bad func = %d\n", we_p->funccode);
-		rv = SEN_USER_ERROR;
-		break;
-	}
-	if (rv != 0)
-		return rv;
-
-	if (select_device_type(&we_p->devtype, mex_p->inputdatalength) < 0)
-		return SEN_NOT_AVAIL;
-
-	temp_buffer = (unsigned char *)we_p + sizeof(struct work_element) +
-		      sizeof(struct caller);
-	if (copy_from_user(temp_buffer, mex_p->inputdata,
-			   mex_p->inputdatalength) != 0)
-		return SEN_RELEASED;
-
-	function = PCI_FUNC_KEY_ENCRYPT;
-	switch (we_p->devtype) {
-	/* PCICA and CEX2A do everything with a simple RSA mod-expo operation */
-	case PCICA:
-	case CEX2A:
-		function = PCI_FUNC_KEY_ENCRYPT;
-		break;
-	/**
-	 * PCIXCC_MCL2 does all Mod-Expo form with a simple RSA mod-expo
-	 * operation, and all CRT forms with a PKCS-1.2 format decrypt.
-	 * PCIXCC_MCL3 and CEX2C do all Mod-Expo and CRT forms with a simple RSA
-	 * mod-expo operation
-	 */
-	case PCIXCC_MCL2:
-		if (we_p->funccode == ICARSAMODEXPO)
-			function = PCI_FUNC_KEY_ENCRYPT;
-		else
-			function = PCI_FUNC_KEY_DECRYPT;
-		break;
-	case PCIXCC_MCL3:
-	case CEX2C:
-		if (we_p->funccode == ICARSAMODEXPO)
-			function = PCI_FUNC_KEY_ENCRYPT;
-		else
-			function = PCI_FUNC_KEY_DECRYPT;
-		break;
-	/**
-	 * PCICC does everything as a PKCS-1.2 format request
-	 */
-	case PCICC:
-		/* PCICC cannot handle input that is is PKCS#1.1 padded */
-		if (is_PKCS11_padded(temp_buffer, mex_p->inputdatalength)) {
-			return SEN_NOT_AVAIL;
-		}
-		if (we_p->funccode == ICARSAMODEXPO) {
-			if (is_PKCS12_padded(temp_buffer,
-					     mex_p->inputdatalength))
-				function = PCI_FUNC_KEY_ENCRYPT;
-			else
-				function = PCI_FUNC_KEY_DECRYPT;
-		} else
-			/* all CRT forms are decrypts */
-			function = PCI_FUNC_KEY_DECRYPT;
-		break;
-	}
-	PDEBUG("function: %04x\n", function);
-	rv = build_caller(we_p, function);
-	PDEBUG("rv from build_caller = %d\n", rv);
-	return rv;
-}
-
-static inline int
-z90crypt_prepare(struct work_element *we_p, unsigned int funccode,
-		 const char __user *buffer)
-{
-	int rv;
-
-	we_p->devindex = -1;
-	if (funccode == ICARSAMODEXPO)
-		we_p->buff_size = sizeof(struct ica_rsa_modexpo);
-	else
-		we_p->buff_size = sizeof(struct ica_rsa_modexpo_crt);
-
-	if (copy_from_user(we_p->buffer, buffer, we_p->buff_size))
-		return -EFAULT;
-
-	we_p->audit[0] |= FP_COPYFROM;
-	SET_RDWRMASK(we_p->status[0], STAT_WRITTEN);
-	we_p->funccode = funccode;
-	we_p->devtype = -1;
-	we_p->audit[0] |= FP_BUFFREQ;
-	rv = get_crypto_request_buffer(we_p);
-	switch (rv) {
-	case 0:
-		we_p->audit[0] |= FP_BUFFGOT;
-		break;
-	case SEN_USER_ERROR:
-		rv = -EINVAL;
-		break;
-	case SEN_QUEUE_FULL:
-		rv = 0;
-		break;
-	case SEN_RELEASED:
-		rv = -EFAULT;
-		break;
-	case REC_NO_RESPONSE:
-		rv = -ENODEV;
-		break;
-	case SEN_NOT_AVAIL:
-	case EGETBUFF:
-		rv = -EGETBUFF;
-		break;
-	default:
-		PRINTK("rv = %d\n", rv);
-		rv = -EGETBUFF;
-		break;
-	}
-	if (CHK_RDWRMASK(we_p->status[0]) == STAT_WRITTEN)
-		SET_RDWRMASK(we_p->status[0], STAT_DEFAULT);
-	return rv;
-}
-
-static inline void
-purge_work_element(struct work_element *we_p)
-{
-	struct list_head *lptr;
-
-	spin_lock_irq(&queuespinlock);
-	list_for_each(lptr, &request_list) {
-		if (lptr == &we_p->liste) {
-			list_del_init(lptr);
-			requestq_count--;
-			break;
-		}
-	}
-	list_for_each(lptr, &pending_list) {
-		if (lptr == &we_p->liste) {
-			list_del_init(lptr);
-			pendingq_count--;
-			break;
-		}
-	}
-	spin_unlock_irq(&queuespinlock);
-}
-
-/**
- * Build the request and send it.
- */
-static inline int
-z90crypt_rsa(struct priv_data *private_data_p, pid_t pid,
-	     unsigned int cmd, unsigned long arg)
-{
-	struct work_element *we_p;
-	int rv;
-
-	if ((rv = allocate_work_element(&we_p, private_data_p, pid))) {
-		PDEBUG("PID %d: allocate_work_element returned ENOMEM\n", pid);
-		return rv;
-	}
-	if ((rv = z90crypt_prepare(we_p, cmd, (const char __user *)arg)))
-		PDEBUG("PID %d: rv = %d from z90crypt_prepare\n", pid, rv);
-	if (!rv)
-		if ((rv = z90crypt_send(we_p, (const char *)arg)))
-			PDEBUG("PID %d: rv %d from z90crypt_send.\n", pid, rv);
-	if (!rv) {
-		we_p->audit[0] |= FP_ASLEEP;
-		wait_event(we_p->waitq, atomic_read(&we_p->alarmrung));
-		we_p->audit[0] |= FP_AWAKE;
-		rv = we_p->retcode;
-	}
-	if (!rv)
-		rv = z90crypt_process_results(we_p, (char __user *)arg);
-
-	if ((we_p->status[0] & STAT_FAILED)) {
-		switch (rv) {
-		/**
-		 * EINVAL *after* receive is almost always a padding error or
-		 * length error issued by a coprocessor (not an accelerator).
-		 * We convert this return value to -EGETBUFF which should
-		 * trigger a fallback to software.
-		 */
-		case -EINVAL:
-			if ((we_p->devtype != PCICA) &&
-			    (we_p->devtype != CEX2A))
-				rv = -EGETBUFF;
-			break;
-		case -ETIMEOUT:
-			if (z90crypt.mask.st_count > 0)
-				rv = -ERESTARTSYS; // retry with another
-			else
-				rv = -ENODEV; // no cards left
-		/* fall through to clean up request queue */
-		case -ERESTARTSYS:
-		case -ERELEASED:
-			switch (CHK_RDWRMASK(we_p->status[0])) {
-			case STAT_WRITTEN:
-				purge_work_element(we_p);
-				break;
-			case STAT_READPEND:
-			case STAT_NOWORK:
-			default:
-				break;
-			}
-			break;
-		default:
-			we_p->status[0] ^= STAT_FAILED;
-			break;
-		}
-	}
-	free_page((long)we_p);
-	return rv;
-}
-
-/**
- * This function is a little long, but it's really just one large switch
- * statement.
- */
-static long
-z90crypt_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	struct priv_data *private_data_p = filp->private_data;
-	unsigned char *status;
-	unsigned char *qdepth;
-	unsigned int *reqcnt;
-	struct ica_z90_status *pstat;
-	int ret, i, loopLim, tempstat;
-	static int deprecated_msg_count1 = 0;
-	static int deprecated_msg_count2 = 0;
-
-	PDEBUG("filp %p (PID %d), cmd 0x%08X\n", filp, PID(), cmd);
-	PDEBUG("cmd 0x%08X: dir %s, size 0x%04X, type 0x%02X, nr 0x%02X\n",
-		cmd,
-		!_IOC_DIR(cmd) ? "NO"
-		: ((_IOC_DIR(cmd) == (_IOC_READ|_IOC_WRITE)) ? "RW"
-		: ((_IOC_DIR(cmd) == _IOC_READ) ? "RD"
-		: "WR")),
-		_IOC_SIZE(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd));
-
-	if (_IOC_TYPE(cmd) != Z90_IOCTL_MAGIC) {
-		PRINTK("cmd 0x%08X contains bad magic\n", cmd);
-		return -ENOTTY;
-	}
-
-	ret = 0;
-	switch (cmd) {
-	case ICARSAMODEXPO:
-	case ICARSACRT:
-		if (quiesce_z90crypt) {
-			ret = -EQUIESCE;
-			break;
-		}
-		ret = -ENODEV; // Default if no devices
-		loopLim = z90crypt.hdware_info->hdware_mask.st_count -
-			(z90crypt.hdware_info->hdware_mask.disabled_count +
-			 z90crypt.hdware_info->hdware_mask.user_disabled_count);
-		for (i = 0; i < loopLim; i++) {
-			ret = z90crypt_rsa(private_data_p, PID(), cmd, arg);
-			if (ret != -ERESTARTSYS)
-				break;
-		}
-		if (ret == -ERESTARTSYS)
-			ret = -ENODEV;
-		break;
-
-	case Z90STAT_TOTALCOUNT:
-		tempstat = get_status_totalcount();
-		if (copy_to_user((int __user *)arg, &tempstat,sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PCICACOUNT:
-		tempstat = get_status_PCICAcount();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PCICCCOUNT:
-		tempstat = get_status_PCICCcount();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PCIXCCMCL2COUNT:
-		tempstat = get_status_PCIXCCMCL2count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PCIXCCMCL3COUNT:
-		tempstat = get_status_PCIXCCMCL3count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_CEX2CCOUNT:
-		tempstat = get_status_CEX2Ccount();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_CEX2ACOUNT:
-		tempstat = get_status_CEX2Acount();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_REQUESTQ_COUNT:
-		tempstat = get_status_requestq_count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_PENDINGQ_COUNT:
-		tempstat = get_status_pendingq_count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_TOTALOPEN_COUNT:
-		tempstat = get_status_totalopen_count();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_DOMAIN_INDEX:
-		tempstat = get_status_domain_index();
-		if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90STAT_STATUS_MASK:
-		status = kmalloc(Z90CRYPT_NUM_APS, GFP_KERNEL);
-		if (!status) {
-			PRINTK("kmalloc for status failed!\n");
-			ret = -ENOMEM;
-			break;
-		}
-		get_status_status_mask(status);
-		if (copy_to_user((char __user *) arg, status, Z90CRYPT_NUM_APS)
-									!= 0)
-			ret = -EFAULT;
-		kfree(status);
-		break;
-
-	case Z90STAT_QDEPTH_MASK:
-		qdepth = kmalloc(Z90CRYPT_NUM_APS, GFP_KERNEL);
-		if (!qdepth) {
-			PRINTK("kmalloc for qdepth failed!\n");
-			ret = -ENOMEM;
-			break;
-		}
-		get_status_qdepth_mask(qdepth);
-		if (copy_to_user((char __user *) arg, qdepth, Z90CRYPT_NUM_APS) != 0)
-			ret = -EFAULT;
-		kfree(qdepth);
-		break;
-
-	case Z90STAT_PERDEV_REQCNT:
-		reqcnt = kmalloc(sizeof(int) * Z90CRYPT_NUM_APS, GFP_KERNEL);
-		if (!reqcnt) {
-			PRINTK("kmalloc for reqcnt failed!\n");
-			ret = -ENOMEM;
-			break;
-		}
-		get_status_perdevice_reqcnt(reqcnt);
-		if (copy_to_user((char __user *) arg, reqcnt,
-				 Z90CRYPT_NUM_APS * sizeof(int)) != 0)
-			ret = -EFAULT;
-		kfree(reqcnt);
-		break;
-
-		/* THIS IS DEPRECATED.	USE THE NEW STATUS CALLS */
-	case ICAZ90STATUS:
-		if (deprecated_msg_count1 < 20) {
-			PRINTK("deprecated call to ioctl (ICAZ90STATUS)!\n");
-			deprecated_msg_count1++;
-			if (deprecated_msg_count1 == 20)
-				PRINTK("No longer issuing messages related to "
-				       "deprecated call to ICAZ90STATUS.\n");
-		}
-
-		pstat = kmalloc(sizeof(struct ica_z90_status), GFP_KERNEL);
-		if (!pstat) {
-			PRINTK("kmalloc for pstat failed!\n");
-			ret = -ENOMEM;
-			break;
-		}
-
-		pstat->totalcount	 = get_status_totalcount();
-		pstat->leedslitecount	 = get_status_PCICAcount();
-		pstat->leeds2count	 = get_status_PCICCcount();
-		pstat->requestqWaitCount = get_status_requestq_count();
-		pstat->pendingqWaitCount = get_status_pendingq_count();
-		pstat->totalOpenCount	 = get_status_totalopen_count();
-		pstat->cryptoDomain	 = get_status_domain_index();
-		get_status_status_mask(pstat->status);
-		get_status_qdepth_mask(pstat->qdepth);
-
-		if (copy_to_user((struct ica_z90_status __user *) arg, pstat,
-				 sizeof(struct ica_z90_status)) != 0)
-			ret = -EFAULT;
-		kfree(pstat);
-		break;
-
-		/* THIS IS DEPRECATED.	USE THE NEW STATUS CALLS */
-	case Z90STAT_PCIXCCCOUNT:
-		if (deprecated_msg_count2 < 20) {
-			PRINTK("deprecated ioctl (Z90STAT_PCIXCCCOUNT)!\n");
-			deprecated_msg_count2++;
-			if (deprecated_msg_count2 == 20)
-				PRINTK("No longer issuing messages about depre"
-				       "cated ioctl Z90STAT_PCIXCCCOUNT.\n");
-		}
-
-		tempstat = get_status_PCIXCCcount();
-		if (copy_to_user((int *)arg, &tempstat, sizeof(int)) != 0)
-			ret = -EFAULT;
-		break;
-
-	case Z90QUIESCE:
-		if (current->euid != 0) {
-			PRINTK("QUIESCE fails: euid %d\n",
-			       current->euid);
-			ret = -EACCES;
-		} else {
-			PRINTK("QUIESCE device from PID %d\n", PID());
-			quiesce_z90crypt = 1;
-		}
-		break;
-
-	default:
-		/* user passed an invalid IOCTL number */
-		PDEBUG("cmd 0x%08X contains invalid ioctl code\n", cmd);
-		ret = -ENOTTY;
-		break;
-	}
-
-	return ret;
-}
-
-static inline int
-sprintcl(unsigned char *outaddr, unsigned char *addr, unsigned int len)
-{
-	int hl, i;
-
-	hl = 0;
-	for (i = 0; i < len; i++)
-		hl += sprintf(outaddr+hl, "%01x", (unsigned int) addr[i]);
-	hl += sprintf(outaddr+hl, " ");
-
-	return hl;
-}
-
-static inline int
-sprintrw(unsigned char *outaddr, unsigned char *addr, unsigned int len)
-{
-	int hl, inl, c, cx;
-
-	hl = sprintf(outaddr, "	   ");
-	inl = 0;
-	for (c = 0; c < (len / 16); c++) {
-		hl += sprintcl(outaddr+hl, addr+inl, 16);
-		inl += 16;
-	}
-
-	cx = len%16;
-	if (cx) {
-		hl += sprintcl(outaddr+hl, addr+inl, cx);
-		inl += cx;
-	}
-
-	hl += sprintf(outaddr+hl, "\n");
-
-	return hl;
-}
-
-static inline int
-sprinthx(unsigned char *title, unsigned char *outaddr,
-	 unsigned char *addr, unsigned int len)
-{
-	int hl, inl, r, rx;
-
-	hl = sprintf(outaddr, "\n%s\n", title);
-	inl = 0;
-	for (r = 0; r < (len / 64); r++) {
-		hl += sprintrw(outaddr+hl, addr+inl, 64);
-		inl += 64;
-	}
-	rx = len % 64;
-	if (rx) {
-		hl += sprintrw(outaddr+hl, addr+inl, rx);
-		inl += rx;
-	}
-
-	hl += sprintf(outaddr+hl, "\n");
-
-	return hl;
-}
-
-static inline int
-sprinthx4(unsigned char *title, unsigned char *outaddr,
-	  unsigned int *array, unsigned int len)
-{
-	int hl, r;
-
-	hl = sprintf(outaddr, "\n%s\n", title);
-
-	for (r = 0; r < len; r++) {
-		if ((r % 8) == 0)
-			hl += sprintf(outaddr+hl, "    ");
-		hl += sprintf(outaddr+hl, "%08X ", array[r]);
-		if ((r % 8) == 7)
-			hl += sprintf(outaddr+hl, "\n");
-	}
-
-	hl += sprintf(outaddr+hl, "\n");
-
-	return hl;
-}
-
-static int
-z90crypt_status(char *resp_buff, char **start, off_t offset,
-		int count, int *eof, void *data)
-{
-	unsigned char *workarea;
-	int len;
-
-	/* resp_buff is a page. Use the right half for a work area */
-	workarea = resp_buff+2000;
-	len = 0;
-	len += sprintf(resp_buff+len, "\nz90crypt version: %d.%d.%d\n",
-		z90crypt_VERSION, z90crypt_RELEASE, z90crypt_VARIANT);
-	len += sprintf(resp_buff+len, "Cryptographic domain: %d\n",
-		get_status_domain_index());
-	len += sprintf(resp_buff+len, "Total device count: %d\n",
-		get_status_totalcount());
-	len += sprintf(resp_buff+len, "PCICA count: %d\n",
-		get_status_PCICAcount());
-	len += sprintf(resp_buff+len, "PCICC count: %d\n",
-		get_status_PCICCcount());
-	len += sprintf(resp_buff+len, "PCIXCC MCL2 count: %d\n",
-		get_status_PCIXCCMCL2count());
-	len += sprintf(resp_buff+len, "PCIXCC MCL3 count: %d\n",
-		get_status_PCIXCCMCL3count());
-	len += sprintf(resp_buff+len, "CEX2C count: %d\n",
-		get_status_CEX2Ccount());
-	len += sprintf(resp_buff+len, "CEX2A count: %d\n",
-		get_status_CEX2Acount());
-	len += sprintf(resp_buff+len, "requestq count: %d\n",
-		get_status_requestq_count());
-	len += sprintf(resp_buff+len, "pendingq count: %d\n",
-		get_status_pendingq_count());
-	len += sprintf(resp_buff+len, "Total open handles: %d\n\n",
-		get_status_totalopen_count());
-	len += sprinthx(
-		"Online devices: 1=PCICA 2=PCICC 3=PCIXCC(MCL2) "
-		"4=PCIXCC(MCL3) 5=CEX2C 6=CEX2A",
-		resp_buff+len,
-		get_status_status_mask(workarea),
-		Z90CRYPT_NUM_APS);
-	len += sprinthx("Waiting work element counts",
-		resp_buff+len,
-		get_status_qdepth_mask(workarea),
-		Z90CRYPT_NUM_APS);
-	len += sprinthx4(
-		"Per-device successfully completed request counts",
-		resp_buff+len,
-		get_status_perdevice_reqcnt((unsigned int *)workarea),
-		Z90CRYPT_NUM_APS);
-	*eof = 1;
-	memset(workarea, 0, Z90CRYPT_NUM_APS * sizeof(unsigned int));
-	return len;
-}
-
-static inline void
-disable_card(int card_index)
-{
-	struct device *devp;
-
-	devp = LONG2DEVPTR(card_index);
-	if (!devp || devp->user_disabled)
-		return;
-	devp->user_disabled = 1;
-	z90crypt.hdware_info->hdware_mask.user_disabled_count++;
-	if (devp->dev_type == -1)
-		return;
-	z90crypt.hdware_info->type_mask[devp->dev_type].user_disabled_count++;
-}
-
-static inline void
-enable_card(int card_index)
-{
-	struct device *devp;
-
-	devp = LONG2DEVPTR(card_index);
-	if (!devp || !devp->user_disabled)
-		return;
-	devp->user_disabled = 0;
-	z90crypt.hdware_info->hdware_mask.user_disabled_count--;
-	if (devp->dev_type == -1)
-		return;
-	z90crypt.hdware_info->type_mask[devp->dev_type].user_disabled_count--;
-}
-
-static int
-z90crypt_status_write(struct file *file, const char __user *buffer,
-		      unsigned long count, void *data)
-{
-	int j, eol;
-	unsigned char *lbuf, *ptr;
-	unsigned int local_count;
-
-#define LBUFSIZE 1200
-	lbuf = kmalloc(LBUFSIZE, GFP_KERNEL);
-	if (!lbuf) {
-		PRINTK("kmalloc failed!\n");
-		return 0;
-	}
-
-	if (count <= 0)
-		return 0;
-
-	local_count = UMIN((unsigned int)count, LBUFSIZE-1);
-
-	if (copy_from_user(lbuf, buffer, local_count) != 0) {
-		kfree(lbuf);
-		return -EFAULT;
-	}
-
-	lbuf[local_count] = '\0';
-
-	ptr = strstr(lbuf, "Online devices");
-	if (ptr == 0) {
-		PRINTK("Unable to parse data (missing \"Online devices\")\n");
-		kfree(lbuf);
-		return count;
-	}
-
-	ptr = strstr(ptr, "\n");
-	if (ptr == 0) {
-		PRINTK("Unable to parse data (missing newline after \"Online devices\")\n");
-		kfree(lbuf);
-		return count;
-	}
-	ptr++;
-
-	if (strstr(ptr, "Waiting work element counts") == NULL) {
-		PRINTK("Unable to parse data (missing \"Waiting work element counts\")\n");
-		kfree(lbuf);
-		return count;
-	}
-
-	j = 0;
-	eol = 0;
-	while ((j < 64) && (*ptr != '\0')) {
-		switch (*ptr) {
-		case '\t':
-		case ' ':
-			break;
-		case '\n':
-		default:
-			eol = 1;
-			break;
-		case '0':	// no device
-		case '1':	// PCICA
-		case '2':	// PCICC
-		case '3':	// PCIXCC_MCL2
-		case '4':	// PCIXCC_MCL3
-		case '5':	// CEX2C
-		case '6':       // CEX2A
-			j++;
-			break;
-		case 'd':
-		case 'D':
-			disable_card(j);
-			j++;
-			break;
-		case 'e':
-		case 'E':
-			enable_card(j);
-			j++;
-			break;
-		}
-		if (eol)
-			break;
-		ptr++;
-	}
-
-	kfree(lbuf);
-	return count;
-}
-
-/**
- * Functions that run under a timer, with no process id
- *
- * The task functions:
- *     z90crypt_reader_task
- *	 helper_send_work
- *	 helper_handle_work_element
- *	 helper_receive_rc
- *     z90crypt_config_task
- *     z90crypt_cleanup_task
- *
- * Helper functions:
- *     z90crypt_schedule_reader_timer
- *     z90crypt_schedule_reader_task
- *     z90crypt_schedule_config_task
- *     z90crypt_schedule_cleanup_task
- */
-static inline int
-receive_from_crypto_device(int index, unsigned char *psmid, int *buff_len_p,
-			   unsigned char *buff, unsigned char __user **dest_p_p)
-{
-	int dv, rv;
-	struct device *dev_ptr;
-	struct caller *caller_p;
-	struct ica_rsa_modexpo *icaMsg_p;
-	struct list_head *ptr, *tptr;
-
-	memcpy(psmid, NULL_psmid, sizeof(NULL_psmid));
-
-	if (z90crypt.terminating)
-		return REC_FATAL_ERROR;
-
-	caller_p = 0;
-	dev_ptr = z90crypt.device_p[index];
-	rv = 0;
-	do {
-		if (!dev_ptr || dev_ptr->disabled) {
-			rv = REC_NO_WORK; // a disabled device can't return work
-			break;
-		}
-		if (dev_ptr->dev_self_x != index) {
-			PRINTKC("Corrupt dev ptr\n");
-			z90crypt.terminating = 1;
-			rv = REC_FATAL_ERROR;
-			break;
-		}
-		if (!dev_ptr->dev_resp_l || !dev_ptr->dev_resp_p) {
-			dv = DEV_REC_EXCEPTION;
-			PRINTK("dev_resp_l = %d, dev_resp_p = %p\n",
-			       dev_ptr->dev_resp_l, dev_ptr->dev_resp_p);
-		} else {
-			PDEBUG("Dequeue called for device %d\n", index);
-			dv = receive_from_AP(index, z90crypt.cdx,
-					     dev_ptr->dev_resp_l,
-					     dev_ptr->dev_resp_p, psmid);
-		}
-		switch (dv) {
-		case DEV_REC_EXCEPTION:
-			rv = REC_FATAL_ERROR;
-			z90crypt.terminating = 1;
-			PRINTKC("Exception in receive from device %d\n",
-				index);
-			break;
-		case DEV_ONLINE:
-			rv = 0;
-			break;
-		case DEV_EMPTY:
-			rv = REC_EMPTY;
-			break;
-		case DEV_NO_WORK:
-			rv = REC_NO_WORK;
-			break;
-		case DEV_BAD_MESSAGE:
-		case DEV_GONE:
-		case REC_HARDWAR_ERR:
-		default:
-			rv = REC_NO_RESPONSE;
-			break;
-		}
-		if (rv)
-			break;
-		if (dev_ptr->dev_caller_count <= 0) {
-			rv = REC_USER_GONE;
-			break;
-	        }
-
-		list_for_each_safe(ptr, tptr, &dev_ptr->dev_caller_list) {
-			caller_p = list_entry(ptr, struct caller, caller_liste);
-			if (!memcmp(caller_p->caller_id, psmid,
-				    sizeof(caller_p->caller_id))) {
-				if (!list_empty(&caller_p->caller_liste)) {
-					list_del_init(ptr);
-					dev_ptr->dev_caller_count--;
-					break;
-				}
-			}
-			caller_p = 0;
-		}
-		if (!caller_p) {
-			PRINTKW("Unable to locate PSMID %02X%02X%02X%02X%02X"
-				"%02X%02X%02X in device list\n",
-				psmid[0], psmid[1], psmid[2], psmid[3],
-				psmid[4], psmid[5], psmid[6], psmid[7]);
-			rv = REC_USER_GONE;
-			break;
-		}
-
-		PDEBUG("caller_p after successful receive: %p\n", caller_p);
-		rv = convert_response(dev_ptr->dev_resp_p,
-				      caller_p->caller_buf_p, buff_len_p, buff);
-		switch (rv) {
-		case REC_USE_PCICA:
-			break;
-		case REC_OPERAND_INV:
-		case REC_OPERAND_SIZE:
-		case REC_EVEN_MOD:
-		case REC_INVALID_PAD:
-			PDEBUG("device %d: 'user error' %d\n", index, rv);
-			break;
-		case WRONG_DEVICE_TYPE:
-		case REC_HARDWAR_ERR:
-		case REC_BAD_MESSAGE:
-			PRINTKW("device %d: hardware error %d\n", index, rv);
-			rv = REC_NO_RESPONSE;
-			break;
-		default:
-			PDEBUG("device %d: rv = %d\n", index, rv);
-			break;
-		}
-	} while (0);
-
-	switch (rv) {
-	case 0:
-		PDEBUG("Successful receive from device %d\n", index);
-		icaMsg_p = (struct ica_rsa_modexpo *)caller_p->caller_buf_p;
-		*dest_p_p = icaMsg_p->outputdata;
-		if (*buff_len_p == 0)
-			PRINTK("Zero *buff_len_p\n");
-		break;
-	case REC_NO_RESPONSE:
-		PRINTKW("Removing device %d from availability\n", index);
-		remove_device(dev_ptr);
-		break;
-	}
-
-	if (caller_p)
-		unbuild_caller(dev_ptr, caller_p);
-
-	return rv;
-}
-
-static inline void
-helper_send_work(int index)
-{
-	struct work_element *rq_p;
-	int rv;
-
-	if (list_empty(&request_list))
-		return;
-	requestq_count--;
-	rq_p = list_entry(request_list.next, struct work_element, liste);
-	list_del_init(&rq_p->liste);
-	rq_p->audit[1] |= FP_REMREQUEST;
-	if (rq_p->devtype == SHRT2DEVPTR(index)->dev_type) {
-		rq_p->devindex = SHRT2LONG(index);
-		rv = send_to_crypto_device(rq_p);
-		if (rv == 0) {
-			rq_p->requestsent = jiffies;
-			rq_p->audit[0] |= FP_SENT;
-			list_add_tail(&rq_p->liste, &pending_list);
-			++pendingq_count;
-			rq_p->audit[0] |= FP_PENDING;
-		} else {
-			switch (rv) {
-			case REC_OPERAND_INV:
-			case REC_OPERAND_SIZE:
-			case REC_EVEN_MOD:
-			case REC_INVALID_PAD:
-				rq_p->retcode = -EINVAL;
-				break;
-			case SEN_NOT_AVAIL:
-			case SEN_RETRY:
-			case REC_NO_RESPONSE:
-			default:
-				if (z90crypt.mask.st_count > 1)
-					rq_p->retcode =
-						-ERESTARTSYS;
-				else
-					rq_p->retcode = -ENODEV;
-				break;
-			}
-			rq_p->status[0] |= STAT_FAILED;
-			rq_p->audit[1] |= FP_AWAKENING;
-			atomic_set(&rq_p->alarmrung, 1);
-			wake_up(&rq_p->waitq);
-		}
-	} else {
-		if (z90crypt.mask.st_count > 1)
-			rq_p->retcode = -ERESTARTSYS;
-		else
-			rq_p->retcode = -ENODEV;
-		rq_p->status[0] |= STAT_FAILED;
-		rq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&rq_p->alarmrung, 1);
-		wake_up(&rq_p->waitq);
-	}
-}
-
-static inline void
-helper_handle_work_element(int index, unsigned char psmid[8], int rc,
-			   int buff_len, unsigned char *buff,
-			   unsigned char __user *resp_addr)
-{
-	struct work_element *pq_p;
-	struct list_head *lptr, *tptr;
-
-	pq_p = 0;
-	list_for_each_safe(lptr, tptr, &pending_list) {
-		pq_p = list_entry(lptr, struct work_element, liste);
-		if (!memcmp(pq_p->caller_id, psmid, sizeof(pq_p->caller_id))) {
-			list_del_init(lptr);
-			pendingq_count--;
-			pq_p->audit[1] |= FP_NOTPENDING;
-			break;
-		}
-		pq_p = 0;
-	}
-
-	if (!pq_p) {
-		PRINTK("device %d has work but no caller exists on pending Q\n",
-		       SHRT2LONG(index));
-		return;
-	}
-
-	switch (rc) {
-		case 0:
-			pq_p->resp_buff_size = buff_len;
-			pq_p->audit[1] |= FP_RESPSIZESET;
-			if (buff_len) {
-				pq_p->resp_addr = resp_addr;
-				pq_p->audit[1] |= FP_RESPADDRCOPIED;
-				memcpy(pq_p->resp_buff, buff, buff_len);
-				pq_p->audit[1] |= FP_RESPBUFFCOPIED;
-			}
-			break;
-		case REC_OPERAND_INV:
-		case REC_OPERAND_SIZE:
-		case REC_EVEN_MOD:
-		case REC_INVALID_PAD:
-			PDEBUG("-EINVAL after application error %d\n", rc);
-			pq_p->retcode = -EINVAL;
-			pq_p->status[0] |= STAT_FAILED;
-			break;
-		case REC_USE_PCICA:
-			pq_p->retcode = -ERESTARTSYS;
-			pq_p->status[0] |= STAT_FAILED;
-			break;
-		case REC_NO_RESPONSE:
-		default:
-			if (z90crypt.mask.st_count > 1)
-				pq_p->retcode = -ERESTARTSYS;
-			else
-				pq_p->retcode = -ENODEV;
-			pq_p->status[0] |= STAT_FAILED;
-			break;
-	}
-	if ((pq_p->status[0] != STAT_FAILED) || (pq_p->retcode != -ERELEASED)) {
-		pq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&pq_p->alarmrung, 1);
-		wake_up(&pq_p->waitq);
-	}
-}
-
-/**
- * return TRUE if the work element should be removed from the queue
- */
-static inline int
-helper_receive_rc(int index, int *rc_p)
-{
-	switch (*rc_p) {
-	case 0:
-	case REC_OPERAND_INV:
-	case REC_OPERAND_SIZE:
-	case REC_EVEN_MOD:
-	case REC_INVALID_PAD:
-	case REC_USE_PCICA:
-		break;
-
-	case REC_BUSY:
-	case REC_NO_WORK:
-	case REC_EMPTY:
-	case REC_RETRY_DEV:
-	case REC_FATAL_ERROR:
-		return 0;
-
-	case REC_NO_RESPONSE:
-		break;
-
-	default:
-		PRINTK("rc %d, device %d converted to REC_NO_RESPONSE\n",
-		       *rc_p, SHRT2LONG(index));
-		*rc_p = REC_NO_RESPONSE;
-		break;
-	}
-	return 1;
-}
-
-static inline void
-z90crypt_schedule_reader_timer(void)
-{
-	if (timer_pending(&reader_timer))
-		return;
-	if (mod_timer(&reader_timer, jiffies+(READERTIME*HZ/1000)) != 0)
-		PRINTK("Timer pending while modifying reader timer\n");
-}
-
-static void
-z90crypt_reader_task(unsigned long ptr)
-{
-	int workavail, index, rc, buff_len;
-	unsigned char	psmid[8];
-	unsigned char __user *resp_addr;
-	static unsigned char buff[1024];
-
-	/**
-	 * we use workavail = 2 to ensure 2 passes with nothing dequeued before
-	 * exiting the loop. If (pendingq_count+requestq_count) == 0 after the
-	 * loop, there is no work remaining on the queues.
-	 */
-	resp_addr = 0;
-	workavail = 2;
-	buff_len = 0;
-	while (workavail) {
-		workavail--;
-		rc = 0;
-		spin_lock_irq(&queuespinlock);
-		memset(buff, 0x00, sizeof(buff));
-
-		/* Dequeue once from each device in round robin. */
-		for (index = 0; index < z90crypt.mask.st_count; index++) {
-			PDEBUG("About to receive.\n");
-			rc = receive_from_crypto_device(SHRT2LONG(index),
-							psmid,
-							&buff_len,
-							buff,
-							&resp_addr);
-			PDEBUG("Dequeued: rc = %d.\n", rc);
-
-			if (helper_receive_rc(index, &rc)) {
-				if (rc != REC_NO_RESPONSE) {
-					helper_send_work(index);
-					workavail = 2;
-				}
-
-				helper_handle_work_element(index, psmid, rc,
-							   buff_len, buff,
-							   resp_addr);
-			}
-
-			if (rc == REC_FATAL_ERROR)
-				PRINTKW("REC_FATAL_ERROR from device %d!\n",
-					SHRT2LONG(index));
-		}
-		spin_unlock_irq(&queuespinlock);
-	}
-
-	if (pendingq_count + requestq_count)
-		z90crypt_schedule_reader_timer();
-}
-
-static inline void
-z90crypt_schedule_config_task(unsigned int expiration)
-{
-	if (timer_pending(&config_timer))
-		return;
-	if (mod_timer(&config_timer, jiffies+(expiration*HZ)) != 0)
-		PRINTK("Timer pending while modifying config timer\n");
-}
-
-static void
-z90crypt_config_task(unsigned long ptr)
-{
-	int rc;
-
-	PDEBUG("jiffies %ld\n", jiffies);
-
-	if ((rc = refresh_z90crypt(&z90crypt.cdx)))
-		PRINTK("Error %d detected in refresh_z90crypt.\n", rc);
-	/* If return was fatal, don't bother reconfiguring */
-	if ((rc != TSQ_FATAL_ERROR) && (rc != RSQ_FATAL_ERROR))
-		z90crypt_schedule_config_task(CONFIGTIME);
-}
-
-static inline void
-z90crypt_schedule_cleanup_task(void)
-{
-	if (timer_pending(&cleanup_timer))
-		return;
-	if (mod_timer(&cleanup_timer, jiffies+(CLEANUPTIME*HZ)) != 0)
-		PRINTK("Timer pending while modifying cleanup timer\n");
-}
-
-static inline void
-helper_drain_queues(void)
-{
-	struct work_element *pq_p;
-	struct list_head *lptr, *tptr;
-
-	list_for_each_safe(lptr, tptr, &pending_list) {
-		pq_p = list_entry(lptr, struct work_element, liste);
-		pq_p->retcode = -ENODEV;
-		pq_p->status[0] |= STAT_FAILED;
-		unbuild_caller(LONG2DEVPTR(pq_p->devindex),
-			       (struct caller *)pq_p->requestptr);
-		list_del_init(lptr);
-		pendingq_count--;
-		pq_p->audit[1] |= FP_NOTPENDING;
-		pq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&pq_p->alarmrung, 1);
-		wake_up(&pq_p->waitq);
-	}
-
-	list_for_each_safe(lptr, tptr, &request_list) {
-		pq_p = list_entry(lptr, struct work_element, liste);
-		pq_p->retcode = -ENODEV;
-		pq_p->status[0] |= STAT_FAILED;
-		list_del_init(lptr);
-		requestq_count--;
-		pq_p->audit[1] |= FP_REMREQUEST;
-		pq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&pq_p->alarmrung, 1);
-		wake_up(&pq_p->waitq);
-	}
-}
-
-static inline void
-helper_timeout_requests(void)
-{
-	struct work_element *pq_p;
-	struct list_head *lptr, *tptr;
-	long timelimit;
-
-	timelimit = jiffies - (CLEANUPTIME * HZ);
-	/* The list is in strict chronological order */
-	list_for_each_safe(lptr, tptr, &pending_list) {
-		pq_p = list_entry(lptr, struct work_element, liste);
-		if (pq_p->requestsent >= timelimit)
-			break;
-		PRINTKW("Purging(PQ) PSMID %02X%02X%02X%02X%02X%02X%02X%02X\n",
-		       ((struct caller *)pq_p->requestptr)->caller_id[0],
-		       ((struct caller *)pq_p->requestptr)->caller_id[1],
-		       ((struct caller *)pq_p->requestptr)->caller_id[2],
-		       ((struct caller *)pq_p->requestptr)->caller_id[3],
-		       ((struct caller *)pq_p->requestptr)->caller_id[4],
-		       ((struct caller *)pq_p->requestptr)->caller_id[5],
-		       ((struct caller *)pq_p->requestptr)->caller_id[6],
-		       ((struct caller *)pq_p->requestptr)->caller_id[7]);
-		pq_p->retcode = -ETIMEOUT;
-		pq_p->status[0] |= STAT_FAILED;
-		/* get this off any caller queue it may be on */
-		unbuild_caller(LONG2DEVPTR(pq_p->devindex),
-			       (struct caller *) pq_p->requestptr);
-		list_del_init(lptr);
-		pendingq_count--;
-		pq_p->audit[1] |= FP_TIMEDOUT;
-		pq_p->audit[1] |= FP_NOTPENDING;
-		pq_p->audit[1] |= FP_AWAKENING;
-		atomic_set(&pq_p->alarmrung, 1);
-		wake_up(&pq_p->waitq);
-	}
-
-	/**
-	 * If pending count is zero, items left on the request queue may
-	 * never be processed.
-	 */
-	if (pendingq_count <= 0) {
-		list_for_each_safe(lptr, tptr, &request_list) {
-			pq_p = list_entry(lptr, struct work_element, liste);
-			if (pq_p->requestsent >= timelimit)
-				break;
-		PRINTKW("Purging(RQ) PSMID %02X%02X%02X%02X%02X%02X%02X%02X\n",
-		       ((struct caller *)pq_p->requestptr)->caller_id[0],
-		       ((struct caller *)pq_p->requestptr)->caller_id[1],
-		       ((struct caller *)pq_p->requestptr)->caller_id[2],
-		       ((struct caller *)pq_p->requestptr)->caller_id[3],
-		       ((struct caller *)pq_p->requestptr)->caller_id[4],
-		       ((struct caller *)pq_p->requestptr)->caller_id[5],
-		       ((struct caller *)pq_p->requestptr)->caller_id[6],
-		       ((struct caller *)pq_p->requestptr)->caller_id[7]);
-			pq_p->retcode = -ETIMEOUT;
-			pq_p->status[0] |= STAT_FAILED;
-			list_del_init(lptr);
-			requestq_count--;
-			pq_p->audit[1] |= FP_TIMEDOUT;
-			pq_p->audit[1] |= FP_REMREQUEST;
-			pq_p->audit[1] |= FP_AWAKENING;
-			atomic_set(&pq_p->alarmrung, 1);
-			wake_up(&pq_p->waitq);
-		}
-	}
-}
-
-static void
-z90crypt_cleanup_task(unsigned long ptr)
-{
-	PDEBUG("jiffies %ld\n", jiffies);
-	spin_lock_irq(&queuespinlock);
-	if (z90crypt.mask.st_count <= 0) // no devices!
-		helper_drain_queues();
-	else
-		helper_timeout_requests();
-	spin_unlock_irq(&queuespinlock);
-	z90crypt_schedule_cleanup_task();
-}
-
-static void
-z90crypt_schedule_reader_task(unsigned long ptr)
-{
-	tasklet_schedule(&reader_tasklet);
-}
-
-/**
- * Lowlevel Functions:
- *
- *   create_z90crypt:  creates and initializes basic data structures
- *   refresh_z90crypt:	re-initializes basic data structures
- *   find_crypto_devices: returns a count and mask of hardware status
- *   create_crypto_device:  builds the descriptor for a device
- *   destroy_crypto_device:  unallocates the descriptor for a device
- *   destroy_z90crypt:	drains all work, unallocates structs
- */
-
-/**
- * build the z90crypt root structure using the given domain index
- */
-static int
-create_z90crypt(int *cdx_p)
-{
-	struct hdware_block *hdware_blk_p;
-
-	memset(&z90crypt, 0x00, sizeof(struct z90crypt));
-	z90crypt.domain_established = 0;
-	z90crypt.len = sizeof(struct z90crypt);
-	z90crypt.max_count = Z90CRYPT_NUM_DEVS;
-	z90crypt.cdx = *cdx_p;
-
-	hdware_blk_p = kzalloc(sizeof(struct hdware_block), GFP_ATOMIC);
-	if (!hdware_blk_p) {
-		PDEBUG("kmalloc for hardware block failed\n");
-		return ENOMEM;
-	}
-	z90crypt.hdware_info = hdware_blk_p;
-
-	return 0;
-}
-
-static inline int
-helper_scan_devices(int cdx_array[16], int *cdx_p, int *correct_cdx_found)
-{
-	enum hdstat hd_stat;
-	int q_depth, dev_type;
-	int indx, chkdom, numdomains;
-
-	q_depth = dev_type = numdomains = 0;
-	for (chkdom = 0; chkdom <= 15; cdx_array[chkdom++] = -1);
-	for (indx = 0; indx < z90crypt.max_count; indx++) {
-		hd_stat = HD_NOT_THERE;
-		numdomains = 0;
-		for (chkdom = 0; chkdom <= 15; chkdom++) {
-			hd_stat = query_online(indx, chkdom, MAX_RESET,
-					       &q_depth, &dev_type);
-			if (hd_stat == HD_TSQ_EXCEPTION) {
-				z90crypt.terminating = 1;
-				PRINTKC("exception taken!\n");
-				break;
-			}
-			if (hd_stat == HD_ONLINE) {
-				cdx_array[numdomains++] = chkdom;
-				if (*cdx_p == chkdom) {
-					*correct_cdx_found  = 1;
-					break;
-				}
-			}
-		}
-		if ((*correct_cdx_found == 1) || (numdomains != 0))
-			break;
-		if (z90crypt.terminating)
-			break;
-	}
-	return numdomains;
-}
-
-static inline int
-probe_crypto_domain(int *cdx_p)
-{
-	int cdx_array[16];
-	char cdx_array_text[53], temp[5];
-	int correct_cdx_found, numdomains;
-
-	correct_cdx_found = 0;
-	numdomains = helper_scan_devices(cdx_array, cdx_p, &correct_cdx_found);
-
-	if (z90crypt.terminating)
-		return TSQ_FATAL_ERROR;
-
-	if (correct_cdx_found)
-		return 0;
-
-	if (numdomains == 0) {
-		PRINTKW("Unable to find crypto domain: No devices found\n");
-		return Z90C_NO_DEVICES;
-	}
-
-	if (numdomains == 1) {
-		if (*cdx_p == -1) {
-			*cdx_p = cdx_array[0];
-			return 0;
-		}
-		PRINTKW("incorrect domain: specified = %d, found = %d\n",
-		       *cdx_p, cdx_array[0]);
-		return Z90C_INCORRECT_DOMAIN;
-	}
-
-	numdomains--;
-	sprintf(cdx_array_text, "%d", cdx_array[numdomains]);
-	while (numdomains) {
-		numdomains--;
-		sprintf(temp, ", %d", cdx_array[numdomains]);
-		strcat(cdx_array_text, temp);
-	}
-
-	PRINTKW("ambiguous domain detected: specified = %d, found array = %s\n",
-		*cdx_p, cdx_array_text);
-	return Z90C_AMBIGUOUS_DOMAIN;
-}
-
-static int
-refresh_z90crypt(int *cdx_p)
-{
-	int i, j, indx, rv;
-	static struct status local_mask;
-	struct device *devPtr;
-	unsigned char oldStat, newStat;
-	int return_unchanged;
-
-	if (z90crypt.len != sizeof(z90crypt))
-		return ENOTINIT;
-	if (z90crypt.terminating)
-		return TSQ_FATAL_ERROR;
-	rv = 0;
-	if (!z90crypt.hdware_info->hdware_mask.st_count &&
-	    !z90crypt.domain_established) {
-		rv = probe_crypto_domain(cdx_p);
-		if (z90crypt.terminating)
-			return TSQ_FATAL_ERROR;
-		if (rv == Z90C_NO_DEVICES)
-			return 0; // try later
-		if (rv)
-			return rv;
-		z90crypt.cdx = *cdx_p;
-		z90crypt.domain_established = 1;
-	}
-	rv = find_crypto_devices(&local_mask);
-	if (rv) {
-		PRINTK("find crypto devices returned %d\n", rv);
-		return rv;
-	}
-	if (!memcmp(&local_mask, &z90crypt.hdware_info->hdware_mask,
-		    sizeof(struct status))) {
-		return_unchanged = 1;
-		for (i = 0; i < Z90CRYPT_NUM_TYPES; i++) {
-			/**
-			 * Check for disabled cards.  If any device is marked
-			 * disabled, destroy it.
-			 */
-			for (j = 0;
-			     j < z90crypt.hdware_info->type_mask[i].st_count;
-			     j++) {
-				indx = z90crypt.hdware_info->type_x_addr[i].
-								device_index[j];
-				devPtr = z90crypt.device_p[indx];
-				if (devPtr && devPtr->disabled) {
-					local_mask.st_mask[indx] = HD_NOT_THERE;
-					return_unchanged = 0;
-				}
-			}
-		}
-		if (return_unchanged == 1)
-			return 0;
-	}
-
-	spin_lock_irq(&queuespinlock);
-	for (i = 0; i < z90crypt.max_count; i++) {
-		oldStat = z90crypt.hdware_info->hdware_mask.st_mask[i];
-		newStat = local_mask.st_mask[i];
-		if ((oldStat == HD_ONLINE) && (newStat != HD_ONLINE))
-			destroy_crypto_device(i);
-		else if ((oldStat != HD_ONLINE) && (newStat == HD_ONLINE)) {
-			rv = create_crypto_device(i);
-			if (rv >= REC_FATAL_ERROR)
-				return rv;
-			if (rv != 0) {
-				local_mask.st_mask[i] = HD_NOT_THERE;
-				local_mask.st_count--;
-			}
-		}
-	}
-	memcpy(z90crypt.hdware_info->hdware_mask.st_mask, local_mask.st_mask,
-	       sizeof(local_mask.st_mask));
-	z90crypt.hdware_info->hdware_mask.st_count = local_mask.st_count;
-	z90crypt.hdware_info->hdware_mask.disabled_count =
-						      local_mask.disabled_count;
-	refresh_index_array(&z90crypt.mask, &z90crypt.overall_device_x);
-	for (i = 0; i < Z90CRYPT_NUM_TYPES; i++)
-		refresh_index_array(&(z90crypt.hdware_info->type_mask[i]),
-				    &(z90crypt.hdware_info->type_x_addr[i]));
-	spin_unlock_irq(&queuespinlock);
-
-	return rv;
-}
-
-static int
-find_crypto_devices(struct status *deviceMask)
-{
-	int i, q_depth, dev_type;
-	enum hdstat hd_stat;
-
-	deviceMask->st_count = 0;
-	deviceMask->disabled_count = 0;
-	deviceMask->user_disabled_count = 0;
-
-	for (i = 0; i < z90crypt.max_count; i++) {
-		hd_stat = query_online(i, z90crypt.cdx, MAX_RESET, &q_depth,
-				       &dev_type);
-		if (hd_stat == HD_TSQ_EXCEPTION) {
-			z90crypt.terminating = 1;
-			PRINTKC("Exception during probe for crypto devices\n");
-			return TSQ_FATAL_ERROR;
-		}
-		deviceMask->st_mask[i] = hd_stat;
-		if (hd_stat == HD_ONLINE) {
-			PDEBUG("Got an online crypto!: %d\n", i);
-			PDEBUG("Got a queue depth of %d\n", q_depth);
-			PDEBUG("Got a device type of %d\n", dev_type);
-			if (q_depth <= 0)
-				return TSQ_FATAL_ERROR;
-			deviceMask->st_count++;
-			z90crypt.q_depth_array[i] = q_depth;
-			z90crypt.dev_type_array[i] = dev_type;
-		}
-	}
-
-	return 0;
-}
-
-static int
-refresh_index_array(struct status *status_str, struct device_x *index_array)
-{
-	int i, count;
-	enum devstat stat;
-
-	i = -1;
-	count = 0;
-	do {
-		stat = status_str->st_mask[++i];
-		if (stat == DEV_ONLINE)
-			index_array->device_index[count++] = i;
-	} while ((i < Z90CRYPT_NUM_DEVS) && (count < status_str->st_count));
-
-	return count;
-}
-
-static int
-create_crypto_device(int index)
-{
-	int rv, devstat, total_size;
-	struct device *dev_ptr;
-	struct status *type_str_p;
-	int deviceType;
-
-	dev_ptr = z90crypt.device_p[index];
-	if (!dev_ptr) {
-		total_size = sizeof(struct device) +
-			     z90crypt.q_depth_array[index] * sizeof(int);
-
-		dev_ptr = kzalloc(total_size, GFP_ATOMIC);
-		if (!dev_ptr) {
-			PRINTK("kmalloc device %d failed\n", index);
-			return ENOMEM;
-		}
-		dev_ptr->dev_resp_p = kmalloc(MAX_RESPONSE_SIZE, GFP_ATOMIC);
-		if (!dev_ptr->dev_resp_p) {
-			kfree(dev_ptr);
-			PRINTK("kmalloc device %d rec buffer failed\n", index);
-			return ENOMEM;
-		}
-		dev_ptr->dev_resp_l = MAX_RESPONSE_SIZE;
-		INIT_LIST_HEAD(&(dev_ptr->dev_caller_list));
-	}
-
-	devstat = reset_device(index, z90crypt.cdx, MAX_RESET);
-	if (devstat == DEV_RSQ_EXCEPTION) {
-		PRINTK("exception during reset device %d\n", index);
-		kfree(dev_ptr->dev_resp_p);
-		kfree(dev_ptr);
-		return RSQ_FATAL_ERROR;
-	}
-	if (devstat == DEV_ONLINE) {
-		dev_ptr->dev_self_x = index;
-		dev_ptr->dev_type = z90crypt.dev_type_array[index];
-		if (dev_ptr->dev_type == NILDEV) {
-			rv = probe_device_type(dev_ptr);
-			if (rv) {
-				PRINTK("rv = %d from probe_device_type %d\n",
-				       rv, index);
-				kfree(dev_ptr->dev_resp_p);
-				kfree(dev_ptr);
-				return rv;
-			}
-		}
-		if (dev_ptr->dev_type == PCIXCC_UNK) {
-			rv = probe_PCIXCC_type(dev_ptr);
-			if (rv) {
-				PRINTK("rv = %d from probe_PCIXCC_type %d\n",
-				       rv, index);
-				kfree(dev_ptr->dev_resp_p);
-				kfree(dev_ptr);
-				return rv;
-			}
-		}
-		deviceType = dev_ptr->dev_type;
-		z90crypt.dev_type_array[index] = deviceType;
-		if (deviceType == PCICA)
-			z90crypt.hdware_info->device_type_array[index] = 1;
-		else if (deviceType == PCICC)
-			z90crypt.hdware_info->device_type_array[index] = 2;
-		else if (deviceType == PCIXCC_MCL2)
-			z90crypt.hdware_info->device_type_array[index] = 3;
-		else if (deviceType == PCIXCC_MCL3)
-			z90crypt.hdware_info->device_type_array[index] = 4;
-		else if (deviceType == CEX2C)
-			z90crypt.hdware_info->device_type_array[index] = 5;
-		else if (deviceType == CEX2A)
-			z90crypt.hdware_info->device_type_array[index] = 6;
-		else // No idea how this would happen.
-			z90crypt.hdware_info->device_type_array[index] = -1;
-	}
-
-	/**
-	 * 'q_depth' returned by the hardware is one less than
-	 * the actual depth
-	 */
-	dev_ptr->dev_q_depth = z90crypt.q_depth_array[index];
-	dev_ptr->dev_type = z90crypt.dev_type_array[index];
-	dev_ptr->dev_stat = devstat;
-	dev_ptr->disabled = 0;
-	z90crypt.device_p[index] = dev_ptr;
-
-	if (devstat == DEV_ONLINE) {
-		if (z90crypt.mask.st_mask[index] != DEV_ONLINE) {
-			z90crypt.mask.st_mask[index] = DEV_ONLINE;
-			z90crypt.mask.st_count++;
-		}
-		deviceType = dev_ptr->dev_type;
-		type_str_p = &z90crypt.hdware_info->type_mask[deviceType];
-		if (type_str_p->st_mask[index] != DEV_ONLINE) {
-			type_str_p->st_mask[index] = DEV_ONLINE;
-			type_str_p->st_count++;
-		}
-	}
-
-	return 0;
-}
-
-static int
-destroy_crypto_device(int index)
-{
-	struct device *dev_ptr;
-	int t, disabledFlag;
-
-	dev_ptr = z90crypt.device_p[index];
-
-	/* remember device type; get rid of device struct */
-	if (dev_ptr) {
-		disabledFlag = dev_ptr->disabled;
-		t = dev_ptr->dev_type;
-		kfree(dev_ptr->dev_resp_p);
-		kfree(dev_ptr);
-	} else {
-		disabledFlag = 0;
-		t = -1;
-	}
-	z90crypt.device_p[index] = 0;
-
-	/* if the type is valid, remove the device from the type_mask */
-	if ((t != -1) && z90crypt.hdware_info->type_mask[t].st_mask[index]) {
-		  z90crypt.hdware_info->type_mask[t].st_mask[index] = 0x00;
-		  z90crypt.hdware_info->type_mask[t].st_count--;
-		  if (disabledFlag == 1)
-			z90crypt.hdware_info->type_mask[t].disabled_count--;
-	}
-	if (z90crypt.mask.st_mask[index] != DEV_GONE) {
-		z90crypt.mask.st_mask[index] = DEV_GONE;
-		z90crypt.mask.st_count--;
-	}
-	z90crypt.hdware_info->device_type_array[index] = 0;
-
-	return 0;
-}
-
-static void
-destroy_z90crypt(void)
-{
-	int i;
-
-	for (i = 0; i < z90crypt.max_count; i++)
-		if (z90crypt.device_p[i])
-			destroy_crypto_device(i);
-	kfree(z90crypt.hdware_info);
-	memset((void *)&z90crypt, 0, sizeof(z90crypt));
-}
-
-static unsigned char static_testmsg[384] = {
-0x00,0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x00,0x06,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x43,0x43,
-0x41,0x2d,0x41,0x50,0x50,0x4c,0x20,0x20,0x20,0x01,0x01,0x01,0x00,0x00,0x00,0x00,
-0x50,0x4b,0x00,0x00,0x00,0x00,0x01,0x1c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x05,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x70,0x00,0x41,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x54,0x32,
-0x01,0x00,0xa0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0xb8,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x49,0x43,0x53,0x46,
-0x20,0x20,0x20,0x20,0x50,0x4b,0x0a,0x00,0x50,0x4b,0x43,0x53,0x2d,0x31,0x2e,0x32,
-0x37,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,
-0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,
-0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66,
-0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x5d,0x00,0x5b,0x00,0x77,0x88,0x1e,0x00,0x00,
-0x57,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x4f,0x00,0x00,0x00,0x03,0x02,0x00,0x00,
-0x40,0x01,0x00,0x01,0xce,0x02,0x68,0x2d,0x5f,0xa9,0xde,0x0c,0xf6,0xd2,0x7b,0x58,
-0x4b,0xf9,0x28,0x68,0x3d,0xb4,0xf4,0xef,0x78,0xd5,0xbe,0x66,0x63,0x42,0xef,0xf8,
-0xfd,0xa4,0xf8,0xb0,0x8e,0x29,0xc2,0xc9,0x2e,0xd8,0x45,0xb8,0x53,0x8c,0x6f,0x4e,
-0x72,0x8f,0x6c,0x04,0x9c,0x88,0xfc,0x1e,0xc5,0x83,0x55,0x57,0xf7,0xdd,0xfd,0x4f,
-0x11,0x36,0x95,0x5d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
-};
-
-static int
-probe_device_type(struct device *devPtr)
-{
-	int rv, dv, i, index, length;
-	unsigned char psmid[8];
-	static unsigned char loc_testmsg[sizeof(static_testmsg)];
-
-	index = devPtr->dev_self_x;
-	rv = 0;
-	do {
-		memcpy(loc_testmsg, static_testmsg, sizeof(static_testmsg));
-		length = sizeof(static_testmsg) - 24;
-		/* the -24 allows for the header */
-		dv = send_to_AP(index, z90crypt.cdx, length, loc_testmsg);
-		if (dv) {
-			PDEBUG("dv returned by send during probe: %d\n", dv);
-			if (dv == DEV_SEN_EXCEPTION) {
-				rv = SEN_FATAL_ERROR;
-				PRINTKC("exception in send to AP %d\n", index);
-				break;
-			}
-			PDEBUG("return value from send_to_AP: %d\n", rv);
-			switch (dv) {
-			case DEV_GONE:
-				PDEBUG("dev %d not available\n", index);
-				rv = SEN_NOT_AVAIL;
-				break;
-			case DEV_ONLINE:
-				rv = 0;
-				break;
-			case DEV_EMPTY:
-				rv = SEN_NOT_AVAIL;
-				break;
-			case DEV_NO_WORK:
-				rv = SEN_FATAL_ERROR;
-				break;
-			case DEV_BAD_MESSAGE:
-				rv = SEN_USER_ERROR;
-				break;
-			case DEV_QUEUE_FULL:
-				rv = SEN_QUEUE_FULL;
-				break;
-			default:
-				PRINTK("unknown dv=%d for dev %d\n", dv, index);
-				rv = SEN_NOT_AVAIL;
-				break;
-			}
-		}
-
-		if (rv)
-			break;
-
-		for (i = 0; i < 6; i++) {
-			mdelay(300);
-			dv = receive_from_AP(index, z90crypt.cdx,
-					     devPtr->dev_resp_l,
-					     devPtr->dev_resp_p, psmid);
-			PDEBUG("dv returned by DQ = %d\n", dv);
-			if (dv == DEV_REC_EXCEPTION) {
-				rv = REC_FATAL_ERROR;
-				PRINTKC("exception in dequeue %d\n",
-					index);
-				break;
-			}
-			switch (dv) {
-			case DEV_ONLINE:
-				rv = 0;
-				break;
-			case DEV_EMPTY:
-				rv = REC_EMPTY;
-				break;
-			case DEV_NO_WORK:
-				rv = REC_NO_WORK;
-				break;
-			case DEV_BAD_MESSAGE:
-			case DEV_GONE:
-			default:
-				rv = REC_NO_RESPONSE;
-				break;
-			}
-			if ((rv != 0) && (rv != REC_NO_WORK))
-				break;
-			if (rv == 0)
-				break;
-		}
-		if (rv)
-			break;
-		rv = (devPtr->dev_resp_p[0] == 0x00) &&
-		     (devPtr->dev_resp_p[1] == 0x86);
-		if (rv)
-			devPtr->dev_type = PCICC;
-		else
-			devPtr->dev_type = PCICA;
-		rv = 0;
-	} while (0);
-	/* In a general error case, the card is not marked online */
-	return rv;
-}
-
-static unsigned char MCL3_testmsg[] = {
-0x00,0x00,0x00,0x00,0xEE,0xEE,0xEE,0xEE,0xEE,0xEE,0xEE,0xEE,
-0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x00,0x00,0x00,0x01,0xC4,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xDC,0x02,0x00,0x00,0x00,0x54,0x32,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xE8,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x0A,0x4D,0x52,0x50,0x20,0x20,0x20,0x20,0x20,
-0x00,0x42,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,
-0x0E,0x0F,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xAA,0xBB,0xCC,0xDD,
-0xEE,0xFF,0xFF,0xEE,0xDD,0xCC,0xBB,0xAA,0x99,0x88,0x77,0x66,0x55,0x44,0x33,0x22,
-0x11,0x00,0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,0xFE,0xDC,0xBA,0x98,0x76,0x54,
-0x32,0x10,0x00,0x9A,0x00,0x98,0x00,0x00,0x1E,0x00,0x00,0x94,0x00,0x00,0x00,0x00,
-0x04,0x00,0x00,0x8C,0x00,0x00,0x00,0x40,0x02,0x00,0x00,0x40,0xBA,0xE8,0x23,0x3C,
-0x75,0xF3,0x91,0x61,0xD6,0x73,0x39,0xCF,0x7B,0x6D,0x8E,0x61,0x97,0x63,0x9E,0xD9,
-0x60,0x55,0xD6,0xC7,0xEF,0xF8,0x1E,0x63,0x95,0x17,0xCC,0x28,0x45,0x60,0x11,0xC5,
-0xC4,0x4E,0x66,0xC6,0xE6,0xC3,0xDE,0x8A,0x19,0x30,0xCF,0x0E,0xD7,0xAA,0xDB,0x01,
-0xD8,0x00,0xBB,0x8F,0x39,0x9F,0x64,0x28,0xF5,0x7A,0x77,0x49,0xCC,0x6B,0xA3,0x91,
-0x97,0x70,0xE7,0x60,0x1E,0x39,0xE1,0xE5,0x33,0xE1,0x15,0x63,0x69,0x08,0x80,0x4C,
-0x67,0xC4,0x41,0x8F,0x48,0xDF,0x26,0x98,0xF1,0xD5,0x8D,0x88,0xD9,0x6A,0xA4,0x96,
-0xC5,0x84,0xD9,0x30,0x49,0x67,0x7D,0x19,0xB1,0xB3,0x45,0x4D,0xB2,0x53,0x9A,0x47,
-0x3C,0x7C,0x55,0xBF,0xCC,0x85,0x00,0x36,0xF1,0x3D,0x93,0x53
-};
-
-static int
-probe_PCIXCC_type(struct device *devPtr)
-{
-	int rv, dv, i, index, length;
-	unsigned char psmid[8];
-	static unsigned char loc_testmsg[548];
-	struct CPRBX *cprbx_p;
-
-	index = devPtr->dev_self_x;
-	rv = 0;
-	do {
-		memcpy(loc_testmsg, MCL3_testmsg, sizeof(MCL3_testmsg));
-		length = sizeof(MCL3_testmsg) - 0x0C;
-		dv = send_to_AP(index, z90crypt.cdx, length, loc_testmsg);
-		if (dv) {
-			PDEBUG("dv returned = %d\n", dv);
-			if (dv == DEV_SEN_EXCEPTION) {
-				rv = SEN_FATAL_ERROR;
-				PRINTKC("exception in send to AP %d\n", index);
-				break;
-			}
-			PDEBUG("return value from send_to_AP: %d\n", rv);
-			switch (dv) {
-			case DEV_GONE:
-				PDEBUG("dev %d not available\n", index);
-				rv = SEN_NOT_AVAIL;
-				break;
-			case DEV_ONLINE:
-				rv = 0;
-				break;
-			case DEV_EMPTY:
-				rv = SEN_NOT_AVAIL;
-				break;
-			case DEV_NO_WORK:
-				rv = SEN_FATAL_ERROR;
-				break;
-			case DEV_BAD_MESSAGE:
-				rv = SEN_USER_ERROR;
-				break;
-			case DEV_QUEUE_FULL:
-				rv = SEN_QUEUE_FULL;
-				break;
-			default:
-				PRINTK("unknown dv=%d for dev %d\n", dv, index);
-				rv = SEN_NOT_AVAIL;
-				break;
-			}
-		}
-
-		if (rv)
-			break;
-
-		for (i = 0; i < 6; i++) {
-			mdelay(300);
-			dv = receive_from_AP(index, z90crypt.cdx,
-					     devPtr->dev_resp_l,
-					     devPtr->dev_resp_p, psmid);
-			PDEBUG("dv returned by DQ = %d\n", dv);
-			if (dv == DEV_REC_EXCEPTION) {
-				rv = REC_FATAL_ERROR;
-				PRINTKC("exception in dequeue %d\n",
-					index);
-				break;
-			}
-			switch (dv) {
-			case DEV_ONLINE:
-				rv = 0;
-				break;
-			case DEV_EMPTY:
-				rv = REC_EMPTY;
-				break;
-			case DEV_NO_WORK:
-				rv = REC_NO_WORK;
-				break;
-			case DEV_BAD_MESSAGE:
-			case DEV_GONE:
-			default:
-				rv = REC_NO_RESPONSE;
-				break;
-			}
-			if ((rv != 0) && (rv != REC_NO_WORK))
-				break;
-			if (rv == 0)
-				break;
-		}
-		if (rv)
-			break;
-		cprbx_p = (struct CPRBX *) (devPtr->dev_resp_p + 48);
-		if ((cprbx_p->ccp_rtcode == 8) && (cprbx_p->ccp_rscode == 33)) {
-			devPtr->dev_type = PCIXCC_MCL2;
-			PDEBUG("device %d is MCL2\n", index);
-		} else {
-			devPtr->dev_type = PCIXCC_MCL3;
-			PDEBUG("device %d is MCL3\n", index);
-		}
-	} while (0);
-	/* In a general error case, the card is not marked online */
-	return rv;
-}
-
-module_init(z90crypt_init_module);
-module_exit(z90crypt_cleanup_module);
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
new file mode 100644
index 0000000..1edc10a
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -0,0 +1,1091 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_api.c
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *	       Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/compat.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "zcrypt_api.h"
+
+/**
+ * Module description.
+ */
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("Cryptographic Coprocessor interface, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+
+static DEFINE_SPINLOCK(zcrypt_device_lock);
+static LIST_HEAD(zcrypt_device_list);
+static int zcrypt_device_count = 0;
+static atomic_t zcrypt_open_count = ATOMIC_INIT(0);
+
+/**
+ * Device attributes common for all crypto devices.
+ */
+static ssize_t zcrypt_type_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct zcrypt_device *zdev = to_ap_dev(dev)->private;
+	return snprintf(buf, PAGE_SIZE, "%s\n", zdev->type_string);
+}
+
+static DEVICE_ATTR(type, 0444, zcrypt_type_show, NULL);
+
+static ssize_t zcrypt_online_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct zcrypt_device *zdev = to_ap_dev(dev)->private;
+	return snprintf(buf, PAGE_SIZE, "%d\n", zdev->online);
+}
+
+static ssize_t zcrypt_online_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct zcrypt_device *zdev = to_ap_dev(dev)->private;
+	int online;
+
+	if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
+		return -EINVAL;
+	zdev->online = online;
+	if (!online)
+		ap_flush_queue(zdev->ap_dev);
+	return count;
+}
+
+static DEVICE_ATTR(online, 0644, zcrypt_online_show, zcrypt_online_store);
+
+static struct attribute * zcrypt_device_attrs[] = {
+	&dev_attr_type.attr,
+	&dev_attr_online.attr,
+	NULL,
+};
+
+static struct attribute_group zcrypt_device_attr_group = {
+	.attrs = zcrypt_device_attrs,
+};
+
+/**
+ * Move the device towards the head of the device list, behind the nearest
+ * entry with rating != 0 and no larger (request_count + 1) * speed_rating.
+ * Needs to be called while holding the zcrypt device list lock.
+ */
+static void __zcrypt_increase_preference(struct zcrypt_device *zdev)
+{
+	struct zcrypt_device *tmp;
+	struct list_head *l;
+
+	if (zdev->speed_rating == 0)	/* rating 0: kept at the list end */
+		return;
+	for (l = zdev->list.prev; l != &zcrypt_device_list; l = l->prev) {
+		tmp = list_entry(l, struct zcrypt_device, list);
+		if ((tmp->request_count + 1) * tmp->speed_rating <=
+		    (zdev->request_count + 1) * zdev->speed_rating &&
+		    tmp->speed_rating != 0)
+			break;
+	}
+	if (l == zdev->list.prev)	/* already in the right place */
+		return;
+	/* Move zdev behind l (l is the list head if no entry matched) */
+	list_del(&zdev->list);
+	list_add(&zdev->list, l);
+}
+
+/**
+ * Move the device towards the tail of the device list, in front of the next
+ * entry with a larger (request_count + 1) * speed_rating or a rating of 0.
+ * Needs to be called while holding the zcrypt device list lock.
+ */
+static void __zcrypt_decrease_preference(struct zcrypt_device *zdev)
+{
+	struct zcrypt_device *tmp;
+	struct list_head *l;
+
+	if (zdev->speed_rating == 0)	/* rating 0: kept at the list end */
+		return;
+	for (l = zdev->list.next; l != &zcrypt_device_list; l = l->next) {
+		tmp = list_entry(l, struct zcrypt_device, list);
+		if ((tmp->request_count + 1) * tmp->speed_rating >
+		    (zdev->request_count + 1) * zdev->speed_rating ||
+		    tmp->speed_rating == 0)
+			break;
+	}
+	if (l == zdev->list.next)	/* already in the right place */
+		return;
+	/* Move zdev before l (l is the list head if no entry matched) */
+	list_del(&zdev->list);
+	list_add_tail(&zdev->list, l);
+}
+
+static void zcrypt_device_release(struct kref *kref)
+{
+	struct zcrypt_device *zdev =
+		container_of(kref, struct zcrypt_device, refcount);
+	zcrypt_device_free(zdev);
+}
+
+void zcrypt_device_get(struct zcrypt_device *zdev)
+{
+	kref_get(&zdev->refcount);
+}
+EXPORT_SYMBOL(zcrypt_device_get);
+
+int zcrypt_device_put(struct zcrypt_device *zdev)
+{
+	return kref_put(&zdev->refcount, zcrypt_device_release);
+}
+EXPORT_SYMBOL(zcrypt_device_put);
+
+struct zcrypt_device *zcrypt_device_alloc(size_t max_response_size)
+{
+	struct zcrypt_device *zdev;
+
+	zdev = kzalloc(sizeof(struct zcrypt_device), GFP_KERNEL);
+	if (!zdev)
+		return NULL;
+	zdev->reply.message = kmalloc(max_response_size, GFP_KERNEL);
+	if (!zdev->reply.message)
+		goto out_free;
+	zdev->reply.length = max_response_size;
+	spin_lock_init(&zdev->lock);
+	INIT_LIST_HEAD(&zdev->list);
+	return zdev;
+
+out_free:
+	kfree(zdev);
+	return NULL;
+}
+EXPORT_SYMBOL(zcrypt_device_alloc);
+
+void zcrypt_device_free(struct zcrypt_device *zdev)
+{
+	kfree(zdev->reply.message);
+	kfree(zdev);
+}
+EXPORT_SYMBOL(zcrypt_device_free);
+
+/**
+ * Register a crypto device.
+ */
+int zcrypt_device_register(struct zcrypt_device *zdev)
+{
+	int rc;
+
+	rc = sysfs_create_group(&zdev->ap_dev->device.kobj,
+				&zcrypt_device_attr_group);
+	if (rc)
+		goto out;
+	get_device(&zdev->ap_dev->device);
+	kref_init(&zdev->refcount);
+	spin_lock_bh(&zcrypt_device_lock);
+	zdev->online = 1;	/* New devices are online by default. */
+	list_add_tail(&zdev->list, &zcrypt_device_list);
+	__zcrypt_increase_preference(zdev);
+	zcrypt_device_count++;
+	spin_unlock_bh(&zcrypt_device_lock);
+out:
+	return rc;
+}
+EXPORT_SYMBOL(zcrypt_device_register);
+
+/**
+ * Unregister a crypto device.
+ */
+void zcrypt_device_unregister(struct zcrypt_device *zdev)
+{
+	spin_lock_bh(&zcrypt_device_lock);
+	zcrypt_device_count--;
+	list_del_init(&zdev->list);
+	spin_unlock_bh(&zcrypt_device_lock);
+	sysfs_remove_group(&zdev->ap_dev->device.kobj,
+			   &zcrypt_device_attr_group);
+	put_device(&zdev->ap_dev->device);
+	zcrypt_device_put(zdev);
+}
+EXPORT_SYMBOL(zcrypt_device_unregister);
+
+/**
+ * zcrypt_read is not supported beyond zcrypt 1.3.1
+ */
+static ssize_t zcrypt_read(struct file *filp, char __user *buf,
+			   size_t count, loff_t *f_pos)
+{
+	return -EPERM;
+}
+
+/**
+ * Write is not allowed
+ */
+static ssize_t zcrypt_write(struct file *filp, const char __user *buf,
+			    size_t count, loff_t *f_pos)
+{
+	return -EPERM;
+}
+
+/**
+ * Device open/close functions to count number of users.
+ */
+static int zcrypt_open(struct inode *inode, struct file *filp)
+{
+	atomic_inc(&zcrypt_open_count);
+	return 0;
+}
+
+static int zcrypt_release(struct inode *inode, struct file *filp)
+{
+	atomic_dec(&zcrypt_open_count);
+	return 0;
+}
+
+/**
+ * zcrypt ioctls.
+ */
+static long zcrypt_rsa_modexpo(struct ica_rsa_modexpo *mex)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	if (mex->outputdatalength < mex->inputdatalength)
+		return -EINVAL;
+	/**
+	 * As long as outputdatalength is big enough, we can set the
+	 * outputdatalength equal to the inputdatalength, since that is the
+	 * number of bytes we will copy in any case
+	 */
+	mex->outputdatalength = mex->inputdatalength;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		if (!zdev->online ||
+		    !zdev->ops->rsa_modexpo ||
+		    zdev->min_mod_size > mex->inputdatalength ||
+		    zdev->max_mod_size < mex->inputdatalength)
+			continue;
+		zcrypt_device_get(zdev);
+		get_device(&zdev->ap_dev->device);
+		zdev->request_count++;
+		__zcrypt_decrease_preference(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		if (try_module_get(zdev->ap_dev->drv->driver.owner)) {
+			rc = zdev->ops->rsa_modexpo(zdev, mex);
+			module_put(zdev->ap_dev->drv->driver.owner);
+		}
+		else
+			rc = -EAGAIN;
+		spin_lock_bh(&zcrypt_device_lock);
+		zdev->request_count--;
+		__zcrypt_increase_preference(zdev);
+		put_device(&zdev->ap_dev->device);
+		zcrypt_device_put(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		return rc;
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return -ENODEV;
+}
+
+static long zcrypt_rsa_crt(struct ica_rsa_modexpo_crt *crt)
+{
+	struct zcrypt_device *zdev;
+	unsigned long long z1, z2, z3;
+	int rc, copied;
+
+	if (crt->outputdatalength < crt->inputdatalength ||
+	    (crt->inputdatalength & 1))
+		return -EINVAL;
+	/**
+	 * As long as outputdatalength is big enough, we can set the
+	 * outputdatalength equal to the inputdatalength, since that is the
+	 * number of bytes we will copy in any case
+	 */
+	crt->outputdatalength = crt->inputdatalength;
+
+	copied = 0;
+ restart:
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		if (!zdev->online ||
+		    !zdev->ops->rsa_modexpo_crt ||
+		    zdev->min_mod_size > crt->inputdatalength ||
+		    zdev->max_mod_size < crt->inputdatalength)
+			continue;
+		if (zdev->short_crt && crt->inputdatalength > 240) {
+			/**
+			 * Check inputdata for leading zeros for cards
+			 * that can't handle np_prime, bp_key, or
+			 * u_mult_inv > 128 bytes.
+			 */
+			if (copied == 0) {
+				int len;
+				spin_unlock_bh(&zcrypt_device_lock);
+				/* len is max 256 / 2 - 120 = 8 */
+				len = crt->inputdatalength / 2 - 120;
+				z1 = z2 = z3 = 0;
+				if (copy_from_user(&z1, crt->np_prime, len) ||
+				    copy_from_user(&z2, crt->bp_key, len) ||
+				    copy_from_user(&z3, crt->u_mult_inv, len))
+					return -EFAULT;
+				copied = 1;
+				/**
+				 * We have to restart device lookup -
+				 * the device list may have changed by now.
+				 */
+				goto restart;
+			}
+			if (z1 != 0ULL || z2 != 0ULL || z3 != 0ULL)
+				/* The device can't handle this request. */
+				continue;
+		}
+		zcrypt_device_get(zdev);
+		get_device(&zdev->ap_dev->device);
+		zdev->request_count++;
+		__zcrypt_decrease_preference(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		if (try_module_get(zdev->ap_dev->drv->driver.owner)) {
+			rc = zdev->ops->rsa_modexpo_crt(zdev, crt);
+			module_put(zdev->ap_dev->drv->driver.owner);
+		}
+		else
+			rc = -EAGAIN;
+		spin_lock_bh(&zcrypt_device_lock);
+		zdev->request_count--;
+		__zcrypt_increase_preference(zdev);
+		put_device(&zdev->ap_dev->device);
+		zcrypt_device_put(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		return rc;
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return -ENODEV;
+}
+
+static long zcrypt_send_cprb(struct ica_xcRB *xcRB)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		if (!zdev->online || !zdev->ops->send_cprb ||
+		    (xcRB->user_defined != AUTOSELECT &&
+			AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined)
+		    )
+			continue;
+		zcrypt_device_get(zdev);
+		get_device(&zdev->ap_dev->device);
+		zdev->request_count++;
+		__zcrypt_decrease_preference(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		if (try_module_get(zdev->ap_dev->drv->driver.owner)) {
+			rc = zdev->ops->send_cprb(zdev, xcRB);
+			module_put(zdev->ap_dev->drv->driver.owner);
+		}
+		else
+			rc = -EAGAIN;
+		spin_lock_bh(&zcrypt_device_lock);
+		zdev->request_count--;
+		__zcrypt_increase_preference(zdev);
+		put_device(&zdev->ap_dev->device);
+		zcrypt_device_put(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		return rc;
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return -ENODEV;
+}
+
+static void zcrypt_status_mask(char status[AP_DEVICES])
+{
+	struct zcrypt_device *zdev;
+
+	memset(status, 0, sizeof(char) * AP_DEVICES);
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list)
+		status[AP_QID_DEVICE(zdev->ap_dev->qid)] =
+			zdev->online ? zdev->user_space_type : 0x0d;
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+static void zcrypt_qdepth_mask(char qdepth[AP_DEVICES])
+{
+	struct zcrypt_device *zdev;
+
+	memset(qdepth, 0, sizeof(char)	* AP_DEVICES);
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		spin_lock(&zdev->ap_dev->lock);
+		qdepth[AP_QID_DEVICE(zdev->ap_dev->qid)] =
+			zdev->ap_dev->pendingq_count +
+			zdev->ap_dev->requestq_count;
+		spin_unlock(&zdev->ap_dev->lock);
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+static void zcrypt_perdev_reqcnt(int reqcnt[AP_DEVICES])
+{
+	struct zcrypt_device *zdev;
+
+	memset(reqcnt, 0, sizeof(int) * AP_DEVICES);
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		spin_lock(&zdev->ap_dev->lock);
+		reqcnt[AP_QID_DEVICE(zdev->ap_dev->qid)] =
+			zdev->ap_dev->total_request_count;
+		spin_unlock(&zdev->ap_dev->lock);
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+static int zcrypt_pendingq_count(void)
+{
+	struct zcrypt_device *zdev;
+	int pendingq_count = 0;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		spin_lock(&zdev->ap_dev->lock);
+		pendingq_count += zdev->ap_dev->pendingq_count;
+		spin_unlock(&zdev->ap_dev->lock);
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return pendingq_count;
+}
+
+static int zcrypt_requestq_count(void)
+{
+	struct zcrypt_device *zdev;
+	int requestq_count = 0;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		spin_lock(&zdev->ap_dev->lock);
+		requestq_count += zdev->ap_dev->requestq_count;
+		spin_unlock(&zdev->ap_dev->lock);
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return requestq_count;
+}
+
+static int zcrypt_count_type(int type)
+{
+	struct zcrypt_device *zdev;
+	int device_count = 0;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list)
+		if (zdev->user_space_type == type)
+			device_count++;
+	spin_unlock_bh(&zcrypt_device_lock);
+	return device_count;
+}
+
+/**
+ * Old, deprecated combi status call.
+ */
+static long zcrypt_ica_status(struct file *filp, unsigned long arg)
+{
+	struct ica_z90_status *pstat;
+	int ret;
+
+	pstat = kzalloc(sizeof(*pstat), GFP_KERNEL);
+	if (!pstat)
+		return -ENOMEM;
+	pstat->totalcount = zcrypt_device_count;
+	pstat->leedslitecount = zcrypt_count_type(ZCRYPT_PCICA);
+	pstat->leeds2count = zcrypt_count_type(ZCRYPT_PCICC);
+	pstat->requestqWaitCount = zcrypt_requestq_count();
+	pstat->pendingqWaitCount = zcrypt_pendingq_count();
+	pstat->totalOpenCount = atomic_read(&zcrypt_open_count);
+	pstat->cryptoDomain = ap_domain_index;
+	zcrypt_status_mask(pstat->status);
+	zcrypt_qdepth_mask(pstat->qdepth);
+	ret = 0;
+	if (copy_to_user((void __user *) arg, pstat, sizeof(*pstat)))
+		ret = -EFAULT;
+	kfree(pstat);
+	return ret;
+}
+
+static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
+				  unsigned long arg)
+{
+	int rc;
+
+	switch (cmd) {
+	case ICARSAMODEXPO: {
+		struct ica_rsa_modexpo __user *umex = (void __user *) arg;
+		struct ica_rsa_modexpo mex;
+		if (copy_from_user(&mex, umex, sizeof(mex)))
+			return -EFAULT;
+		do {
+			rc = zcrypt_rsa_modexpo(&mex);
+		} while (rc == -EAGAIN);
+		if (rc)
+			return rc;
+		return put_user(mex.outputdatalength, &umex->outputdatalength);
+	}
+	case ICARSACRT: {
+		struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg;
+		struct ica_rsa_modexpo_crt crt;
+		if (copy_from_user(&crt, ucrt, sizeof(crt)))
+			return -EFAULT;
+		do {
+			rc = zcrypt_rsa_crt(&crt);
+		} while (rc == -EAGAIN);
+		if (rc)
+			return rc;
+		return put_user(crt.outputdatalength, &ucrt->outputdatalength);
+	}
+	case ZSECSENDCPRB: {
+		struct ica_xcRB __user *uxcRB = (void __user *) arg;
+		struct ica_xcRB xcRB;
+		if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB)))
+			return -EFAULT;
+		do {
+			rc = zcrypt_send_cprb(&xcRB);
+		} while (rc == -EAGAIN);
+		if (copy_to_user(uxcRB, &xcRB, sizeof(xcRB)))
+			return -EFAULT;
+		return rc;
+	}
+	case Z90STAT_STATUS_MASK: {
+		char status[AP_DEVICES];
+		zcrypt_status_mask(status);
+		if (copy_to_user((char __user *) arg, status,
+				 sizeof(char) * AP_DEVICES))
+			return -EFAULT;
+		return 0;
+	}
+	case Z90STAT_QDEPTH_MASK: {
+		char qdepth[AP_DEVICES];
+		zcrypt_qdepth_mask(qdepth);
+		if (copy_to_user((char __user *) arg, qdepth,
+				 sizeof(char) * AP_DEVICES))
+			return -EFAULT;
+		return 0;
+	}
+	case Z90STAT_PERDEV_REQCNT: {
+		int reqcnt[AP_DEVICES];
+		zcrypt_perdev_reqcnt(reqcnt);
+		if (copy_to_user((int __user *) arg, reqcnt,
+				 sizeof(int) * AP_DEVICES))
+			return -EFAULT;
+		return 0;
+	}
+	case Z90STAT_REQUESTQ_COUNT:
+		return put_user(zcrypt_requestq_count(), (int __user *) arg);
+	case Z90STAT_PENDINGQ_COUNT:
+		return put_user(zcrypt_pendingq_count(), (int __user *) arg);
+	case Z90STAT_TOTALOPEN_COUNT:
+		return put_user(atomic_read(&zcrypt_open_count),
+				(int __user *) arg);
+	case Z90STAT_DOMAIN_INDEX:
+		return put_user(ap_domain_index, (int __user *) arg);
+	/**
+	 * Deprecated ioctls. Don't add another device count ioctl,
+	 * you can count them yourself in the user space with the
+	 * output of the Z90STAT_STATUS_MASK ioctl.
+	 */
+	case ICAZ90STATUS:
+		return zcrypt_ica_status(filp, arg);
+	case Z90STAT_TOTALCOUNT:
+		return put_user(zcrypt_device_count, (int __user *) arg);
+	case Z90STAT_PCICACOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCICA),
+				(int __user *) arg);
+	case Z90STAT_PCICCCOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCICC),
+				(int __user *) arg);
+	case Z90STAT_PCIXCCMCL2COUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL2),
+				(int __user *) arg);
+	case Z90STAT_PCIXCCMCL3COUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL3),
+				(int __user *) arg);
+	case Z90STAT_PCIXCCCOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL2) +
+				zcrypt_count_type(ZCRYPT_PCIXCC_MCL3),
+				(int __user *) arg);
+	case Z90STAT_CEX2CCOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_CEX2C),
+				(int __user *) arg);
+	case Z90STAT_CEX2ACOUNT:
+		return put_user(zcrypt_count_type(ZCRYPT_CEX2A),
+				(int __user *) arg);
+	default:
+		/* unknown ioctl number */
+		return -ENOIOCTLCMD;
+	}
+}
+
+#ifdef CONFIG_COMPAT
+/**
+ * ioctl32 conversion routines
+ */
+struct compat_ica_rsa_modexpo {
+	compat_uptr_t	inputdata;
+	unsigned int	inputdatalength;
+	compat_uptr_t	outputdata;
+	unsigned int	outputdatalength;
+	compat_uptr_t	b_key;
+	compat_uptr_t	n_modulus;
+};
+
+static long trans_modexpo32(struct file *filp, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct compat_ica_rsa_modexpo __user *umex32 = compat_ptr(arg);
+	struct compat_ica_rsa_modexpo mex32;
+	struct ica_rsa_modexpo mex64;
+	long rc;
+
+	if (copy_from_user(&mex32, umex32, sizeof(mex32)))
+		return -EFAULT;
+	mex64.inputdata = compat_ptr(mex32.inputdata);
+	mex64.inputdatalength = mex32.inputdatalength;
+	mex64.outputdata = compat_ptr(mex32.outputdata);
+	mex64.outputdatalength = mex32.outputdatalength;
+	mex64.b_key = compat_ptr(mex32.b_key);
+	mex64.n_modulus = compat_ptr(mex32.n_modulus);
+	do {
+		rc = zcrypt_rsa_modexpo(&mex64);
+	} while (rc == -EAGAIN);
+	if (!rc)
+		rc = put_user(mex64.outputdatalength,
+			      &umex32->outputdatalength);
+	return rc;
+}
+
+struct compat_ica_rsa_modexpo_crt {
+	compat_uptr_t	inputdata;
+	unsigned int	inputdatalength;
+	compat_uptr_t	outputdata;
+	unsigned int	outputdatalength;
+	compat_uptr_t	bp_key;
+	compat_uptr_t	bq_key;
+	compat_uptr_t	np_prime;
+	compat_uptr_t	nq_prime;
+	compat_uptr_t	u_mult_inv;
+};
+
+static long trans_modexpo_crt32(struct file *filp, unsigned int cmd,
+				unsigned long arg)
+{
+	struct compat_ica_rsa_modexpo_crt __user *ucrt32 = compat_ptr(arg);
+	struct compat_ica_rsa_modexpo_crt crt32;
+	struct ica_rsa_modexpo_crt crt64;
+	long rc;
+
+	if (copy_from_user(&crt32, ucrt32, sizeof(crt32)))
+		return -EFAULT;
+	crt64.inputdata = compat_ptr(crt32.inputdata);
+	crt64.inputdatalength = crt32.inputdatalength;
+	crt64.outputdata=  compat_ptr(crt32.outputdata);
+	crt64.outputdatalength = crt32.outputdatalength;
+	crt64.bp_key = compat_ptr(crt32.bp_key);
+	crt64.bq_key = compat_ptr(crt32.bq_key);
+	crt64.np_prime = compat_ptr(crt32.np_prime);
+	crt64.nq_prime = compat_ptr(crt32.nq_prime);
+	crt64.u_mult_inv = compat_ptr(crt32.u_mult_inv);
+	do {
+		rc = zcrypt_rsa_crt(&crt64);
+	} while (rc == -EAGAIN);
+	if (!rc)
+		rc = put_user(crt64.outputdatalength,
+			      &ucrt32->outputdatalength);
+	return rc;
+}
+
+struct compat_ica_xcRB {
+	unsigned short	agent_ID;
+	unsigned int	user_defined;
+	unsigned short	request_ID;
+	unsigned int	request_control_blk_length;
+	unsigned char	padding1[16 - sizeof (compat_uptr_t)];
+	compat_uptr_t	request_control_blk_addr;
+	unsigned int	request_data_length;
+	char		padding2[16 - sizeof (compat_uptr_t)];
+	compat_uptr_t	request_data_address;
+	unsigned int	reply_control_blk_length;
+	char		padding3[16 - sizeof (compat_uptr_t)];
+	compat_uptr_t	reply_control_blk_addr;
+	unsigned int	reply_data_length;
+	char		padding4[16 - sizeof (compat_uptr_t)];
+	compat_uptr_t	reply_data_addr;
+	unsigned short	priority_window;
+	unsigned int	status;
+} __attribute__((packed));
+
+static long trans_xcRB32(struct file *filp, unsigned int cmd,
+			 unsigned long arg)
+{
+	struct compat_ica_xcRB __user *uxcRB32 = compat_ptr(arg);
+	struct compat_ica_xcRB xcRB32;
+	struct ica_xcRB xcRB64;
+	long rc;
+
+	if (copy_from_user(&xcRB32, uxcRB32, sizeof(xcRB32)))
+		return -EFAULT;
+	xcRB64.agent_ID = xcRB32.agent_ID;
+	xcRB64.user_defined = xcRB32.user_defined;
+	xcRB64.request_ID = xcRB32.request_ID;
+	xcRB64.request_control_blk_length =
+		xcRB32.request_control_blk_length;
+	xcRB64.request_control_blk_addr =
+		compat_ptr(xcRB32.request_control_blk_addr);
+	xcRB64.request_data_length =
+		xcRB32.request_data_length;
+	xcRB64.request_data_address =
+		compat_ptr(xcRB32.request_data_address);
+	xcRB64.reply_control_blk_length =
+		xcRB32.reply_control_blk_length;
+	xcRB64.reply_control_blk_addr =
+		compat_ptr(xcRB32.reply_control_blk_addr);
+	xcRB64.reply_data_length = xcRB32.reply_data_length;
+	xcRB64.reply_data_addr =
+		compat_ptr(xcRB32.reply_data_addr);
+	xcRB64.priority_window = xcRB32.priority_window;
+	xcRB64.status = xcRB32.status;
+	do {
+		rc = zcrypt_send_cprb(&xcRB64);
+	} while (rc == -EAGAIN);
+	xcRB32.reply_control_blk_length = xcRB64.reply_control_blk_length;
+	xcRB32.reply_data_length = xcRB64.reply_data_length;
+	xcRB32.status = xcRB64.status;
+	if (copy_to_user(uxcRB32, &xcRB32, sizeof(xcRB32)))
+			return -EFAULT;
+	return rc;
+}
+
+long zcrypt_compat_ioctl(struct file *filp, unsigned int cmd,
+			 unsigned long arg)
+{
+	if (cmd == ICARSAMODEXPO)
+		return trans_modexpo32(filp, cmd, arg);
+	if (cmd == ICARSACRT)
+		return trans_modexpo_crt32(filp, cmd, arg);
+	if (cmd == ZSECSENDCPRB)
+		return trans_xcRB32(filp, cmd, arg);
+	return zcrypt_unlocked_ioctl(filp, cmd, arg);
+}
+#endif
+
+/**
+ * Misc device file operations.
+ */
+static struct file_operations zcrypt_fops = {
+	.owner		= THIS_MODULE,
+	.read		= zcrypt_read,
+	.write		= zcrypt_write,
+	.unlocked_ioctl	= zcrypt_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= zcrypt_compat_ioctl,
+#endif
+	.open		= zcrypt_open,
+	.release	= zcrypt_release
+};
+
+/**
+ * Misc device.
+ */
+static struct miscdevice zcrypt_misc_device = {
+	.minor	    = MISC_DYNAMIC_MINOR,
+	.name	    = "z90crypt",
+	.fops	    = &zcrypt_fops,
+};
+
+/**
+ * Deprecated /proc entry support.
+ */
+static struct proc_dir_entry *zcrypt_entry;
+
+static inline int sprintcl(unsigned char *outaddr, unsigned char *addr,
+			   unsigned int len)
+{
+	int hl, i;
+
+	hl = 0;
+	for (i = 0; i < len; i++)
+		hl += sprintf(outaddr+hl, "%01x", (unsigned int) addr[i]);
+	hl += sprintf(outaddr+hl, " ");
+	return hl;
+}
+
+static inline int sprintrw(unsigned char *outaddr, unsigned char *addr,
+			   unsigned int len)
+{
+	int hl, inl, c, cx;
+
+	hl = sprintf(outaddr, "	   ");
+	inl = 0;
+	for (c = 0; c < (len / 16); c++) {
+		hl += sprintcl(outaddr+hl, addr+inl, 16);
+		inl += 16;
+	}
+	cx = len%16;
+	if (cx) {
+		hl += sprintcl(outaddr+hl, addr+inl, cx);
+		inl += cx;
+	}
+	hl += sprintf(outaddr+hl, "\n");
+	return hl;
+}
+
+static inline int sprinthx(unsigned char *title, unsigned char *outaddr,
+			   unsigned char *addr, unsigned int len)
+{
+	int hl, inl, r, rx;
+
+	hl = sprintf(outaddr, "\n%s\n", title);
+	inl = 0;
+	for (r = 0; r < (len / 64); r++) {
+		hl += sprintrw(outaddr+hl, addr+inl, 64);
+		inl += 64;
+	}
+	rx = len % 64;
+	if (rx) {
+		hl += sprintrw(outaddr+hl, addr+inl, rx);
+		inl += rx;
+	}
+	hl += sprintf(outaddr+hl, "\n");
+	return hl;
+}
+
+static inline int sprinthx4(unsigned char *title, unsigned char *outaddr,
+			    unsigned int *array, unsigned int len)
+{
+	int hl, r;
+
+	hl = sprintf(outaddr, "\n%s\n", title);
+	for (r = 0; r < len; r++) {
+		if ((r % 8) == 0)
+			hl += sprintf(outaddr+hl, "    ");
+		hl += sprintf(outaddr+hl, "%08X ", array[r]);
+		if ((r % 8) == 7)
+			hl += sprintf(outaddr+hl, "\n");
+	}
+	hl += sprintf(outaddr+hl, "\n");
+	return hl;
+}
+
+static int zcrypt_status_read(char *resp_buff, char **start, off_t offset,
+			      int count, int *eof, void *data)
+{
+	unsigned char *workarea;
+	int len;
+
+	len = 0;
+
+	/* resp_buff is a page. Use the right half for a work area */
+	workarea = resp_buff + 2000;
+	len += sprintf(resp_buff + len, "\nzcrypt version: %d.%d.%d\n",
+		ZCRYPT_VERSION, ZCRYPT_RELEASE, ZCRYPT_VARIANT);
+	len += sprintf(resp_buff + len, "Cryptographic domain: %d\n",
+		       ap_domain_index);
+	len += sprintf(resp_buff + len, "Total device count: %d\n",
+		       zcrypt_device_count);
+	len += sprintf(resp_buff + len, "PCICA count: %d\n",
+		       zcrypt_count_type(ZCRYPT_PCICA));
+	len += sprintf(resp_buff + len, "PCICC count: %d\n",
+		       zcrypt_count_type(ZCRYPT_PCICC));
+	len += sprintf(resp_buff + len, "PCIXCC MCL2 count: %d\n",
+		       zcrypt_count_type(ZCRYPT_PCIXCC_MCL2));
+	len += sprintf(resp_buff + len, "PCIXCC MCL3 count: %d\n",
+		       zcrypt_count_type(ZCRYPT_PCIXCC_MCL3));
+	len += sprintf(resp_buff + len, "CEX2C count: %d\n",
+		       zcrypt_count_type(ZCRYPT_CEX2C));
+	len += sprintf(resp_buff + len, "CEX2A count: %d\n",
+		       zcrypt_count_type(ZCRYPT_CEX2A));
+	len += sprintf(resp_buff + len, "requestq count: %d\n",
+		       zcrypt_requestq_count());
+	len += sprintf(resp_buff + len, "pendingq count: %d\n",
+		       zcrypt_pendingq_count());
+	len += sprintf(resp_buff + len, "Total open handles: %d\n\n",
+		       atomic_read(&zcrypt_open_count));
+	zcrypt_status_mask(workarea);
+	len += sprinthx("Online devices: 1=PCICA 2=PCICC 3=PCIXCC(MCL2) "
+			"4=PCIXCC(MCL3) 5=CEX2C 6=CEX2A",
+			resp_buff+len, workarea, AP_DEVICES);
+	zcrypt_qdepth_mask(workarea);
+	len += sprinthx("Waiting work element counts",
+			resp_buff+len, workarea, AP_DEVICES);
+	zcrypt_perdev_reqcnt((unsigned int *) workarea);
+	len += sprinthx4("Per-device successfully completed request counts",
+			 resp_buff+len,(unsigned int *) workarea, AP_DEVICES);
+	*eof = 1;
+	memset((void *) workarea, 0x00, AP_DEVICES * sizeof(unsigned int));
+	return len;
+}
+
+static void zcrypt_disable_card(int index)
+{
+	struct zcrypt_device *zdev;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list)
+		if (AP_QID_DEVICE(zdev->ap_dev->qid) == index) {
+			zdev->online = 0;
+			ap_flush_queue(zdev->ap_dev);
+			break;
+		}
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+static void zcrypt_enable_card(int index)
+{
+	struct zcrypt_device *zdev;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list)
+		if (AP_QID_DEVICE(zdev->ap_dev->qid) == index) {
+			zdev->online = 1;
+			break;
+		}
+	spin_unlock_bh(&zcrypt_device_lock);
+}
+
+/*
+ * /proc write handler: 'd'/'e' in the "Online devices" row toggle cards.
+ */
+static int zcrypt_status_write(struct file *file, const char __user *buffer,
+			       unsigned long count, void *data)
+{
+	unsigned char *lbuf, *ptr;
+	unsigned long local_count;
+	int j;
+
+	if (count == 0)
+		return 0;
+#define LBUFSIZE 1200UL
+	lbuf = kmalloc(LBUFSIZE, GFP_KERNEL);
+	if (!lbuf) {
+		PRINTK("kmalloc failed!\n");
+		return -ENOMEM;	/* 0 is no error for the writer */
+	}
+
+	local_count = min(LBUFSIZE - 1, count);	/* keep room for the '\0' */
+	if (copy_from_user(lbuf, buffer, local_count) != 0) {
+		kfree(lbuf);
+		return -EFAULT;
+	}
+	lbuf[local_count] = '\0';
+	ptr = strstr(lbuf, "Online devices");
+	if (!ptr) {
+		PRINTK("Unable to parse data (missing \"Online devices\")\n");
+		goto out;
+	}
+	ptr = strstr(ptr, "\n");
+	if (!ptr) {
+		PRINTK("Unable to parse data (missing newline "
+		       "after \"Online devices\")\n");
+		goto out;
+	}
+	ptr++;	/* first character of the row below the title */
+	if (strstr(ptr, "Waiting work element counts") == NULL) {
+		PRINTK("Unable to parse data (missing "
+		       "\"Waiting work element counts\")\n");
+		goto out;
+	}
+
+	for (j = 0; j < 64 && *ptr; ptr++) {
+		/**
+		 * '0' for no device, '1' for PCICA, '2' for PCICC,
+		 * '3' for PCIXCC_MCL2, '4' for PCIXCC_MCL3,
+		 * '5' for CEX2C and '6' for CEX2A; j is the card index.
+		 */
+		if (*ptr >= '0' && *ptr <= '6')
+			j++;
+		else if (*ptr == 'd' || *ptr == 'D')
+			zcrypt_disable_card(j++);
+		else if (*ptr == 'e' || *ptr == 'E')
+			zcrypt_enable_card(j++);
+		else if (*ptr != ' ' && *ptr != '\t')
+			break;
+	}
+out:
+	kfree(lbuf);
+	return count;	/* claims all input, parsed or not */
+}
+
+/**
+ * Module init: register misc device and /proc entry; returns 0 or -errno.
+ */
+int __init zcrypt_api_init(void)
+{
+	int rc;
+
+	/* Register the request sprayer. PRINTKW adds the log level itself. */
+	rc = misc_register(&zcrypt_misc_device);
+	if (rc < 0) {
+		PRINTKW("misc_register (minor %d) failed with %d\n",
+			zcrypt_misc_device.minor, rc);
+		goto out;
+	}
+
+	/* Set up the (deprecated) proc file system entry */
+	zcrypt_entry = create_proc_entry("driver/z90crypt", 0644, NULL);
+	if (!zcrypt_entry) {
+		PRINTK("Couldn't create z90crypt proc entry\n");
+		rc = -ENOMEM;
+		goto out_misc;
+	}
+	zcrypt_entry->nlink = 1;
+	zcrypt_entry->data = NULL;
+	zcrypt_entry->read_proc = zcrypt_status_read;
+	zcrypt_entry->write_proc = zcrypt_status_write;
+
+	return 0;
+
+out_misc:
+	misc_deregister(&zcrypt_misc_device);	/* undo the registration above */
+out:
+	return rc;
+}
+
+/**
+ * The module termination code.
+ */
+void zcrypt_api_exit(void)
+{
+	remove_proc_entry("driver/z90crypt", NULL);
+	misc_deregister(&zcrypt_misc_device);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_api_init);
+module_exit(zcrypt_api_exit);
+#endif
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
new file mode 100644
index 0000000..de4877e
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -0,0 +1,141 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_api.h
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *	       Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_API_H_
+#define _ZCRYPT_API_H_
+
+/**
+ * Macro definitions (each macro prepends its own KERN_* log level)
+ *
+ * PDEBUG debugs in the form "zcrypt: function_name -> message"
+ * and expands to an empty statement unless ZCRYPT_DEBUG is defined
+ * PRINTK is like PDEBUG, except that it is always enabled
+ * PRINTKN is like PRINTK, except that it does not include the function name
+ * PRINTKW is like PRINTK, except that it uses KERN_WARNING
+ * PRINTKC is like PRINTK, except that it uses KERN_CRIT
+ */
+#define DEV_NAME	"zcrypt"
+
+#define PRINTK(fmt, args...) \
+	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+#define PRINTKN(fmt, args...) \
+	printk(KERN_DEBUG DEV_NAME ": " fmt, ## args)
+#define PRINTKW(fmt, args...) \
+	printk(KERN_WARNING DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+#define PRINTKC(fmt, args...) \
+	printk(KERN_CRIT DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+
+#ifdef ZCRYPT_DEBUG
+#define PDEBUG(fmt, args...) \
+	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+#else
+#define PDEBUG(fmt, args...) do {} while (0)
+#endif
+
+#include "ap_bus.h"
+#include <asm/zcrypt.h>
+
+/* deprecated status calls */
+#define ICAZ90STATUS		_IOR(ZCRYPT_IOCTL_MAGIC, 0x10, struct ica_z90_status)
+#define Z90STAT_PCIXCCCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x43, int)
+
+/**
+ * This structure is deprecated and the corresponding ioctl() has been
+ * replaced with individual ioctl()s for each piece of data!
+ */
+struct ica_z90_status {
+	int totalcount;
+	int leedslitecount; // PCICA
+	int leeds2count;    // PCICC
+	// int PCIXCCCount; is not in struct for backward compatibility
+	int requestqWaitCount;
+	int pendingqWaitCount;
+	int totalOpenCount;
+	int cryptoDomain;
+	// status: 0=not there, 1=PCICA, 2=PCICC, 3=PCIXCC_MCL2, 4=PCIXCC_MCL3,
+	//	   5=CEX2C; presumably 6=CEX2A (ZCRYPT_CEX2A) -- TODO confirm
+	unsigned char status[64];
+	// qdepth: # work elements waiting for each device
+	unsigned char qdepth[64];
+};
+
+/**
+ * device type for an actual device is either PCICA, PCICC, PCIXCC_MCL2,
+ * PCIXCC_MCL3, CEX2C, or CEX2A
+ *
+ * NOTE: PCIXCC_MCL3 refers to a PCIXCC with May 2004 version of Licensed
+ *	 Internal Code (LIC) (EC J12220 level 29).
+ *	 PCIXCC_MCL2 refers to any LIC before this level.
+ */
+#define ZCRYPT_PCICA		1
+#define ZCRYPT_PCICC		2
+#define ZCRYPT_PCIXCC_MCL2	3
+#define ZCRYPT_PCIXCC_MCL3	4
+#define ZCRYPT_CEX2C		5
+#define ZCRYPT_CEX2A		6
+
+struct zcrypt_device;
+
+struct zcrypt_ops {
+	long (*rsa_modexpo)(struct zcrypt_device *, struct ica_rsa_modexpo *);
+	long (*rsa_modexpo_crt)(struct zcrypt_device *,
+				struct ica_rsa_modexpo_crt *);
+	long (*send_cprb)(struct zcrypt_device *, struct ica_xcRB *);
+};
+
+struct zcrypt_device {
+	struct list_head list;		/* Device list. */
+	spinlock_t lock;		/* Per device lock. */
+	struct kref refcount;		/* device refcounting */
+	struct ap_device *ap_dev;	/* The "real" ap device. */
+	struct zcrypt_ops *ops;		/* Crypto operations. */
+	int online;			/* User online/offline */
+
+	int user_space_type;		/* User space device id. */
+	char *type_string;		/* User space device name. */
+	int min_mod_size;		/* Min number of bits. */
+	int max_mod_size;		/* Max number of bits. */
+	int short_crt;			/* Card has crt length restriction. */
+	int speed_rating;		/* Speed of the crypto device. */
+
+	int request_count;		/* # current requests. */
+
+	struct ap_message reply;	/* Per-device reply structure. */
+};
+
+struct zcrypt_device *zcrypt_device_alloc(size_t);
+void zcrypt_device_free(struct zcrypt_device *);
+void zcrypt_device_get(struct zcrypt_device *);
+int zcrypt_device_put(struct zcrypt_device *);
+int zcrypt_device_register(struct zcrypt_device *);
+void zcrypt_device_unregister(struct zcrypt_device *);
+int zcrypt_api_init(void);
+void zcrypt_api_exit(void);
+
+#endif /* _ZCRYPT_API_H_ */
diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h
new file mode 100644
index 0000000..8dbcf0e
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_cca_key.h
@@ -0,0 +1,350 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_cca_key.h
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_CCA_KEY_H_
+#define _ZCRYPT_CCA_KEY_H_
+
+struct T6_keyBlock_hdr {
+	unsigned short blen;
+	unsigned short ulen;
+	unsigned short flags;
+};
+
+/**
+ * mapping for the cca private ME key token.
+ * Three parts of interest here: the header, the private section and
+ * the public section.
+ *
+ * mapping for the cca key token header
+ */
+struct cca_token_hdr {
+	unsigned char  token_identifier;
+	unsigned char  version;
+	unsigned short token_length;
+	unsigned char  reserved[4];
+} __attribute__((packed));
+
+#define CCA_TKN_HDR_ID_EXT 0x1E
+
+/**
+ * mapping for the cca private ME section
+ */
+struct cca_private_ext_ME_sec {
+	unsigned char  section_identifier;
+	unsigned char  version;
+	unsigned short section_length;
+	unsigned char  private_key_hash[20];
+	unsigned char  reserved1[4];
+	unsigned char  key_format;
+	unsigned char  reserved2;
+	unsigned char  key_name_hash[20];
+	unsigned char  key_use_flags[4];
+	unsigned char  reserved3[6];
+	unsigned char  reserved4[24];
+	unsigned char  confounder[24];
+	unsigned char  exponent[128];
+	unsigned char  modulus[128];
+} __attribute__((packed));
+
+#define CCA_PVT_USAGE_ALL 0x80
+
+/**
+ * mapping for the cca public section
+ * In a private key, the modulus doesn't appear in the public
+ * section. So, an arbitrary public exponent of 0x010001 will be
+ * used, for a section length of 0x0F always.
+ */
+struct cca_public_sec {
+	unsigned char  section_identifier;
+	unsigned char  version;
+	unsigned short section_length;
+	unsigned char  reserved[2];
+	unsigned short exponent_len;
+	unsigned short modulus_bit_len;
+	unsigned short modulus_byte_len;    /* In a private key, this is 0 */
+} __attribute__((packed));
+
+/**
+ * mapping for the cca private CRT key 'token'
+ * The first three parts (the only parts considered in this release)
+ * are: the header, the private section and the public section.
+ * The header and public section are the same as for the
+ * private ME key token (struct cca_token_hdr, struct cca_public_sec).
+ *
+ * Following the structure are the quantities p, q, dp, dq, u, pad,
+ * and modulus, in that order, where pad_len is the modulo 8
+ * complement of the residue modulo 8 of the sum of
+ * (p_len + q_len + dp_len + dq_len + u_len).
+ */
+struct cca_pvt_ext_CRT_sec {
+	unsigned char  section_identifier;
+	unsigned char  version;
+	unsigned short section_length;
+	unsigned char  private_key_hash[20];
+	unsigned char  reserved1[4];
+	unsigned char  key_format;
+	unsigned char  reserved2;
+	unsigned char  key_name_hash[20];
+	unsigned char  key_use_flags[4];
+	unsigned short p_len;
+	unsigned short q_len;
+	unsigned short dp_len;
+	unsigned short dq_len;
+	unsigned short u_len;
+	unsigned short mod_len;
+	unsigned char  reserved3[4];
+	unsigned short pad_len;
+	unsigned char  reserved4[52];
+	unsigned char  confounder[8];
+} __attribute__((packed));
+
+#define CCA_PVT_EXT_CRT_SEC_ID_PVT 0x08
+#define CCA_PVT_EXT_CRT_SEC_FMT_CL 0x40
+
+/**
+ * Set up private key fields of a type6 MEX message.
+ * Note that all numerics in the key token are big-endian, while the
+ * byte order of the key block header lengths follows @big_endian.
+ * @mex: pointer to user input data (NOTE(review): inputdatalength is
+ *	 assumed to be <= 128 here -- confirm against the callers)
+ * @p: pointer to memory area for the key
+ * @big_endian: non-zero: big-endian blen/ulen, zero: little-endian
+ * Returns the size of the key area or -EFAULT
+ */
+static inline int zcrypt_type6_mex_key_de(struct ica_rsa_modexpo *mex,
+					  void *p, int big_endian)
+{
+	static struct cca_token_hdr static_pvt_me_hdr = {
+		.token_identifier	=  0x1E,
+		.token_length		=  0x0183,
+	};
+	static struct cca_private_ext_ME_sec static_pvt_me_sec = {
+		.section_identifier	=  0x02,
+		.section_length		=  0x016C,
+		.key_use_flags		= {0x80,0x00,0x00,0x00},
+	};
+	static struct cca_public_sec static_pub_me_sec = {
+		.section_identifier	=  0x04,
+		.section_length		=  0x000F,
+		.exponent_len		=  0x0003,
+	};
+	static char pk_exponent[3] = { 0x01, 0x00, 0x01 };
+	struct {
+		struct T6_keyBlock_hdr t6_hdr;
+		struct cca_token_hdr pvtMeHdr;
+		struct cca_private_ext_ME_sec pvtMeSec;
+		struct cca_public_sec pubMeSec;
+		char exponent[3];
+	} __attribute__((packed)) *key = p;
+	unsigned char *temp;
+
+	memset(key, 0, sizeof(*key));
+
+	if (big_endian) {
+		key->t6_hdr.blen = cpu_to_be16(0x189);
+		key->t6_hdr.ulen = cpu_to_be16(0x189 - 2);
+	} else {
+		key->t6_hdr.blen = cpu_to_le16(0x189);
+		key->t6_hdr.ulen = cpu_to_le16(0x189 - 2);
+	}
+	key->pvtMeHdr = static_pvt_me_hdr;
+	key->pvtMeSec = static_pvt_me_sec;
+	key->pubMeSec = static_pub_me_sec;
+	/**
+	 * In a private key, the modulus doesn't appear in the public
+	 * section. So, an arbitrary public exponent of 0x010001 will be
+	 * used.
+	 */
+	memcpy(key->exponent, pk_exponent, 3);
+
+	/* key parameter block: b_key, right-justified in exponent[] */
+	temp = key->pvtMeSec.exponent +
+		sizeof(key->pvtMeSec.exponent) - mex->inputdatalength;
+	if (copy_from_user(temp, mex->b_key, mex->inputdatalength))
+		return -EFAULT;
+
+	/* modulus, right-justified in modulus[] */
+	temp = key->pvtMeSec.modulus +
+		sizeof(key->pvtMeSec.modulus) - mex->inputdatalength;
+	if (copy_from_user(temp, mex->n_modulus, mex->inputdatalength))
+		return -EFAULT;
+	key->pubMeSec.modulus_bit_len = 8 * mex->inputdatalength;
+	return sizeof(*key);
+}
+
+/**
+ * Set up the key token (token header plus struct cca_public_sec) of a
+ * type6 MEX message; leading zeroes are stripped from the b_key.
+ * Note that all numerics in the key token are big-endian, while the
+ * byte order of the key block header lengths follows @big_endian.
+ * @mex: pointer to user input data
+ * @p: pointer to memory area for the key
+ * @big_endian: non-zero: big-endian blen/ulen, zero: little-endian
+ *
+ * Returns the size of the key area, -EFAULT, or -EINVAL (b_key all zero)
+ */
+static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex,
+					  void *p, int big_endian)
+{
+	static struct cca_token_hdr static_pub_hdr = {
+		.token_identifier	=  0x1E,
+	};
+	static struct cca_public_sec static_pub_sec = {
+		.section_identifier	=  0x04,
+	};
+	struct {
+		struct T6_keyBlock_hdr t6_hdr;
+		struct cca_token_hdr pubHdr;
+		struct cca_public_sec pubSec;
+		char exponent[0];
+	} __attribute__((packed)) *key = p;
+	unsigned char *temp;
+	int i;
+
+	memset(key, 0, sizeof(*key));
+
+	key->pubHdr = static_pub_hdr;
+	key->pubSec = static_pub_sec;
+
+	/* key parameter block */
+	temp = key->exponent;
+	if (copy_from_user(temp, mex->b_key, mex->inputdatalength))
+		return -EFAULT;
+	/* Strip leading zeroes from b_key. */
+	for (i = 0; i < mex->inputdatalength; i++)
+		if (temp[i])
+			break;
+	if (i >= mex->inputdatalength)
+		return -EINVAL;
+	memmove(temp, temp + i, mex->inputdatalength - i);
+	temp += mex->inputdatalength - i;
+	/* modulus, placed directly behind the stripped exponent */
+	if (copy_from_user(temp, mex->n_modulus, mex->inputdatalength))
+		return -EFAULT;
+
+	key->pubSec.modulus_bit_len = 8 * mex->inputdatalength;
+	key->pubSec.modulus_byte_len = mex->inputdatalength;
+	key->pubSec.exponent_len = mex->inputdatalength - i;
+	key->pubSec.section_length = sizeof(key->pubSec) +
+					2*mex->inputdatalength - i;
+	key->pubHdr.token_length =
+		key->pubSec.section_length + sizeof(key->pubHdr);
+	if (big_endian) {
+		key->t6_hdr.ulen = cpu_to_be16(key->pubHdr.token_length + 4);
+		key->t6_hdr.blen = cpu_to_be16(key->pubHdr.token_length + 6);
+	} else {
+		key->t6_hdr.ulen = cpu_to_le16(key->pubHdr.token_length + 4);
+		key->t6_hdr.blen = cpu_to_le16(key->pubHdr.token_length + 6);
+	}
+	return sizeof(*key) + 2*mex->inputdatalength - i;
+}
+
+/**
+ * Set up private key fields of a type6 CRT message.
+ * Note that all numerics in the key token are big-endian, while the
+ * byte order of the key block header lengths follows @big_endian.
+ *
+ * @crt: pointer to user input data
+ * @p: pointer to memory area for the key
+ * @big_endian: non-zero: big-endian blen/ulen, zero: little-endian
+ * Returns the size of the key area or -EFAULT
+ */
+static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt,
+				       void *p, int big_endian)
+{
+	static struct cca_public_sec static_cca_pub_sec = {
+		.section_identifier = 4,
+		.section_length = 0x000f,
+		.exponent_len = 0x0003,
+	};
+	static char pk_exponent[3] = { 0x01, 0x00, 0x01 };
+	struct {
+		struct T6_keyBlock_hdr t6_hdr;
+		struct cca_token_hdr token;
+		struct cca_pvt_ext_CRT_sec pvt;
+		char key_parts[0];
+	} __attribute__((packed)) *key = p;
+	struct cca_public_sec *pub;
+	int short_len, long_len, pad_len, key_len, size;
+
+	memset(key, 0, sizeof(*key));
+
+	short_len = crt->inputdatalength / 2;
+	long_len = short_len + 8;
+	pad_len = -(3*long_len + 2*short_len) & 7;
+	key_len = 3*long_len + 2*short_len + pad_len + crt->inputdatalength;
+	size = sizeof(*key) + key_len + sizeof(*pub) + 3;
+
+	/* parameter block.key block */
+	if (big_endian) {
+		key->t6_hdr.blen = cpu_to_be16(size);
+		key->t6_hdr.ulen = cpu_to_be16(size - 2);
+	} else {
+		key->t6_hdr.blen = cpu_to_le16(size);
+		key->t6_hdr.ulen = cpu_to_le16(size - 2);
+	}
+
+	/* key token header */
+	key->token.token_identifier = CCA_TKN_HDR_ID_EXT;
+	key->token.token_length = size - 6;
+
+	/* private section */
+	key->pvt.section_identifier = CCA_PVT_EXT_CRT_SEC_ID_PVT;
+	key->pvt.section_length = sizeof(key->pvt) + key_len;
+	key->pvt.key_format = CCA_PVT_EXT_CRT_SEC_FMT_CL;
+	key->pvt.key_use_flags[0] = CCA_PVT_USAGE_ALL;
+	key->pvt.p_len = key->pvt.dp_len = key->pvt.u_len = long_len;
+	key->pvt.q_len = key->pvt.dq_len = short_len;
+	key->pvt.mod_len = crt->inputdatalength;
+	key->pvt.pad_len = pad_len;
+
+	/* key parts: p, q, dp, dq, u; the modulus area is set to 0xff */
+	if (copy_from_user(key->key_parts, crt->np_prime, long_len) ||
+	    copy_from_user(key->key_parts + long_len,
+					crt->nq_prime, short_len) ||
+	    copy_from_user(key->key_parts + long_len + short_len,
+					crt->bp_key, long_len) ||
+	    copy_from_user(key->key_parts + 2*long_len + short_len,
+					crt->bq_key, short_len) ||
+	    copy_from_user(key->key_parts + 2*long_len + 2*short_len,
+					crt->u_mult_inv, long_len))
+		return -EFAULT;
+	memset(key->key_parts + 3*long_len + 2*short_len + pad_len,
+	       0xff, crt->inputdatalength);
+	pub = (struct cca_public_sec *)(key->key_parts + key_len);
+	*pub = static_cca_pub_sec;
+	pub->modulus_bit_len = 8 * crt->inputdatalength;
+	/**
+	 * In a private key, the modulus doesn't appear in the public
+	 * section. So, an arbitrary public exponent of 0x010001 will be
+	 * used.
+	 */
+	memcpy((char *) (pub + 1), pk_exponent, 3);
+	return size;
+}
+
+#endif /* _ZCRYPT_CCA_KEY_H_ */
diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
new file mode 100644
index 0000000..a62b000
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -0,0 +1,435 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_cex2a.c
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_error.h"
+#include "zcrypt_cex2a.h"
+
+#define CEX2A_MIN_MOD_SIZE	  1	/*    8 bits	*/
+#define CEX2A_MAX_MOD_SIZE	256	/* 2048 bits	*/
+
+#define CEX2A_SPEED_RATING	970
+
+#define CEX2A_MAX_MESSAGE_SIZE	0x390	/* sizeof(struct type50_crb2_msg)    */
+#define CEX2A_MAX_RESPONSE_SIZE 0x110	/* max outputdatalength + type80_hdr */
+
+#define CEX2A_CLEANUP_TIME	(15*HZ)
+
+static struct ap_device_id zcrypt_cex2a_ids[] = {
+	{ AP_DEVICE(AP_DEVICE_TYPE_CEX2A) },
+	{ /* end of list */ },
+};
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_ids);
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("CEX2A Cryptographic Coprocessor device driver, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+#endif
+
+static int zcrypt_cex2a_probe(struct ap_device *ap_dev);
+static void zcrypt_cex2a_remove(struct ap_device *ap_dev);
+static void zcrypt_cex2a_receive(struct ap_device *, struct ap_message *,
+				 struct ap_message *);
+
+static struct ap_driver zcrypt_cex2a_driver = {
+	.probe = zcrypt_cex2a_probe,
+	.remove = zcrypt_cex2a_remove,
+	.receive = zcrypt_cex2a_receive,
+	.ids = zcrypt_cex2a_ids,
+};
+
+/**
+ * Build a type50 MEX request from an ICAMEX request.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: AP message whose buffer receives the type50 request
+ * @mex: pointer to user input data
+ *
+ * Returns 0 on success or -EFAULT.
+ */
+static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_rsa_modexpo *mex)
+{
+	int len = mex->inputdatalength;
+	unsigned char *modulus, *exponent, *input;
+
+	/* All operands go right-justified into zeroed, fixed-size fields. */
+	if (len > 128) {
+		struct type50_meb2_msg *msg = ap_msg->message;
+		memset(msg, 0, sizeof(*msg));
+		ap_msg->length = sizeof(*msg);
+		msg->header.msg_type_code = TYPE50_TYPE_CODE;
+		msg->header.msg_len = sizeof(*msg);
+		msg->keyblock_type = TYPE50_MEB2_FMT;
+		modulus = msg->modulus + sizeof(msg->modulus) - len;
+		exponent = msg->exponent + sizeof(msg->exponent) - len;
+		input = msg->message + sizeof(msg->message) - len;
+	} else {
+		struct type50_meb1_msg *msg = ap_msg->message;
+		memset(msg, 0, sizeof(*msg));
+		ap_msg->length = sizeof(*msg);
+		msg->header.msg_type_code = TYPE50_TYPE_CODE;
+		msg->header.msg_len = sizeof(*msg);
+		msg->keyblock_type = TYPE50_MEB1_FMT;
+		modulus = msg->modulus + sizeof(msg->modulus) - len;
+		exponent = msg->exponent + sizeof(msg->exponent) - len;
+		input = msg->message + sizeof(msg->message) - len;
+	}
+
+	/* Fetch modulus, exponent and input data from user space. */
+	if (copy_from_user(modulus, mex->n_modulus, len) ||
+	    copy_from_user(exponent, mex->b_key, len) ||
+	    copy_from_user(input, mex->inputdata, len))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ * Convert a ICACRT message to a type50 CRT message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: AP message whose buffer receives the type50 request
+ * @crt: pointer to user input data
+ *
+ * Returns 0 on success or -EFAULT.
+ */
+static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_rsa_modexpo_crt *crt)
+{
+	int mod_len, short_len, long_len, long_offset;
+	unsigned char *p, *q, *dp, *dq, *u, *inp;
+
+	mod_len = crt->inputdatalength;
+	short_len = mod_len / 2;
+	long_len = mod_len / 2 + 8;
+
+	/*
+	 * CEX2A cannot handle p, dp, or U > 128 bytes.
+	 * If we have one of these, we need to do extra checking.
+	 */
+	if (long_len > 128) {
+		/*
+		 * zcrypt_rsa_crt already checked for the leading
+		 * zeroes of np_prime, bp_key and u_mult_inv.
+		 */
+		long_offset = long_len - 128;
+		long_len = 128;
+	} else
+		long_offset = 0;
+
+	/*
+	 * Instead of doing extra work for p, dp, U > 64 bytes, we'll just use
+	 * the larger message structure.
+	 */
+	if (long_len <= 64) {
+		struct type50_crb1_msg *crb1 = ap_msg->message;
+		memset(crb1, 0, sizeof(*crb1));
+		ap_msg->length = sizeof(*crb1);
+		crb1->header.msg_type_code = TYPE50_TYPE_CODE;
+		crb1->header.msg_len = sizeof(*crb1);
+		crb1->keyblock_type = TYPE50_CRB1_FMT;
+		p = crb1->p + sizeof(crb1->p) - long_len;
+		q = crb1->q + sizeof(crb1->q) - short_len;
+		dp = crb1->dp + sizeof(crb1->dp) - long_len;
+		dq = crb1->dq + sizeof(crb1->dq) - short_len;
+		u = crb1->u + sizeof(crb1->u) - long_len;
+		inp = crb1->message + sizeof(crb1->message) - mod_len;
+	} else {
+		struct type50_crb2_msg *crb2 = ap_msg->message;
+		memset(crb2, 0, sizeof(*crb2));
+		ap_msg->length = sizeof(*crb2);
+		crb2->header.msg_type_code = TYPE50_TYPE_CODE;
+		crb2->header.msg_len = sizeof(*crb2);
+		crb2->keyblock_type = TYPE50_CRB2_FMT;
+		p = crb2->p + sizeof(crb2->p) - long_len;
+		q = crb2->q + sizeof(crb2->q) - short_len;
+		dp = crb2->dp + sizeof(crb2->dp) - long_len;
+		dq = crb2->dq + sizeof(crb2->dq) - short_len;
+		u = crb2->u + sizeof(crb2->u) - long_len;
+		inp = crb2->message + sizeof(crb2->message) - mod_len;
+	}
+
+	if (copy_from_user(p, crt->np_prime + long_offset, long_len) ||
+	    copy_from_user(q, crt->nq_prime, short_len) ||
+	    copy_from_user(dp, crt->bp_key + long_offset, long_len) ||
+	    copy_from_user(dq, crt->bq_key, short_len) ||
+	    copy_from_user(u, crt->u_mult_inv + long_offset, long_len) ||
+	    copy_from_user(inp, crt->inputdata, mod_len))
+		return -EFAULT;
+
+
+	return 0;
+}
+
+/**
+ * Copy results from a type 80 reply message back to user space.
+ *
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @outputdata: pointer to user output data
+ * @outputdatalength: size of user output data
+ *
+ * Returns 0 on success, -EFAULT, or -EAGAIN if the reply is too short.
+ */
+static int convert_type80(struct zcrypt_device *zdev,
+			  struct ap_message *reply,
+			  char __user *outputdata,
+			  unsigned int outputdatalength)
+{
+	struct type80_hdr *hdr = reply->message;
+	unsigned char *result;
+
+	if (hdr->len < sizeof(*hdr) + outputdatalength) {
+		/* The result is too short, the CEX2A card may not do that.. */
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+	BUG_ON(hdr->len > CEX2A_MAX_RESPONSE_SIZE);
+	/* The result sits in the last outputdatalength bytes of the reply. */
+	result = reply->message + hdr->len - outputdatalength;
+	return copy_to_user(outputdata, result, outputdatalength) ?
+		-EFAULT : 0;
+}
+
+/* Dispatch a CEX2A reply to the converter matching its response type. */
+static int convert_response(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    char __user *outputdata,
+			    unsigned int outputdatalength)
+{
+	/* Response type byte is the second byte in the response. */
+	unsigned char rsp_type = ((unsigned char *) reply->message)[1];
+
+	if (rsp_type == TYPE82_RSP_CODE || rsp_type == TYPE88_RSP_CODE)
+		return convert_error(zdev, reply);
+	if (rsp_type == TYPE80_RSP_CODE)
+		return convert_type80(zdev, reply,
+				      outputdata, outputdatalength);
+	/* Unknown response type, this should NEVER EVER happen */
+	PRINTK("Unrecognized Message Header: %08x%08x\n",
+	       *(unsigned int *) reply->message,
+	       *(unsigned int *) (reply->message+4));
+	zdev->online = 0;
+	return -EAGAIN;	/* repeat the request on a different device. */
+}
+
+/**
+ * This function is called from the AP bus code after a crypto request
+ * "msg" has finished with the reply message "reply".
+ * It is called from tasklet context.
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message
+ * @reply: pointer to the AP reply message, or an ERR_PTR() value
+ */
+static void zcrypt_cex2a_receive(struct ap_device *ap_dev,
+				 struct ap_message *msg,
+				 struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	void *src = &error_reply;
+	int length = sizeof(error_reply);
+
+	/* Copy the reply; an ERR_PTR reply must never be dereferenced. */
+	if (!IS_ERR(reply)) {
+		struct type80_hdr *t80h = reply->message;
+		src = reply->message;
+		if (t80h->type == TYPE80_RSP_CODE)
+			length = min(CEX2A_MAX_RESPONSE_SIZE, (int) t80h->len);
+	}
+	memcpy(msg->message, src, length);
+	complete((struct completion *) msg->private);
+}
+
+static atomic_t zcrypt_step = ATOMIC_INIT(0);
+
+/**
+ * Handle a modexpo request that the request distributor assigned to
+ * this CEX2A device.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  CEX2A device to the request distributor
+ * @mex: pointer to the modexpo request buffer
+ */
+static long zcrypt_cex2a_modexpo(struct zcrypt_device *zdev,
+				 struct ica_rsa_modexpo *mex)
+{
+	struct completion done;
+	struct ap_message request;
+	int rc;
+
+	request.message = kmalloc(CEX2A_MAX_MESSAGE_SIZE, GFP_KERNEL);
+	if (!request.message)
+		return -ENOMEM;
+	request.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	request.private = &done;
+	rc = ICAMEX_msg_to_type50MEX_msg(zdev, &request, mex);
+	if (rc)
+		goto out_free;
+	init_completion(&done);
+	ap_queue_message(zdev->ap_dev, &request);
+	rc = wait_for_completion_interruptible_timeout(
+				&done, CEX2A_CLEANUP_TIME);
+	if (rc <= 0) {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &request);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+		goto out_free;
+	}
+	rc = convert_response(zdev, &request, mex->outputdata,
+			      mex->outputdatalength);
+out_free:
+	kfree(request.message);
+	return rc;
+}
+
+/**
+ * Handle a modexpo_crt request that the request distributor assigned
+ * to this CEX2A device.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  CEX2A device to the request distributor
+ * @crt: pointer to the modexpoc_crt request buffer
+ */
+static long zcrypt_cex2a_modexpo_crt(struct zcrypt_device *zdev,
+				     struct ica_rsa_modexpo_crt *crt)
+{
+	struct completion done;
+	struct ap_message request;
+	int rc;
+
+	request.message = kmalloc(CEX2A_MAX_MESSAGE_SIZE, GFP_KERNEL);
+	if (!request.message)
+		return -ENOMEM;
+	request.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	request.private = &done;
+	rc = ICACRT_msg_to_type50CRT_msg(zdev, &request, crt);
+	if (rc)
+		goto out_free;
+	init_completion(&done);
+	ap_queue_message(zdev->ap_dev, &request);
+	rc = wait_for_completion_interruptible_timeout(
+				&done, CEX2A_CLEANUP_TIME);
+	if (rc <= 0) {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &request);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+		goto out_free;
+	}
+	rc = convert_response(zdev, &request, crt->outputdata,
+			      crt->outputdatalength);
+out_free:
+	kfree(request.message);
+	return rc;
+}
+
+/**
+ * The crypto operations for a CEX2A card; send_cprb is left unset.
+ */
+static struct zcrypt_ops zcrypt_cex2a_ops = {
+	.rsa_modexpo = zcrypt_cex2a_modexpo,
+	.rsa_modexpo_crt = zcrypt_cex2a_modexpo_crt,
+};
+
+/**
+ * Probe function for CEX2A cards. It always accepts the AP device
+ * since the bus_match already checked the hardware type.
+ * @ap_dev: pointer to the AP device.
+ * Returns 0 on success, -ENOMEM if no zcrypt device could be allocated,
+ * or the error code of zcrypt_device_register().
+ */
+static int zcrypt_cex2a_probe(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	zdev = zcrypt_device_alloc(CEX2A_MAX_RESPONSE_SIZE);
+	if (!zdev)
+		return -ENOMEM;
+	zdev->ap_dev = ap_dev;
+	zdev->ops = &zcrypt_cex2a_ops;
+	zdev->online = 1;
+	zdev->user_space_type = ZCRYPT_CEX2A;
+	zdev->type_string = "CEX2A";
+	zdev->min_mod_size = CEX2A_MIN_MOD_SIZE;
+	zdev->max_mod_size = CEX2A_MAX_MOD_SIZE;
+	zdev->short_crt = 1;
+	zdev->speed_rating = CEX2A_SPEED_RATING;
+	ap_dev->reply = &zdev->reply;
+	ap_dev->private = zdev;
+	rc = zcrypt_device_register(zdev);
+	if (rc) {
+		/* Registration failed: undo the probe-time setup. */
+		ap_dev->private = NULL;
+		zcrypt_device_free(zdev);
+	}
+	return rc;
+}
+
+/**
+ * This is called to remove the extended CEX2A driver information
+ * if an AP device is removed.
+ * @ap_dev: pointer to the AP device; its private field holds the
+ *	    zcrypt device to unregister.
+ */
+static void zcrypt_cex2a_remove(struct ap_device *ap_dev)
+{
+	zcrypt_device_unregister(ap_dev->private);
+}
+
+int __init zcrypt_cex2a_init(void)
+{
+	return ap_driver_register(&zcrypt_cex2a_driver, THIS_MODULE, "cex2a");
+}
+
+void __exit zcrypt_cex2a_exit(void)
+{
+	ap_driver_unregister(&zcrypt_cex2a_driver);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_cex2a_init);
+module_exit(zcrypt_cex2a_exit);
+#endif
diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h
new file mode 100644
index 0000000..8f69d1d
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_cex2a.h
@@ -0,0 +1,126 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_cex2a.h
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_CEX2A_H_
+#define _ZCRYPT_CEX2A_H_
+
+/**
+ * The type 50 message family is associated with a CEX2A card.
+ *
+ * The four members of the family (MEB1, MEB2, CRB1, CRB2) follow below.
+ *
+ * Note that all unsigned char arrays are right-justified and left-padded
+ * with zeroes.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+struct type50_hdr {
+	unsigned char	reserved1;
+	unsigned char	msg_type_code;	/* 0x50, TYPE50_TYPE_CODE */
+	unsigned short	msg_len;
+	unsigned char	reserved2;
+	unsigned char	ignored;
+	unsigned short	reserved3;
+} __attribute__((packed));
+
+#define TYPE50_TYPE_CODE	0x50	/* type50_hdr.msg_type_code */
+
+#define TYPE50_MEB1_FMT		0x0001	/* keyblock_type: type50_meb1_msg */
+#define TYPE50_MEB2_FMT		0x0002	/* keyblock_type: type50_meb2_msg */
+#define TYPE50_CRB1_FMT		0x0011	/* keyblock_type: type50_crb1_msg */
+#define TYPE50_CRB2_FMT		0x0012	/* keyblock_type: type50_crb2_msg */
+
+/* Mod-Exp request for a small modulus: every operand field is 128 bytes */
+struct type50_meb1_msg {
+	struct type50_hdr header;
+	unsigned short	keyblock_type;	/* 0x0001, TYPE50_MEB1_FMT */
+	unsigned char	reserved[6];
+	unsigned char	exponent[128];
+	unsigned char	modulus[128];
+	unsigned char	message[128];
+} __attribute__((packed));
+
+/* Mod-Exp request for a large modulus: every operand field is 256 bytes */
+struct type50_meb2_msg {
+	struct type50_hdr header;
+	unsigned short	keyblock_type;	/* 0x0002, TYPE50_MEB2_FMT */
+	unsigned char	reserved[6];
+	unsigned char	exponent[256];
+	unsigned char	modulus[256];
+	unsigned char	message[256];
+} __attribute__((packed));
+
+/* CRT request for a small modulus: 64 byte key parts, 128 byte message */
+struct type50_crb1_msg {
+	struct type50_hdr header;
+	unsigned short	keyblock_type;	/* 0x0011, TYPE50_CRB1_FMT */
+	unsigned char	reserved[6];
+	unsigned char	p[64];
+	unsigned char	q[64];
+	unsigned char	dp[64];
+	unsigned char	dq[64];
+	unsigned char	u[64];
+	unsigned char	message[128];
+} __attribute__((packed));
+
+/* CRT request for a large modulus: 128 byte key parts, 256 byte message */
+struct type50_crb2_msg {
+	struct type50_hdr header;
+	unsigned short	keyblock_type;	/* 0x0012, TYPE50_CRB2_FMT */
+	unsigned char	reserved[6];
+	unsigned char	p[128];
+	unsigned char	q[128];
+	unsigned char	dp[128];
+	unsigned char	dq[128];
+	unsigned char	u[128];
+	unsigned char	message[256];
+} __attribute__((packed));
+
+/**
+ * The type 80 response family is associated with a CEX2A card.
+ *
+ * Note that all unsigned char arrays are right-justified and left-padded
+ * with zeroes.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+
+#define TYPE80_RSP_CODE 0x80
+
+struct type80_hdr {
+	unsigned char	reserved1;
+	unsigned char	type;		/* 0x80, TYPE80_RSP_CODE */
+	unsigned short	len;
+	unsigned char	code;		/* 0x00 */
+	unsigned char	reserved2[3];
+	unsigned char	reserved3[8];
+} __attribute__((packed));
+
+int zcrypt_cex2a_init(void);
+void zcrypt_cex2a_exit(void);
+
+#endif /* _ZCRYPT_CEX2A_H_ */
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
new file mode 100644
index 0000000..2cb616b
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -0,0 +1,133 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_error.h
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_ERROR_H_
+#define _ZCRYPT_ERROR_H_
+
+#include "zcrypt_api.h"
+
+/**
+ * Reply Messages
+ *
+ * Error reply messages are of two types:
+ *    82:  Error, reply codes REP82_ERROR_xxx (see below)
+ *    88:  Error, reply codes REP88_ERROR_xxx (see below)
+ * Both type 82 and type 88 have the same structure in the header.
+ *
+ * Request reply messages are of three known types:
+ *    80:  Reply from a Type 50 Request (see CEX2A-RELATED STRUCTS)
+ *    84:  Reply from a Type 4 Request (see PCICA-RELATED STRUCTS)
+ *    86:  Reply from a Type 6 Request (see PCICC/PCIXCC/CEX2C-RELATED STRUCTS)
+ *
+ */
+struct error_hdr {
+	unsigned char reserved1;	/* 0x00			*/
+	unsigned char type;		/* 0x82 or 0x88		*/
+	unsigned char reserved2[2];	/* 0x0000		*/
+	unsigned char reply_code;	/* REP82_xxx / REP88_xxx	*/
+	unsigned char reserved3[3];	/* 0x000000		*/
+};
+
+#define TYPE82_RSP_CODE 0x82
+#define TYPE88_RSP_CODE 0x88
+
+#define REP82_ERROR_MACHINE_FAILURE  0x10
+#define REP82_ERROR_PREEMPT_FAILURE  0x12
+#define REP82_ERROR_CHECKPT_FAILURE  0x14
+#define REP82_ERROR_MESSAGE_TYPE     0x20
+#define REP82_ERROR_INVALID_COMM_CD  0x21	/* Type 84	*/
+#define REP82_ERROR_INVALID_MSG_LEN  0x23
+#define REP82_ERROR_RESERVD_FIELD    0x24	/* was 0x50	*/
+#define REP82_ERROR_FORMAT_FIELD     0x29
+#define REP82_ERROR_INVALID_COMMAND  0x30
+#define REP82_ERROR_MALFORMED_MSG    0x40
+#define REP82_ERROR_RESERVED_FIELDO  0x50	/* old value	*/
+#define REP82_ERROR_WORD_ALIGNMENT   0x60
+#define REP82_ERROR_MESSAGE_LENGTH   0x80
+#define REP82_ERROR_OPERAND_INVALID  0x82
+#define REP82_ERROR_OPERAND_SIZE     0x84
+#define REP82_ERROR_EVEN_MOD_IN_OPND 0x85
+#define REP82_ERROR_RESERVED_FIELD   0x88
+#define REP82_ERROR_TRANSPORT_FAIL   0x90
+#define REP82_ERROR_PACKET_TRUNCATED 0xA0
+#define REP82_ERROR_ZERO_BUFFER_LEN  0xB0
+
+#define REP88_ERROR_MODULE_FAILURE   0x10	/* = REP82_ERROR_MACHINE_FAILURE */
+
+#define REP88_ERROR_MESSAGE_TYPE     0x20	/* = REP82_ERROR_MESSAGE_TYPE */
+#define REP88_ERROR_MESSAGE_MALFORMD 0x22
+#define REP88_ERROR_MESSAGE_LENGTH   0x23	/* = REP82_ERROR_INVALID_MSG_LEN */
+#define REP88_ERROR_RESERVED_FIELD   0x24	/* = REP82_ERROR_RESERVD_FIELD */
+#define REP88_ERROR_KEY_TYPE	     0x34
+#define REP88_ERROR_INVALID_KEY      0x82	/* CEX2A, = REP82_ERROR_OPERAND_INVALID */
+#define REP88_ERROR_OPERAND	     0x84	/* CEX2A, = REP82_ERROR_OPERAND_SIZE */
+#define REP88_ERROR_OPERAND_EVEN_MOD 0x85	/* CEX2A, = REP82_ERROR_EVEN_MOD_IN_OPND */
+
+static inline int convert_error(struct zcrypt_device *zdev,
+				struct ap_message *reply)
+{
+	struct error_hdr *ehdr = reply->message;
+
+	PRINTK("Hardware error : Type %02x Message Header: %08x%08x\n",
+	       ehdr->type, *(unsigned int *) reply->message,
+	       *(unsigned int *) (reply->message + 4));
+
+	switch (ehdr->reply_code) {
+	case REP82_ERROR_OPERAND_INVALID:
+	case REP82_ERROR_OPERAND_SIZE:
+	case REP82_ERROR_EVEN_MOD_IN_OPND:
+	case REP88_ERROR_MESSAGE_MALFORMD:
+	//   REP88_ERROR_INVALID_KEY		// '82' CEX2A
+	//   REP88_ERROR_OPERAND		// '84' CEX2A
+	//   REP88_ERROR_OPERAND_EVEN_MOD	// '85' CEX2A
+		/* Invalid input data: report it, the device stays online. */
+		return -EINVAL;
+	case REP82_ERROR_MESSAGE_TYPE:
+	//   REP88_ERROR_MESSAGE_TYPE		// '20' CEX2A
+		/**
+		 * Sending a message of the wrong type is a bug in the
+		 * device driver. Warn about it, disable the device
+		 * and then repeat the request.
+		 */
+		WARN_ON(1);
+		zdev->online = 0;
+		return -EAGAIN;
+	case REP82_ERROR_TRANSPORT_FAIL:
+	case REP82_ERROR_MACHINE_FAILURE:
+	//   REP88_ERROR_MODULE_FAILURE		// '10' CEX2A
+		/* If a card fails disable it and repeat the request. */
+		zdev->online = 0;
+		return -EAGAIN;
+	default:
+		PRINTKW("unknown type %02x reply code = %d\n",
+			ehdr->type, ehdr->reply_code);
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+}
+
+#endif /* _ZCRYPT_ERROR_H_ */
diff --git a/drivers/s390/crypto/zcrypt_mono.c b/drivers/s390/crypto/zcrypt_mono.c
new file mode 100644
index 0000000..2a9349a
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_mono.c
@@ -0,0 +1,100 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_mono.c
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/compat.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_pcica.h"
+#include "zcrypt_pcicc.h"
+#include "zcrypt_pcixcc.h"
+#include "zcrypt_cex2a.h"
+
+/**
+ * Module init: bring up the AP bus, the zcrypt API and all four card drivers.
+ */
+int __init zcrypt_init(void)
+{
+	int rc;
+
+	rc = ap_module_init();
+	if (rc)
+		goto out;
+	rc = zcrypt_api_init();
+	if (rc)
+		goto out_ap;
+	rc = zcrypt_pcica_init();
+	if (rc)
+		goto out_api;
+	rc = zcrypt_pcicc_init();
+	if (rc)
+		goto out_pcica;
+	rc = zcrypt_pcixcc_init();
+	if (rc)
+		goto out_pcicc;
+	rc = zcrypt_cex2a_init();
+	if (rc)
+		goto out_pcixcc;
+	return 0;
+
+out_pcixcc:
+	zcrypt_pcixcc_exit();
+out_pcicc:
+	zcrypt_pcicc_exit();
+out_pcica:
+	zcrypt_pcica_exit();
+out_api:
+	zcrypt_api_exit();
+out_ap:
+	ap_module_exit();
+out:
+	return rc;
+}
+
+/**
+ * Module exit: tear everything down in the reverse order of zcrypt_init().
+ */
+void __exit zcrypt_exit(void)
+{
+	zcrypt_cex2a_exit();
+	zcrypt_pcixcc_exit();
+	zcrypt_pcicc_exit();
+	zcrypt_pcica_exit();
+	zcrypt_api_exit();
+	ap_module_exit();
+}
+
+module_init(zcrypt_init);
+module_exit(zcrypt_exit);
diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c
new file mode 100644
index 0000000..b6a4ecd
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcica.c
@@ -0,0 +1,418 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcica.c
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_error.h"
+#include "zcrypt_pcica.h"
+
+#define PCICA_MIN_MOD_SIZE	  1	/*    8 bits	*/
+#define PCICA_MAX_MOD_SIZE	256	/* 2048 bits	*/
+
+#define PCICA_SPEED_RATING	2800
+
+#define PCICA_MAX_MESSAGE_SIZE	0x3a0	/* sizeof(struct type4_lcr)	     */
+#define PCICA_MAX_RESPONSE_SIZE 0x110	/* max output (256) + 16; type84_hdr is 8 */
+
+#define PCICA_CLEANUP_TIME	(15*HZ)	/* how long to wait for a reply */
+
+static struct ap_device_id zcrypt_pcica_ids[] = {
+	{ AP_DEVICE(AP_DEVICE_TYPE_PCICA) },
+	{ /* end of list */ },
+};
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+MODULE_DEVICE_TABLE(ap, zcrypt_pcica_ids);
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("PCICA Cryptographic Coprocessor device driver, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+#endif /* !CONFIG_ZCRYPT_MONOLITHIC */
+
+static int zcrypt_pcica_probe(struct ap_device *ap_dev);
+static void zcrypt_pcica_remove(struct ap_device *ap_dev);
+static void zcrypt_pcica_receive(struct ap_device *, struct ap_message *,
+				 struct ap_message *);
+
+static struct ap_driver zcrypt_pcica_driver = {
+	.probe = zcrypt_pcica_probe,
+	.remove = zcrypt_pcica_remove,
+	.receive = zcrypt_pcica_receive,	/* runs in tasklet context */
+	.ids = zcrypt_pcica_ids,		/* AP device types handled */
+};
+
+/**
+ * Build the type 4 mod-expo request for an ICAMEX (ica_rsa_modexpo) call.
+ *
+ * @zdev: crypto device pointer (not used here)
+ * @ap_msg: AP message whose buffer receives the type 4 request
+ * @mex: modexpo request; its operands are read from user space
+ *
+ * Returns 0 on success or -EFAULT if a user buffer cannot be read.
+ */
+static int ICAMEX_msg_to_type4MEX_msg(struct zcrypt_device *zdev,
+				      struct ap_message *ap_msg,
+				      struct ica_rsa_modexpo *mex)
+{
+	int len = mex->inputdatalength;
+	unsigned char *msg, *exp, *mod;
+
+	/* Moduli of up to 128 bytes use the small format, others the large. */
+	if (len > 128) {
+		struct type4_lme *lme = ap_msg->message;
+		memset(lme, 0, sizeof(*lme));
+		lme->header.msg_type_code = TYPE4_TYPE_CODE;
+		lme->header.request_code = TYPE4_REQU_CODE;
+		lme->header.msg_fmt = TYPE4_LME_FMT;
+		lme->header.msg_len = sizeof(*lme);
+		ap_msg->length = sizeof(*lme);
+		msg = lme->message + sizeof(lme->message) - len;
+		exp = lme->exponent + sizeof(lme->exponent) - len;
+		mod = lme->modulus + sizeof(lme->modulus) - len;
+	} else {
+		struct type4_sme *sme = ap_msg->message;
+		memset(sme, 0, sizeof(*sme));
+		sme->header.msg_type_code = TYPE4_TYPE_CODE;
+		sme->header.request_code = TYPE4_REQU_CODE;
+		sme->header.msg_fmt = TYPE4_SME_FMT;
+		sme->header.msg_len = sizeof(*sme);
+		ap_msg->length = sizeof(*sme);
+		msg = sme->message + sizeof(sme->message) - len;
+		exp = sme->exponent + sizeof(sme->exponent) - len;
+		mod = sme->modulus + sizeof(sme->modulus) - len;
+	}
+
+	/* Operands are right-justified; the memset() did the zero padding. */
+	if (copy_from_user(mod, mex->n_modulus, len) ||
+	    copy_from_user(exp, mex->b_key, len) ||
+	    copy_from_user(msg, mex->inputdata, len))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ * Build the type 4 CRT request for an ICACRT (ica_rsa_modexpo_crt) call.
+ *
+ * @zdev: crypto device pointer (not used here)
+ * @ap_msg: AP message whose buffer receives the type 4 request
+ * @crt: CRT request; its operands are read from user space
+ *
+ * Returns 0 on success or -EFAULT if a user buffer cannot be read.
+ */
+static int ICACRT_msg_to_type4CRT_msg(struct zcrypt_device *zdev,
+				      struct ap_message *ap_msg,
+				      struct ica_rsa_modexpo_crt *crt)
+{
+	int mod_len = crt->inputdatalength;
+	int short_len = mod_len / 2;		/* length of q and dq */
+	int long_len = short_len + 8;		/* length of p, dp and u */
+	unsigned char *p, *q, *dp, *dq, *u, *inp;
+
+	/* Moduli of up to 128 bytes use the small format, others the large. */
+	if (mod_len > 128) {
+		struct type4_lcr *lcr = ap_msg->message;
+		memset(lcr, 0, sizeof(*lcr));
+		lcr->header.msg_fmt = TYPE4_LCR_FMT;
+		lcr->header.msg_len = sizeof(*lcr);
+		lcr->header.msg_type_code = TYPE4_TYPE_CODE;
+		lcr->header.request_code = TYPE4_REQU_CODE;
+		ap_msg->length = sizeof(*lcr);
+		inp = lcr->message + sizeof(lcr->message) - mod_len;
+		dp = lcr->dp + sizeof(lcr->dp) - long_len;
+		dq = lcr->dq + sizeof(lcr->dq) - short_len;
+		p = lcr->p + sizeof(lcr->p) - long_len;
+		q = lcr->q + sizeof(lcr->q) - short_len;
+		u = lcr->u + sizeof(lcr->u) - long_len;
+	} else {
+		struct type4_scr *scr = ap_msg->message;
+		memset(scr, 0, sizeof(*scr));
+		scr->header.msg_fmt = TYPE4_SCR_FMT;
+		scr->header.msg_len = sizeof(*scr);
+		scr->header.msg_type_code = TYPE4_TYPE_CODE;
+		scr->header.request_code = TYPE4_REQU_CODE;
+		ap_msg->length = sizeof(*scr);
+		inp = scr->message + sizeof(scr->message) - mod_len;
+		dp = scr->dp + sizeof(scr->dp) - long_len;
+		dq = scr->dq + sizeof(scr->dq) - short_len;
+		p = scr->p + sizeof(scr->p) - long_len;
+		q = scr->q + sizeof(scr->q) - short_len;
+		u = scr->u + sizeof(scr->u) - long_len;
+	}
+
+	/* Operands are right-justified; the memset() did the zero padding. */
+	if (copy_from_user(p, crt->np_prime, long_len) ||
+	    copy_from_user(q, crt->nq_prime, short_len) ||
+	    copy_from_user(dp, crt->bp_key, long_len) ||
+	    copy_from_user(dq, crt->bq_key, short_len) ||
+	    copy_from_user(u, crt->u_mult_inv, long_len) ||
+	    copy_from_user(inp, crt->inputdata, mod_len))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ * Hand the result of a type 84 reply message to user space.
+ *
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @outputdata: user space buffer for the result
+ * @outputdatalength: number of result bytes to copy
+ *
+ * Returns 0 on success, -EFAULT or -EAGAIN (device was set offline).
+ */
+static inline int convert_type84(struct zcrypt_device *zdev,
+				 struct ap_message *reply,
+				 char __user *outputdata,
+				 unsigned int outputdatalength)
+{
+	struct type84_hdr *hdr = reply->message;
+	char *result;
+
+	if (hdr->len >= sizeof(*hdr) + outputdatalength) {
+		BUG_ON(hdr->len > PCICA_MAX_RESPONSE_SIZE);
+		/* The result occupies the last bytes of the reply. */
+		result = reply->message + hdr->len - outputdatalength;
+		return copy_to_user(outputdata, result, outputdatalength) ?
+			-EFAULT : 0;
+	}
+	/* The result is too short, the PCICA card may not do that.. */
+	zdev->online = 0;
+	return -EAGAIN;	/* repeat the request on a different device. */
+}
+
+static int convert_response(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    char __user *outputdata,
+			    unsigned int outputdatalength)
+{
+	/* Response type byte is the second byte in the response. */
+	unsigned char rsp_type = ((unsigned char *) reply->message)[1];
+
+	if (rsp_type == TYPE84_RSP_CODE)
+		return convert_type84(zdev, reply,
+				      outputdata, outputdatalength);
+	if (rsp_type == TYPE82_RSP_CODE || rsp_type == TYPE88_RSP_CODE)
+		return convert_error(zdev, reply);
+
+	/* Unknown response type, this should NEVER EVER happen */
+	PRINTK("Unrecognized Message Header: %08x%08x\n",
+	       *(unsigned int *) reply->message,
+	       *(unsigned int *) (reply->message+4));
+	zdev->online = 0;
+	return -EAGAIN;	/* repeat the request on a different device. */
+}
+
+/**
+ * Called by the AP bus code from tasklet context after the crypto request
+ * "msg" has finished. Copies the reply, or a synthetic machine failure
+ * error if "reply" is an ERR_PTR(), to msg->message and wakes the waiter.
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message
+ * @reply: pointer to the AP reply message, or an ERR_PTR() on failure
+ */
+static void zcrypt_pcica_receive(struct ap_device *ap_dev,
+				 struct ap_message *msg,
+				 struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	struct type84_hdr *t84h;
+	const void *src = &error_reply;
+	int length = sizeof(error_reply);
+
+	/* An ERR_PTR() reply must not be dereferenced: test it first. */
+	if (!IS_ERR(reply)) {
+		src = t84h = reply->message;
+		if (t84h->code == TYPE84_RSP_CODE)
+			length = min(PCICA_MAX_RESPONSE_SIZE, (int) t84h->len);
+	}
+	memcpy(msg->message, src, length);
+	complete((struct completion *) msg->private);
+}
+
+static atomic_t zcrypt_step = ATOMIC_INIT(0);
+
+/**
+ * RSA modexpo entry point of the PCICA driver. The request distributor
+ * calls it after it has picked this PCICA device for a modexpo request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCICA device to the request distributor
+ * @mex: pointer to the modexpo request buffer
+ */
+static long zcrypt_pcica_modexpo(struct zcrypt_device *zdev,
+				 struct ica_rsa_modexpo *mex)
+{
+	struct completion work;
+	struct ap_message ap_msg;
+	int rc;
+
+	ap_msg.message = kmalloc(PCICA_MAX_MESSAGE_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.private = &work;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	rc = ICAMEX_msg_to_type4MEX_msg(zdev, &ap_msg, mex);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, PCICA_CLEANUP_TIME);
+	if (rc <= 0) {
+		/* Signal pending (rc < 0) or message timed out (rc == 0). */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (!rc)
+			rc = -ETIME;
+		goto out_free;
+	}
+	/* The receive callback has copied the reply to ap_msg.message. */
+	rc = convert_response(zdev, &ap_msg, mex->outputdata,
+			      mex->outputdatalength);
+out_free:
+	kfree(ap_msg.message);
+	return rc;
+}
+
+/**
+ * RSA CRT entry point of the PCICA driver. The request distributor
+ * calls it after it has picked this PCICA device for a modexpo_crt request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCICA device to the request distributor
+ * @crt: pointer to the modexpo_crt request buffer
+ */
+static long zcrypt_pcica_modexpo_crt(struct zcrypt_device *zdev,
+				     struct ica_rsa_modexpo_crt *crt)
+{
+	struct completion work;
+	struct ap_message ap_msg;
+	int rc;
+
+	ap_msg.message = kmalloc(PCICA_MAX_MESSAGE_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.private = &work;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	rc = ICACRT_msg_to_type4CRT_msg(zdev, &ap_msg, crt);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, PCICA_CLEANUP_TIME);
+	if (rc <= 0) {
+		/* Signal pending (rc < 0) or message timed out (rc == 0). */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (!rc)
+			rc = -ETIME;
+		goto out_free;
+	}
+	/* The receive callback has copied the reply to ap_msg.message. */
+	rc = convert_response(zdev, &ap_msg, crt->outputdata,
+			      crt->outputdatalength);
+out_free:
+	kfree(ap_msg.message);
+	return rc;
+}
+
+/**
+ * RSA operations (plain and CRT mod-expo) offered by the PCICA driver.
+ */
+static struct zcrypt_ops zcrypt_pcica_ops = {
+	.rsa_modexpo = zcrypt_pcica_modexpo,
+	.rsa_modexpo_crt = zcrypt_pcica_modexpo_crt,
+};
+
+/**
+ * Bind a PCICA AP device to a freshly allocated zcrypt device. No type
+ * check is done here since the bus_match already checked the hardware type.
+ * @ap_dev: pointer to the AP device. Returns 0, -ENOMEM or the register error.
+ */
+static int zcrypt_pcica_probe(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	zdev = zcrypt_device_alloc(PCICA_MAX_RESPONSE_SIZE);
+	if (!zdev)
+		return -ENOMEM;
+	/* Describe the card. */
+	zdev->type_string = "PCICA";
+	zdev->user_space_type = ZCRYPT_PCICA;
+	zdev->min_mod_size = PCICA_MIN_MOD_SIZE;
+	zdev->max_mod_size = PCICA_MAX_MOD_SIZE;
+	zdev->speed_rating = PCICA_SPEED_RATING;
+	zdev->ops = &zcrypt_pcica_ops;
+	zdev->online = 1;
+	/* Link the AP device and the zcrypt device to each other. */
+	zdev->ap_dev = ap_dev;
+	ap_dev->reply = &zdev->reply;
+	ap_dev->private = zdev;
+	rc = zcrypt_device_register(zdev);
+	if (!rc)
+		return 0;
+	/* Registration failed: drop the link and release the device. */
+	ap_dev->private = NULL;
+	zcrypt_device_free(zdev);
+	return rc;
+}
+
+/**
+ * Called if an AP device is removed: unregister the zcrypt device that
+ * zcrypt_pcica_probe() stored in the private pointer of the AP device.
+ */
+static void zcrypt_pcica_remove(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *pcica = ap_dev->private;
+
+	zcrypt_device_unregister(pcica);
+}
+
+int __init zcrypt_pcica_init(void)
+{
+	return ap_driver_register(&zcrypt_pcica_driver, THIS_MODULE, "pcica");
+}
+
+void zcrypt_pcica_exit(void)
+{
+	ap_driver_unregister(&zcrypt_pcica_driver);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_pcica_init);
+module_exit(zcrypt_pcica_exit);
+#endif /* !CONFIG_ZCRYPT_MONOLITHIC */
diff --git a/drivers/s390/crypto/zcrypt_pcica.h b/drivers/s390/crypto/zcrypt_pcica.h
new file mode 100644
index 0000000..3be1118
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcica.h
@@ -0,0 +1,117 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcica.h
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_PCICA_H_
+#define _ZCRYPT_PCICA_H_
+
+/**
+ * The type 4 message family is associated with a PCICA card.
+ *
+ * The four members of the family (SME, LME, SCR, LCR) follow below.
+ *
+ * Note that all unsigned char arrays are right-justified and left-padded
+ * with zeroes.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+struct type4_hdr {
+	unsigned char  reserved1;
+	unsigned char  msg_type_code;	/* 0x04, TYPE4_TYPE_CODE */
+	unsigned short msg_len;		/* size of the whole message */
+	unsigned char  request_code;	/* 0x40, TYPE4_REQU_CODE */
+	unsigned char  msg_fmt;		/* one of TYPE4_xxx_FMT */
+	unsigned short reserved2;
+} __attribute__((packed));
+
+#define TYPE4_TYPE_CODE 0x04	/* type4_hdr.msg_type_code */
+#define TYPE4_REQU_CODE 0x40	/* type4_hdr.request_code */
+
+#define TYPE4_SME_FMT 0x00	/* msg_fmt of struct type4_sme */
+#define TYPE4_LME_FMT 0x10	/* msg_fmt of struct type4_lme */
+#define TYPE4_SCR_FMT 0x40	/* msg_fmt of struct type4_scr */
+#define TYPE4_LCR_FMT 0x50	/* msg_fmt of struct type4_lcr */
+
+/* Mod-Exp request for a small modulus: every operand field is 128 bytes */
+struct type4_sme {
+	struct type4_hdr header;
+	unsigned char	 message[128];
+	unsigned char	 exponent[128];
+	unsigned char	 modulus[128];
+} __attribute__((packed));
+
+/* Mod-Exp request for a large modulus: every operand field is 256 bytes */
+struct type4_lme {
+	struct type4_hdr header;
+	unsigned char	 message[256];
+	unsigned char	 exponent[256];
+	unsigned char	 modulus[256];
+} __attribute__((packed));
+
+/* CRT request for a small modulus; dp, p and u fields are 8 bytes longer */
+struct type4_scr {
+	struct type4_hdr header;
+	unsigned char	 message[128];
+	unsigned char	 dp[72];
+	unsigned char	 dq[64];
+	unsigned char	 p[72];
+	unsigned char	 q[64];
+	unsigned char	 u[72];
+} __attribute__((packed));
+
+/* CRT request for a large modulus; dp, p and u fields are 8 bytes longer */
+struct type4_lcr {
+	struct type4_hdr header;
+	unsigned char	 message[256];
+	unsigned char	 dp[136];
+	unsigned char	 dq[128];
+	unsigned char	 p[136];
+	unsigned char	 q[128];
+	unsigned char	 u[136];
+} __attribute__((packed));
+
+/**
+ * The type 84 response family is associated with a PCICA card.
+ *
+ * Note that all unsigned char arrays are right-justified and left-padded
+ * with zeroes.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+
+struct type84_hdr {
+	unsigned char  reserved1;
+	unsigned char  code;		/* 0x84, TYPE84_RSP_CODE */
+	unsigned short len;		/* reply length, header included */
+	unsigned char  reserved2[4];
+} __attribute__((packed));
+
+#define TYPE84_RSP_CODE 0x84
+
+int zcrypt_pcica_init(void);
+void zcrypt_pcica_exit(void);
+
+#endif /* _ZCRYPT_PCICA_H_ */
diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c
new file mode 100644
index 0000000..f295a40
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcicc.c
@@ -0,0 +1,630 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcicc.c
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_error.h"
+#include "zcrypt_pcicc.h"
+#include "zcrypt_cca_key.h"
+
+#define PCICC_MIN_MOD_SIZE	 64	/*  512 bits */
+#define PCICC_MAX_MOD_SIZE_OLD	128	/* 1024 bits */
+#define PCICC_MAX_MOD_SIZE	256	/* 2048 bits */
+
+/**
+ * PCICC cards need a speed rating of 0. This keeps them at the end of
+ * the zcrypt device list (see zcrypt_api.c). PCICC cards are only
+ * used if no other cards are present because they are slow and can only
+ * cope with PKCS12 padded requests. The logic is odd: PKCS11 padded
+ * requests are rejected. The modexpo function encrypts PKCS12 padded data
+ * and decrypts any non-PKCS12 padded data (except PKCS11) on the assumption
+ * that it is encrypted PKCS12 data. The modexpo_crt function always decrypts
+ * the data on the assumption that it is PKCS12 encrypted data.
+ */
+#define PCICC_SPEED_RATING	0
+
+#define PCICC_MAX_MESSAGE_SIZE 0x710	/* max size type6 v1 crt message */
+#define PCICC_MAX_RESPONSE_SIZE 0x710	/* max size type86 v1 reply	 */
+
+#define PCICC_CLEANUP_TIME	(15*HZ)	/* wait limit in modexpo[_crt] */
+
+static struct ap_device_id zcrypt_pcicc_ids[] = {	/* ids of zcrypt_pcicc_driver */
+	{ AP_DEVICE(AP_DEVICE_TYPE_PCICC) },
+	{ /* end of list */ },
+};
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC	/* module metadata, skipped if monolithic */
+MODULE_DEVICE_TABLE(ap, zcrypt_pcicc_ids);
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("PCICC Cryptographic Coprocessor device driver, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+#endif
+
+static int zcrypt_pcicc_probe(struct ap_device *ap_dev);
+static void zcrypt_pcicc_remove(struct ap_device *ap_dev);
+static void zcrypt_pcicc_receive(struct ap_device *, struct ap_message *,
+				 struct ap_message *);
+/* AP driver description: the callbacks and the device id table of this file. */
+static struct ap_driver zcrypt_pcicc_driver = {
+	.probe = zcrypt_pcicc_probe,
+	.remove = zcrypt_pcicc_remove,
+	.receive = zcrypt_pcicc_receive,
+	.ids = zcrypt_pcicc_ids,
+};
+
+/**
+ * The following is used to initialize the CPRB passed to the PCICC card
+ * in a type6 message. The 3 fields that must be filled in at execution
+ * time are  req_parml, rpl_parml and usage_domain. Note that all three
+ * fields are *little*-endian. Actually, everything about this interface
+ * is ascii/little-endian, since the device has 'Intel inside'.
+ *
+ * The CPRB is followed immediately by the parm block.
+ * The parm block contains:
+ * - function code ('PD' 0x5044 or 'PK' 0x504B)
+ * - rule block (0x0A00 'PKCS-1.2' or 0x0A00 'ZERO-PAD')
+ * - VUD block
+ */
+static struct CPRB static_cprb = {
+	.cprb_len	= __constant_cpu_to_le16(0x0070), /* size of struct CPRB */
+	.cprb_ver_id	=  0x41,
+	.func_id	= {0x54,0x32},			  /* ASCII "T2" */
+	.checkpoint_flag=  0x01,
+	.svr_namel	= __constant_cpu_to_le16(0x0008), /* svr_name is 8 bytes */
+	.svr_name	= {'I','C','S','F',' ',' ',' ',' '}
+};
+
+/**
+ * Check for PKCS11 padding: 0x00 0x01, at least eight 0xFF bytes, then
+ * a 0x00 separator. Returns 1 if @buffer is padded that way, 0 otherwise.
+ */
+static inline int is_PKCS11_padded(unsigned char *buffer, int length)
+{
+	int pos = 2;
+
+	if (!(buffer[0] == 0x00 && buffer[1] == 0x01))
+		return 0;
+	while (pos < length && buffer[pos] == 0xFF)
+		pos++;
+	/* the 0xFF run must reach index 10 and stop inside the buffer */
+	if (pos < 10 || pos == length)
+		return 0;
+	return buffer[pos] == 0x00;
+}
+
+/**
+ * Check for PKCS12 padding: 0x00 0x02, at least eight non-zero bytes, then
+ * a 0x00 separator. Returns 1 if @buffer is padded that way, 0 otherwise.
+ */
+static inline int is_PKCS12_padded(unsigned char *buffer, int length)
+{
+	int pos = 2;
+
+	if (!(buffer[0] == 0x00 && buffer[1] == 0x02))
+		return 0;
+	while (pos < length && buffer[pos] != 0x00)
+		pos++;
+	/* the separator must sit at index 10 or later, inside the buffer */
+	if (pos < 10 || pos == length)
+		return 0;
+	return 1;
+}
+
+/**
+ * Convert a ICAMEX message to a type6 MEX message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: AP message whose buffer receives the type6 request
+ * @mex: pointer to user input data
+ *
+ * Returns 0 on success, -EFAULT, -EINVAL, -ENODEV or a key setup error.
+ */
+static int ICAMEX_msg_to_type6MEX_msg(struct zcrypt_device *zdev,
+				      struct ap_message *ap_msg,
+				      struct ica_rsa_modexpo *mex)
+{
+	static struct type6_hdr static_type6_hdr = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+		.agent_id	= {0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50,
+				   0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01},
+		.function_code	= {'P','K'},
+	};
+	static struct function_and_rules_block static_pke_function_and_rules ={
+		.function_code	= {'P','K'},
+		.ulen		= __constant_cpu_to_le16(10),
+		.only_rule	= {'P','K','C','S','-','1','.','2'}
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct CPRB cprb;
+		struct function_and_rules_block fr;
+		unsigned short length;
+		char text[0];
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int vud_len, pad_len, size;
+
+	/* VUD.ciphertext: copied first so the padding checks can inspect it */
+	if (copy_from_user(msg->text, mex->inputdata, mex->inputdatalength))
+		return -EFAULT;
+
+	if (is_PKCS11_padded(msg->text, mex->inputdatalength))
+		return -EINVAL;	/* PKCS11 padded requests are rejected */
+
+	/* static message header and f&r */
+	msg->hdr = static_type6_hdr;
+	msg->fr = static_pke_function_and_rules;
+
+	if (is_PKCS12_padded(msg->text, mex->inputdatalength)) {
+		/* PKCS12 padded: strip the padding, keep function code 'PK' */
+		pad_len = strnlen(msg->text + 2, mex->inputdatalength - 2) + 3;
+		if (pad_len <= 9 || pad_len >= mex->inputdatalength)
+			return -ENODEV;
+		vud_len = mex->inputdatalength - pad_len;
+		memmove(msg->text, msg->text + pad_len, vud_len);
+		msg->length = cpu_to_le16(vud_len + 2);
+
+		/* Set up key after the variable length text. */
+		size = zcrypt_type6_mex_key_en(mex, msg->text + vud_len, 0);
+		if (size < 0)
+			return size;
+		size += sizeof(*msg) + vud_len;	/* total size of msg */
+	} else {	/* not PKCS12 padded: send the whole input as a 'PD' request */
+		vud_len = mex->inputdatalength;
+		msg->length = cpu_to_le16(2 + vud_len);
+
+		msg->hdr.function_code[1] = 'D';
+		msg->fr.function_code[1] = 'D';
+
+		/* Set up key after the variable length text. */
+		size = zcrypt_type6_mex_key_de(mex, msg->text + vud_len, 0);
+		if (size < 0)
+			return size;
+		size += sizeof(*msg) + vud_len;	/* total size of msg */
+	}
+
+	/* message header, cprb and f&r; lengths are rounded up to 4 bytes */
+	msg->hdr.ToCardLen1 = (size - sizeof(msg->hdr) + 3) & -4;
+	msg->hdr.FromCardLen1 = PCICC_MAX_RESPONSE_SIZE - sizeof(msg->hdr);
+
+	msg->cprb = static_cprb;
+	msg->cprb.usage_domain[0]= AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprb.req_parml = cpu_to_le16(size - sizeof(msg->hdr) -
+					   sizeof(msg->cprb));
+	msg->cprb.rpl_parml = cpu_to_le16(msg->hdr.FromCardLen1);
+
+	ap_msg->length = (size + 3) & -4;
+	return 0;
+}
+
+/**
+ * Convert a ICACRT message to a type6 CRT message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: AP message whose buffer receives the type6 request
+ * @crt: pointer to user input data
+ *
+ * Returns 0 on success, -EFAULT, -EINVAL or a key setup error.
+ */
+static int ICACRT_msg_to_type6CRT_msg(struct zcrypt_device *zdev,
+				      struct ap_message *ap_msg,
+				      struct ica_rsa_modexpo_crt *crt)
+{
+	static struct type6_hdr static_type6_hdr = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+		.agent_id	= {0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50,
+				   0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01},
+		.function_code	= {'P','D'},
+	};
+	static struct function_and_rules_block static_pkd_function_and_rules ={
+		.function_code	= {'P','D'},
+		.ulen		= __constant_cpu_to_le16(10),
+		.only_rule	= {'P','K','C','S','-','1','.','2'}
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct CPRB cprb;
+		struct function_and_rules_block fr;
+		unsigned short length;
+		char text[0];
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int size;
+
+	/* VUD.ciphertext: the little-endian length counts its own two bytes */
+	msg->length = cpu_to_le16(2 + crt->inputdatalength);
+	if (copy_from_user(msg->text, crt->inputdata, crt->inputdatalength))
+		return -EFAULT;
+
+	if (is_PKCS11_padded(msg->text, crt->inputdatalength))
+		return -EINVAL;	/* PKCS11 padded requests are rejected */
+
+	/* Set up key after the variable length text. */
+	size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength, 0);
+	if (size < 0)
+		return size;
+	size += sizeof(*msg) + crt->inputdatalength;	/* total size of msg */
+
+	/* message header, cprb and f&r; rpl_parml is set equal to req_parml */
+	msg->hdr = static_type6_hdr;
+	msg->hdr.ToCardLen1 = (size -  sizeof(msg->hdr) + 3) & -4;
+	msg->hdr.FromCardLen1 = PCICC_MAX_RESPONSE_SIZE - sizeof(msg->hdr);
+
+	msg->cprb = static_cprb;
+	msg->cprb.usage_domain[0] = AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprb.req_parml = msg->cprb.rpl_parml =
+		cpu_to_le16(size - sizeof(msg->hdr) - sizeof(msg->cprb));
+
+	msg->fr = static_pkd_function_and_rules;
+
+	ap_msg->length = (size + 3) & -4;
+	return 0;
+}
+
+/**
+ * convert_type86() below copies results from a type 86 reply message back
+ * to user space; struct type86_reply is the reply layout it reads.
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @outputdata: pointer to user output data
+ * @outputdatalength: size of user output data
+ *
+ * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
+ */
+struct type86_reply {
+	struct type86_hdr hdr;
+	struct type86_fmt2_ext fmt2;
+	struct CPRB cprb;
+	unsigned char pad[4];	/* 4 byte function code/rules block ? */
+	unsigned short length;	/* little-endian, size of text plus 2 */
+	char text[0];		/* result data returned by the card */
+} __attribute__((packed));
+
+/*
+ * Check the CPRB return/reason codes of a type 86 reply, then copy the
+ * result to user space, left-padded to outputdatalength when it is shorter.
+ */
+static int convert_type86(struct zcrypt_device *zdev,
+			  struct ap_message *reply,
+			  char __user *outputdata,
+			  unsigned int outputdatalength)
+{
+	static const unsigned char static_pad[] = {
+		0x00,0x02,
+		0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD,
+		0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57,
+		0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B,
+		0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39,
+		0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5,
+		0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D,
+		0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB,
+		0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F,
+		0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9,
+		0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45,
+		0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9,
+		0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F,
+		0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD,
+		0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D,
+		0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD,
+		0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9,
+		0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B,
+		0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B,
+		0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B,
+		0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD,
+		0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7,
+		0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1,
+		0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3,
+		0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23,
+		0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55,
+		0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43,
+		0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F,
+		0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F,
+		0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5,
+		0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD,
+		0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41,
+		0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09
+	};
+	struct type86_reply *msg = reply->message;
+	unsigned short service_rc, service_rs;
+	unsigned int reply_len, pad_len;
+
+	service_rc = le16_to_cpu(msg->cprb.ccp_rtcode);
+	if (unlikely(service_rc != 0)) {
+		service_rs = le16_to_cpu(msg->cprb.ccp_rscode);
+		if (service_rc == 8 && service_rs == 66) {
+			PDEBUG("Bad block format on PCICC\n");
+			return -EINVAL;
+		}
+		if (service_rc == 8 && service_rs == 65) {
+			PDEBUG("Probably an even modulus on PCICC\n");
+			return -EINVAL;
+		}
+		if (service_rc == 8 && service_rs == 770) {
+			PDEBUG("Invalid key length on PCICC\n");
+			zdev->max_mod_size = PCICC_MAX_MOD_SIZE_OLD;
+			return -EAGAIN;
+		}
+		if (service_rc == 8 && service_rs == 783) {
+			PDEBUG("Extended bitlengths not enabled on PCICC\n");
+			zdev->max_mod_size = PCICC_MAX_MOD_SIZE_OLD;
+			return -EAGAIN;
+		}
+		PRINTK("Unknown service rc/rs (PCICC): %d/%d\n",
+		       service_rc, service_rs);
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+	reply_len = le16_to_cpu(msg->length) - 2;
+	if (reply_len > outputdatalength)
+		return -EINVAL;
+	/**
+	 * For all encipher requests, the length of the ciphertext (reply_len)
+	 * will always equal the modulus length. For MEX decipher requests
+	 * the output needs to get padded. Minimum pad size is 10. Padding is
+	 * currently added for PCIXCC_MCL2 using a CRT form token (PKD didn't
+	 * support ZERO-PAD and CRT is only supported for PKD requests) and
+	 * always for PCICC. The pad must not exceed what static_pad holds.
+	 */
+	pad_len = outputdatalength - reply_len;
+	if (pad_len > 0) {
+		if (pad_len < 10 || pad_len - 1 > sizeof(static_pad))
+			return -EINVAL;
+		/* 'restore' padding left in the PCICC/PCIXCC card. */
+		if (copy_to_user(outputdata, static_pad, pad_len - 1))
+			return -EFAULT;
+		if (put_user(0, outputdata + pad_len - 1))
+			return -EFAULT;
+	}
+	/* Copy the crypto response to user space. */
+	if (copy_to_user(outputdata + pad_len, msg->text, reply_len))
+		return -EFAULT;
+	return 0;
+}
+
+/* Dispatch a reply on its type byte: error replies go to convert_error(),
+ * version 1 type 86 replies to convert_type86(), the rest is unknown. */
+static int convert_response(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    char __user *outputdata,
+			    unsigned int outputdatalength)
+{
+	struct type86_reply *t86 = reply->message;
+	unsigned int *raw = reply->message;
+	unsigned char rsp_type = t86->hdr.type;
+
+	/* Response type byte is the second byte in the response. */
+	if (rsp_type == TYPE82_RSP_CODE || rsp_type == TYPE88_RSP_CODE)
+		return convert_error(zdev, reply);
+	if (rsp_type == TYPE86_RSP_CODE) {
+		if (t86->hdr.reply_code)
+			return convert_error(zdev, reply);
+		if (t86->cprb.cprb_ver_id == 0x01)
+			return convert_type86(zdev, reply,
+					      outputdata, outputdatalength);
+		/* incorrect cprb version is an unknown response as well */
+	}
+	/* Unknown response type, this should NEVER EVER happen */
+	PRINTK("Unrecognized Message Header: %08x%08x\n", raw[0], raw[1]);
+	zdev->online = 0;
+	return -EAGAIN;	/* repeat the request on a different device. */
+}
+
+/**
+ * Called by the AP bus code, in tasklet context, after the crypto request
+ * "msg" has finished with the reply message "reply".
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message
+ * @reply: pointer to the AP reply message, or an ERR_PTR() value
+ */
+static void zcrypt_pcicc_receive(struct ap_device *ap_dev,
+				 struct ap_message *msg,
+				 struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	struct type86_reply *t86r;
+	void *src = &error_reply;
+	int length = sizeof(error_reply);
+
+	/* Never read reply->message of an ERR_PTR; length is little-endian. */
+	if (!IS_ERR(reply)) {
+		src = t86r = reply->message;
+		if (t86r->hdr.type == TYPE86_RSP_CODE &&
+		    t86r->cprb.cprb_ver_id == 0x01) {
+			length = sizeof(*t86r) + le16_to_cpu(t86r->length) - 2;
+			length = min(PCICC_MAX_RESPONSE_SIZE, length);
+		}
+	}
+	memcpy(msg->message, src, length);
+	complete((struct completion *) msg->private);
+}
+
+static atomic_t zcrypt_step = ATOMIC_INIT(0);	/* counter added into each psmid */
+
+/**
+ * The request distributor calls this function if it picked the PCICC
+ * device to handle a modexpo request.
+ * @zdev: zcrypt_device that identifies the PCICC device to the distributor
+ * @mex: pointer to the modexpo request buffer
+ * Returns 0 on success or a negative value (-ENOMEM, -ETIME on timeout, ...).
+ */
+static long zcrypt_pcicc_modexpo(struct zcrypt_device *zdev,
+				 struct ica_rsa_modexpo *mex)
+{
+	struct ap_message ap_msg;
+	struct completion work;	/* completed by zcrypt_pcicc_receive() */
+	int rc;
+
+	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.length = PAGE_SIZE;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &work;
+	rc = ICAMEX_msg_to_type6MEX_msg(zdev, &ap_msg, mex);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, PCICC_CLEANUP_TIME);
+	if (rc > 0)	/* completed: the reply was copied into ap_msg.message */
+		rc = convert_response(zdev, &ap_msg, mex->outputdata,
+				      mex->outputdatalength);
+	else {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+	}
+out_free:
+	free_page((unsigned long) ap_msg.message);
+	return rc;
+}
+
+/**
+ * The request distributor calls this function if it picked the PCICC
+ * device to handle a modexpo_crt request.
+ * @zdev: zcrypt_device that identifies the PCICC device to the distributor
+ * @crt: pointer to the modexpo_crt request buffer
+ * Returns 0 on success or a negative value (-ENOMEM, -ETIME on timeout, ...).
+ */
+static long zcrypt_pcicc_modexpo_crt(struct zcrypt_device *zdev,
+				     struct ica_rsa_modexpo_crt *crt)
+{
+	struct ap_message ap_msg;
+	struct completion work;	/* completed by zcrypt_pcicc_receive() */
+	int rc;
+
+	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.length = PAGE_SIZE;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &work;
+	rc = ICACRT_msg_to_type6CRT_msg(zdev, &ap_msg, crt);
+	if (rc)
+		goto out_free;
+	init_completion(&work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&work, PCICC_CLEANUP_TIME);
+	if (rc > 0)	/* completed: the reply was copied into ap_msg.message */
+		rc = convert_response(zdev, &ap_msg, crt->outputdata,
+				      crt->outputdatalength);
+	else {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			/* Message timed out. */
+			rc = -ETIME;
+	}
+out_free:
+	free_page((unsigned long) ap_msg.message);
+	return rc;
+}
+
+/**
+ * The crypto operations for a PCICC card, set as zdev->ops at probe time.
+ */
+static struct zcrypt_ops zcrypt_pcicc_ops = {
+	.rsa_modexpo = zcrypt_pcicc_modexpo,
+	.rsa_modexpo_crt = zcrypt_pcicc_modexpo_crt,
+};
+
+/**
+ * Probe function for PCICC cards. It always accepts the AP device
+ * since the bus_match already checked the hardware type.
+ * @ap_dev: pointer to the AP device.
+ * Returns 0, -ENOMEM or the error from zcrypt_device_register().
+ */
+static int zcrypt_pcicc_probe(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	zdev = zcrypt_device_alloc(PCICC_MAX_RESPONSE_SIZE);
+	if (zdev == NULL)
+		return -ENOMEM;
+	/* fill in the zcrypt device before it gets registered */
+	zdev->user_space_type = ZCRYPT_PCICC;
+	zdev->type_string = "PCICC";
+	zdev->speed_rating = PCICC_SPEED_RATING;
+	zdev->min_mod_size = PCICC_MIN_MOD_SIZE;
+	zdev->max_mod_size = PCICC_MAX_MOD_SIZE;
+	zdev->ops = &zcrypt_pcicc_ops;
+	zdev->online = 1;
+	zdev->ap_dev = ap_dev;
+	ap_dev->reply = &zdev->reply;
+	ap_dev->private = zdev;
+	rc = zcrypt_device_register(zdev);
+	if (rc == 0)
+		return 0;
+	/* registration failed: drop the link and free the device again */
+	ap_dev->private = NULL;
+	zcrypt_device_free(zdev);
+	return rc;
+}
+
+/**
+ * AP driver remove callback: unregister the zcrypt device that
+ * zcrypt_pcicc_probe() stored in ap_dev->private.
+ * @ap_dev: pointer to the AP device that is removed
+ */
+static void zcrypt_pcicc_remove(struct ap_device *ap_dev)
+{
+	/* ap_dev->private holds the zcrypt_device set up at probe time */
+	zcrypt_device_unregister(ap_dev->private);
+}
+
+int __init zcrypt_pcicc_init(void)	/* registers the AP driver as "pcicc" */
+{
+	return ap_driver_register(&zcrypt_pcicc_driver, THIS_MODULE, "pcicc");
+}
+
+void zcrypt_pcicc_exit(void)	/* unregisters the AP driver again */
+{
+	ap_driver_unregister(&zcrypt_pcicc_driver);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_pcicc_init);
+module_exit(zcrypt_pcicc_exit);
+#endif
diff --git a/drivers/s390/crypto/zcrypt_pcicc.h b/drivers/s390/crypto/zcrypt_pcicc.h
new file mode 100644
index 0000000..6d44548
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcicc.h
@@ -0,0 +1,176 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcicc.h
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_PCICC_H_
+#define _ZCRYPT_PCICC_H_
+
+/**
+ * The type 6 message family is associated with PCICC or PCIXCC cards.
+ *
+ * It contains a message header followed by a CPRB, both of which
+ * are described below.
+ * The packed header is 0x58 bytes long, which is also the value of offset1.
+ * Note that all reserved fields must be zeroes.
+ */
+struct type6_hdr {
+	unsigned char reserved1;	/* 0x00				*/
+	unsigned char type;		/* 0x06				*/
+	unsigned char reserved2[2];	/* 0x0000			*/
+	unsigned char right[4];		/* 0x00000000			*/
+	unsigned char reserved3[2];	/* 0x0000			*/
+	unsigned char reserved4[2];	/* 0x0000			*/
+	unsigned char apfs[4];		/* 0x00000000			*/
+	unsigned int  offset1;		/* 0x00000058 (offset to CPRB)	*/
+	unsigned int  offset2;		/* 0x00000000			*/
+	unsigned int  offset3;		/* 0x00000000			*/
+	unsigned int  offset4;		/* 0x00000000			*/
+	unsigned char agent_id[16];	/* PCICC:			*/
+					/*    0x0100			*/
+					/*    0x4343412d4150504c202020	*/
+					/*    0x010101			*/
+					/* PCIXCC:			*/
+					/*    0x4341000000000000	*/
+					/*    0x0000000000000000	*/
+	unsigned char rqid[2];		/* rqid.  internal to 603	*/
+	unsigned char reserved5[2];	/* 0x0000			*/
+	unsigned char function_code[2];	/* 'PD' (0x5044) or 'PK' (0x504B) */
+	unsigned char reserved6[2];	/* 0x0000			*/
+	unsigned int  ToCardLen1;	/* (request CPRB len + 3) & -4	*/
+	unsigned int  ToCardLen2;	/* db len 0x00000000 for PKD	*/
+	unsigned int  ToCardLen3;	/* 0x00000000			*/
+	unsigned int  ToCardLen4;	/* 0x00000000			*/
+	unsigned int  FromCardLen1;	/* response buffer length	*/
+	unsigned int  FromCardLen2;	/* db len 0x00000000 for PKD	*/
+	unsigned int  FromCardLen3;	/* 0x00000000			*/
+	unsigned int  FromCardLen4;	/* 0x00000000			*/
+} __attribute__((packed));
+
+/**
+ * CPRB
+ *	  Note that all shorts, ints and longs are little-endian.
+ *	  All pointer fields are 32-bits long, and mean nothing
+ *	  The packed structure is 0x70 bytes long.
+ *	  A request CPRB is followed by a request_parameter_block.
+ *
+ *	  The request (or reply) parameter block is organized thus:
+ *	    function code
+ *	    VUD block
+ *	    key block
+ */
+struct CPRB {
+	unsigned short cprb_len;	/* CPRB length, little endian	 */
+	unsigned char cprb_ver_id;	/* CPRB version id.		 */
+	unsigned char pad_000;		/* Alignment pad byte.		 */
+	unsigned char srpi_rtcode[4];	/* SRPI return code LELONG	 */
+	unsigned char srpi_verb;	/* SRPI verb type		 */
+	unsigned char flags;		/* flags			 */
+	unsigned char func_id[2];	/* function id			 */
+	unsigned char checkpoint_flag;	/*				 */
+	unsigned char resv2;		/* reserved			 */
+	unsigned short req_parml;	/* request parameter buffer	 */
+					/* length 16-bit little endian	 */
+	unsigned char req_parmp[4];	/* request parameter buffer	 *
+					 * pointer (means nothing: the	 *
+					 * parameter buffer follows	 *
+					 * the CPRB).			 */
+	unsigned char req_datal[4];	/* request data buffer		 */
+					/* length	  ULELONG	 */
+	unsigned char req_datap[4];	/* request data buffer		 */
+					/* pointer			 */
+	unsigned short rpl_parml;	/* reply  parameter buffer	 */
+					/* length 16-bit little endian	 */
+	unsigned char pad_001[2];	/* Alignment pad bytes. ULESHORT */
+	unsigned char rpl_parmp[4];	/* reply parameter buffer	 *
+					 * pointer (means nothing: the	 *
+					 * parameter buffer follows	 *
+					 * the CPRB).			 */
+	unsigned char rpl_datal[4];	/* reply data buffer len ULELONG */
+	unsigned char rpl_datap[4];	/* reply data buffer		 */
+					/* pointer			 */
+	unsigned short ccp_rscode;	/* server reason code	ULESHORT */
+	unsigned short ccp_rtcode;	/* server return code	ULESHORT */
+	unsigned char repd_parml[2];	/* replied parameter len ULESHORT*/
+	unsigned char mac_data_len[2];	/* Mac Data Length	ULESHORT */
+	unsigned char repd_datal[4];	/* replied data length	ULELONG	 */
+	unsigned char req_pc[2];	/* PC identifier		 */
+	unsigned char res_origin[8];	/* resource origin		 */
+	unsigned char mac_value[8];	/* Mac Value			 */
+	unsigned char logon_id[8];	/* Logon Identifier		 */
+	unsigned char usage_domain[2];	/* cdx, [0] is the AP queue no.	 */
+	unsigned char resv3[18];	/* reserved for requestor	 */
+	unsigned short svr_namel;	/* server name length  ULESHORT	 */
+	unsigned char svr_name[8];	/* server name			 */
+} __attribute__((packed));
+
+/**
+ * The type 86 message family is associated with PCICC and PCIXCC cards.
+ *
+ * It contains a message header followed by a CPRB.  The CPRB is
+ * the same as the request CPRB, which is described above.
+ *
+ * If format is 1, an error condition exists and no data beyond
+ * the 8-byte message header is of interest.
+ *
+ * The non-error message is shown below.
+ *
+ * Note that all reserved fields must be zeroes.
+ */
+struct type86_hdr {
+	unsigned char reserved1;	/* 0x00				*/
+	unsigned char type;		/* 0x86				*/
+	unsigned char format;		/* 0x01 (error) or 0x02 (ok)	*/
+	unsigned char reserved2;	/* 0x00				*/
+	unsigned char reply_code;	/* reply code, non-zero: error	*/
+	unsigned char reserved3[3];	/* 0x000000			*/
+} __attribute__((packed));
+
+#define TYPE86_RSP_CODE 0x86
+#define TYPE86_FMT2	0x02
+
+struct type86_fmt2_ext {	/* placed right after type86_hdr in type86_reply */
+	unsigned char	  reserved[4];	/* 0x00000000			*/
+	unsigned char	  apfs[4];	/* final status			*/
+	unsigned int	  count1;	/* length of CPRB + parameters	*/
+	unsigned int	  offset1;	/* offset to CPRB		*/
+	unsigned int	  count2;	/* 0x00000000			*/
+	unsigned int	  offset2;	/* db offset 0x00000000 for PKD	*/
+	unsigned int	  count3;	/* 0x00000000			*/
+	unsigned int	  offset3;	/* 0x00000000			*/
+	unsigned int	  count4;	/* 0x00000000			*/
+	unsigned int	  offset4;	/* 0x00000000			*/
+} __attribute__((packed));
+
+struct function_and_rules_block {	/* follows the CPRB in a type6 request */
+	unsigned char function_code[2];	/* 'PK' or 'PD' */
+	unsigned short ulen;		/* little-endian, set to 10 by pcicc */
+	unsigned char only_rule[8];	/* rule name, e.g. "PKCS-1.2" */
+} __attribute__((packed));
+
+int zcrypt_pcicc_init(void);	/* registers the PCICC AP driver */
+void zcrypt_pcicc_exit(void);	/* unregisters the PCICC AP driver */
+
+#endif /* _ZCRYPT_PCICC_H_ */
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c
new file mode 100644
index 0000000..2da8b93
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcixcc.c
@@ -0,0 +1,951 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcixcc.c
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#include "ap_bus.h"
+#include "zcrypt_api.h"
+#include "zcrypt_error.h"
+#include "zcrypt_pcicc.h"
+#include "zcrypt_pcixcc.h"
+#include "zcrypt_cca_key.h"
+
+#define PCIXCC_MIN_MOD_SIZE	 16	/*  128 bits	*/
+#define PCIXCC_MIN_MOD_SIZE_OLD	 64	/*  512 bits, minimum for MCL2 cards */
+#define PCIXCC_MAX_MOD_SIZE	256	/* 2048 bits	*/
+
+#define PCIXCC_MCL2_SPEED_RATING	7870	/* FIXME: needs finetuning */
+#define PCIXCC_MCL3_SPEED_RATING	7870
+#define CEX2C_SPEED_RATING		8540
+
+#define PCIXCC_MAX_ICA_MESSAGE_SIZE 0x77c  /* max size type6 v2 crt message */
+#define PCIXCC_MAX_ICA_RESPONSE_SIZE 0x77c /* max size type86 v2 reply	    */
+
+#define PCIXCC_MAX_XCRB_MESSAGE_SIZE (12*1024)	/* send_cprb buffer size */
+#define PCIXCC_MAX_XCRB_RESPONSE_SIZE PCIXCC_MAX_XCRB_MESSAGE_SIZE
+#define PCIXCC_MAX_XCRB_DATA_SIZE (11*1024)	/* limit for reply data */
+#define PCIXCC_MAX_XCRB_REPLY_SIZE (5*1024)	/* limit for reply CPRB */
+
+#define PCIXCC_MAX_RESPONSE_SIZE PCIXCC_MAX_XCRB_RESPONSE_SIZE
+
+#define PCIXCC_CLEANUP_TIME	(15*HZ)	/* how long a request is waited for */
+
+#define CEIL4(x) ((((x)+3)/4)*4)	/* round x up to a multiple of 4 */
+
+struct response_type {
+	struct completion work;	/* completed by zcrypt_pcixcc_receive()	*/
+	int type;		/* one of PCIXCC_RESPONSE_TYPE_*	*/
+};
+#define PCIXCC_RESPONSE_TYPE_ICA  0	/* modexpo / modexpo_crt requests */
+#define PCIXCC_RESPONSE_TYPE_XCRB 1	/* send_cprb requests		  */
+
+static struct ap_device_id zcrypt_pcixcc_ids[] = {
+	{ AP_DEVICE(AP_DEVICE_TYPE_PCIXCC) },	/* MCL2 or MCL3, see probe */
+	{ AP_DEVICE(AP_DEVICE_TYPE_CEX2C) },
+	{ /* end of list */ },
+};
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+MODULE_DEVICE_TABLE(ap, zcrypt_pcixcc_ids);
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("PCIXCC Cryptographic Coprocessor device driver, "
+		   "Copyright 2001, 2006 IBM Corporation");
+MODULE_LICENSE("GPL");
+#endif
+
+static int zcrypt_pcixcc_probe(struct ap_device *ap_dev);
+static void zcrypt_pcixcc_remove(struct ap_device *ap_dev);
+static void zcrypt_pcixcc_receive(struct ap_device *, struct ap_message *,
+				 struct ap_message *);
+
+static struct ap_driver zcrypt_pcixcc_driver = {
+	.probe = zcrypt_pcixcc_probe,		/* sets up the zcrypt device */
+	.remove = zcrypt_pcixcc_remove,		/* unregisters it again	     */
+	.receive = zcrypt_pcixcc_receive,	/* reply handler	     */
+	.ids = zcrypt_pcixcc_ids,		/* PCIXCC and CEX2C	     */
+};
+
+/**
+ * The following is used to initialize the CPRBX passed to the PCIXCC/CEX2C
+ * card in a type6 message. The 3 fields that are filled in at execution
+ * time are domain, req_parml and rpl_msgbl.
+ * Everything about this interface is ascii/big-endian, since the
+ * device does *not* have 'Intel inside'.
+ *
+ * The CPRBX is followed immediately by the parm block.
+ * The parm block contains:
+ * - function code ('PD' 0x5044 or 'PK' 0x504B)
+ * - rule block (one of:)
+ *   + 0x000A 'PKCS-1.2' (MCL2 'PD')
+ *   + 0x000A 'ZERO-PAD' (MCL2 'PK')
+ *   + 0x000A 'ZERO-PAD' (MCL3 'PD' or CEX2C 'PD')
+ *   + 0x000A 'MRP     ' (MCL3 'PK' or CEX2C 'PK')
+ * - VUD block
+ */
+static struct CPRBX static_cprbx = {
+	.cprb_len	=  0x00DC,	/* 220 */
+	.cprb_ver_id	=  0x02,	/* replies are checked for 0x02 too */
+	.func_id	= {0x54,0x32},	/* ASCII 'T','2' */
+};
+
+/**
+ * Convert a ICAMEX message to a type6 MEX message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: pointer to AP message
+ * @mex: pointer to user input data
+ *
+ * Returns 0 on success, -EFAULT or the error from the key conversion.
+ */
+static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_rsa_modexpo *mex)
+{
+	static struct type6_hdr static_type6_hdrX = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+		.agent_id	= {'C','A',},
+		.function_code	= {'P','K'},
+	};
+	static struct function_and_rules_block static_pke_fnr = {
+		.function_code	= {'P','K'},
+		.ulen		= 10,
+		.only_rule	= {'M','R','P',' ',' ',' ',' ',' '}
+	};
+	static struct function_and_rules_block static_pke_fnr_MCL2 = {
+		.function_code	= {'P','K'},
+		.ulen		= 10,
+		.only_rule	= {'Z','E','R','O','-','P','A','D'}
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct CPRBX cprbx;
+		struct function_and_rules_block fr;
+		unsigned short length;
+		char text[0];
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int size;
+
+	/* The VUD starts with its length field, followed by the input data. */
+	msg->length = mex->inputdatalength + 2;
+	if (copy_from_user(msg->text, mex->inputdata, mex->inputdatalength))
+		return -EFAULT;
+
+	/* The key is placed right behind the input data. */
+	size = zcrypt_type6_mex_key_en(mex, msg->text+mex->inputdatalength, 1);
+	if (size < 0)
+		return size;
+	size += sizeof(*msg) + mex->inputdatalength;
+	ap_msg->length = size;
+
+	/* Rule block: MCL2 cards get ZERO-PAD, all others MRP. */
+	if (zdev->user_space_type == ZCRYPT_PCIXCC_MCL2)
+		msg->fr = static_pke_fnr_MCL2;
+	else
+		msg->fr = static_pke_fnr;
+
+	/* Type6 header, then the CPRBX template with domain and lengths. */
+	msg->hdr = static_type6_hdrX;
+	msg->hdr.ToCardLen1 = size - sizeof(msg->hdr);
+	msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
+	msg->cprbx = static_cprbx;
+	msg->cprbx.domain = AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprbx.rpl_msgbl = msg->hdr.FromCardLen1;
+	msg->cprbx.req_parml = size - sizeof(msg->hdr) - sizeof(msg->cprbx);
+	return 0;
+}
+
+/**
+ * Convert a ICACRT message to a type6 CRT message.
+ *
+ * @zdev: crypto device pointer
+ * @ap_msg: pointer to AP message
+ * @crt: pointer to user input data
+ *
+ * Returns 0 on success, -EFAULT or the error from the key conversion.
+ */
+static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_rsa_modexpo_crt *crt)
+{
+	static struct type6_hdr static_type6_hdrX = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+		.agent_id	= {'C','A',},
+		.function_code	= {'P','D'},
+	};
+	static struct function_and_rules_block static_pkd_fnr = {
+		.function_code	= {'P','D'},
+		.ulen		= 10,
+		.only_rule	= {'Z','E','R','O','-','P','A','D'}
+	};
+	static struct function_and_rules_block static_pkd_fnr_MCL2 = {
+		.function_code	= {'P','D'},
+		.ulen		= 10,
+		.only_rule	= {'P','K','C','S','-','1','.','2'}
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct CPRBX cprbx;
+		struct function_and_rules_block fr;
+		unsigned short length;
+		char text[0];
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int size;
+
+	/* The VUD starts with its length field, followed by the input data. */
+	msg->length = crt->inputdatalength + 2;
+	if (copy_from_user(msg->text, crt->inputdata, crt->inputdatalength))
+		return -EFAULT;
+
+	/* The key is placed right behind the input data. */
+	size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength, 1);
+	if (size < 0)
+		return size;
+	size += sizeof(*msg) + crt->inputdatalength;	/* total size of msg */
+	ap_msg->length = size;
+
+	/* Rule block: MCL2 cards get PKCS-1.2, all others ZERO-PAD. */
+	if (zdev->user_space_type == ZCRYPT_PCIXCC_MCL2)
+		msg->fr = static_pkd_fnr_MCL2;
+	else
+		msg->fr = static_pkd_fnr;
+
+	/* Type6 header, then the CPRBX template with domain and lengths. */
+	msg->hdr = static_type6_hdrX;
+	msg->hdr.ToCardLen1 = size -  sizeof(msg->hdr);
+	msg->hdr.FromCardLen1 = PCIXCC_MAX_ICA_RESPONSE_SIZE - sizeof(msg->hdr);
+	msg->cprbx = static_cprbx;
+	msg->cprbx.domain = AP_QID_QUEUE(zdev->ap_dev->qid);
+	msg->cprbx.rpl_msgbl = size - sizeof(msg->hdr) - sizeof(msg->cprbx);
+	msg->cprbx.req_parml = msg->cprbx.rpl_msgbl;
+	return 0;
+}
+
+/* Fixed-size head of a type 86 format 2 reply: header plus extension. */
+struct type86_fmt2_msg {
+	struct type86_hdr hdr;
+	struct type86_fmt2_ext fmt2;
+} __attribute__((packed));
+
+/**
+ * Convert a XCRB message to a type6 CPRB message. Lengths from xcRB are
+ * checked unrounded (all limits are multiples of 4) so CEIL4() cannot wrap.
+ * @zdev: crypto device pointer
+ * @ap_msg: pointer to AP message; its length is reset to 0 on a size error
+ * @xcRB: pointer to user input data
+ * Returns 0 on success or -EFAULT.
+ */
+static int XCRB_msg_to_type6CPRB_msgX(struct zcrypt_device *zdev,
+				       struct ap_message *ap_msg,
+				       struct ica_xcRB *xcRB)
+{
+	static struct type6_hdr static_type6_hdrX = {
+		.type		=  0x06,
+		.offset1	=  0x00000058,
+	};
+	struct {
+		struct type6_hdr hdr;
+		struct ica_CPRBX cprbx;
+	} __attribute__((packed)) *msg = ap_msg->message;
+	int replylen, rcblen = CEIL4(xcRB->request_control_blk_length);
+	char *req_data, *function_code;
+
+	/* Check raw lengths too: CEIL4() or a sum of huge values could wrap. */
+	ap_msg->length = sizeof(struct type6_hdr) +
+		CEIL4(xcRB->request_control_blk_length) +
+		xcRB->request_data_length;
+	if (xcRB->request_control_blk_length > PCIXCC_MAX_XCRB_MESSAGE_SIZE ||
+	    xcRB->request_data_length > PCIXCC_MAX_XCRB_MESSAGE_SIZE ||
+	    ap_msg->length > PCIXCC_MAX_XCRB_MESSAGE_SIZE) {
+		PRINTK("Combined message is too large (%zu/%d/%d).\n",
+		    sizeof(struct type6_hdr),
+		    xcRB->request_control_blk_length,
+		    xcRB->request_data_length);
+		ap_msg->length = 0;	/* never report a bogus size */
+		return -EFAULT;
+	}
+	req_data = ap_msg->message + sizeof(struct type6_hdr) + rcblen;
+	if (xcRB->reply_control_blk_length > PCIXCC_MAX_XCRB_REPLY_SIZE) {
+		PDEBUG("Reply CPRB length is too large (%d).\n",
+		    xcRB->reply_control_blk_length);
+		return -EFAULT;
+	}
+	if (xcRB->reply_data_length > PCIXCC_MAX_XCRB_DATA_SIZE) {
+		PDEBUG("Reply data block length is too large (%d).\n",
+		    xcRB->reply_data_length);
+		return -EFAULT;
+	}
+	replylen = CEIL4(xcRB->reply_control_blk_length) +
+		CEIL4(xcRB->reply_data_length) +
+		sizeof(struct type86_fmt2_msg);
+	if (replylen > PCIXCC_MAX_XCRB_RESPONSE_SIZE) {
+		PDEBUG("Reply CPRB + data block > PCIXCC_MAX_XCRB_RESPONSE_SIZE"
+		       " (%zu/%d/%d).\n",
+		       sizeof(struct type86_fmt2_msg),
+		       xcRB->reply_control_blk_length,
+		       xcRB->reply_data_length);
+		xcRB->reply_control_blk_length = PCIXCC_MAX_XCRB_RESPONSE_SIZE -
+			(sizeof(struct type86_fmt2_msg) +
+			    CEIL4(xcRB->reply_data_length));
+		PDEBUG("Capping Reply CPRB length at %d\n",
+		       xcRB->reply_control_blk_length);
+	}
+
+	/* prepare type6 header */
+	msg->hdr = static_type6_hdrX;
+	memcpy(msg->hdr.agent_id , &(xcRB->agent_ID), sizeof(xcRB->agent_ID));
+	msg->hdr.ToCardLen1 = xcRB->request_control_blk_length;
+	if (xcRB->request_data_length) {
+		msg->hdr.offset2 = msg->hdr.offset1 + rcblen;
+		msg->hdr.ToCardLen2 = xcRB->request_data_length;
+	}
+	msg->hdr.FromCardLen1 = xcRB->reply_control_blk_length;
+	msg->hdr.FromCardLen2 = xcRB->reply_data_length;
+
+	/* prepare CPRB */
+	if (copy_from_user(&(msg->cprbx), xcRB->request_control_blk_addr,
+		    xcRB->request_control_blk_length))
+		return -EFAULT;
+	if (msg->cprbx.cprb_len + sizeof(msg->hdr.function_code) >
+	    xcRB->request_control_blk_length) {
+		PDEBUG("cprb_len too large (%d/%d)\n", msg->cprbx.cprb_len,
+		    xcRB->request_control_blk_length);
+		return -EFAULT;
+	}
+	function_code = ((unsigned char *)&msg->cprbx) + msg->cprbx.cprb_len;
+	memcpy(msg->hdr.function_code, function_code, sizeof(msg->hdr.function_code));
+
+	/* copy data block */
+	if (xcRB->request_data_length &&
+	    copy_from_user(req_data, xcRB->request_data_address,
+		xcRB->request_data_length))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ * Copy results from a type 86 ICA reply message back to user space.
+ *
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @data: pointer to user output data
+ * @length: size of user output data
+ *
+ * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
+ */
+struct type86x_reply {
+	struct type86_hdr hdr;
+	struct type86_fmt2_ext fmt2;
+	struct CPRBX cprbx;	/* ccp_rtcode/ccp_rscode: service rc/rs */
+	unsigned char pad[4];	/* 4 byte function code/rules block ? */
+	unsigned short length;	/* 2 more than the bytes used in text */
+	char text[0];		/* result data */
+} __attribute__((packed));
+
+static int convert_type86_ica(struct zcrypt_device *zdev,
+			  struct ap_message *reply,
+			  char __user *outputdata,
+			  unsigned int outputdatalength)
+{
+	static unsigned char static_pad[] = {	/* canned leading pad bytes */
+		0x00,0x02,
+		0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD,
+		0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57,
+		0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B,
+		0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39,
+		0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5,
+		0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D,
+		0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB,
+		0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F,
+		0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9,
+		0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45,
+		0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9,
+		0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F,
+		0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD,
+		0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D,
+		0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD,
+		0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9,
+		0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B,
+		0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B,
+		0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B,
+		0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD,
+		0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7,
+		0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1,
+		0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3,
+		0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23,
+		0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55,
+		0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43,
+		0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F,
+		0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F,
+		0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5,
+		0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD,
+		0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41,
+		0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09
+	};
+	struct type86x_reply *msg = reply->message;
+	unsigned short service_rc, service_rs;
+	unsigned int reply_len, pad_len;
+	char *data;
+
+	service_rc = msg->cprbx.ccp_rtcode;
+	if (unlikely(service_rc != 0)) {
+		service_rs = msg->cprbx.ccp_rscode;
+		if (service_rc == 8 && service_rs == 66) {
+			PDEBUG("Bad block format on PCIXCC/CEX2C\n");
+			return -EINVAL;
+		}
+		if (service_rc == 8 && service_rs == 65) {
+			PDEBUG("Probably an even modulus on PCIXCC/CEX2C\n");
+			return -EINVAL;
+		}
+		if (service_rc == 8 && service_rs == 770) {
+			PDEBUG("Invalid key length on PCIXCC/CEX2C\n");
+			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD;
+			return -EAGAIN;
+		}
+		if (service_rc == 8 && service_rs == 783) {
+			PDEBUG("Extended bitlengths not enabled on PCIXCC/CEX2C\n");
+			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD;
+			return -EAGAIN;
+		}
+		PRINTK("Unknown service rc/rs (PCIXCC/CEX2C): %d/%d\n",
+		       service_rc, service_rs);
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+	data = msg->text;
+	reply_len = msg->length - 2;	/* presumably length counts itself */
+	if (reply_len > outputdatalength)	/* result does not fit */
+		return -EINVAL;
+	/**
+	 * For all encipher requests, the length of the ciphertext (reply_len)
+	 * will always equal the modulus length. For MEX decipher requests
+	 * the output needs to get padded. Minimum pad size is 10.
+	 *
+	 * Currently, the cases where padding will be added is for:
+	 * - PCIXCC_MCL2 using a CRT form token (since PKD didn't support
+	 *   ZERO-PAD and CRT is only supported for PKD requests)
+	 * - PCICC, always
+	 */
+	pad_len = outputdatalength - reply_len;	/* bytes in front of data */
+	if (pad_len > 0) {
+		if (pad_len < 10)
+			return -EINVAL;
+		/* 'restore' padding left in the PCICC/PCIXCC card. */
+		if (copy_to_user(outputdata, static_pad, pad_len - 1))
+			return -EFAULT;
+		if (put_user(0, outputdata + pad_len - 1))	/* last pad byte */
+			return -EFAULT;
+	}
+	/* Copy the crypto response to user space. */
+	if (copy_to_user(outputdata + pad_len, data, reply_len))
+		return -EFAULT;
+	return 0;
+}
+
+/**
+ * Copy results from a type 86 XCRB reply message back to user space.
+ *
+ * @zdev: crypto device pointer
+ * @reply: reply AP message.
+ * @xcRB: pointer to XCRB
+ *
+ * Returns 0 on success or -EFAULT.
+ */
+static int convert_type86_xcrb(struct zcrypt_device *zdev,
+			       struct ap_message *reply,
+			       struct ica_xcRB *xcRB)
+{
+	struct type86_fmt2_msg *t86 = reply->message;
+	char *base = reply->message;
+	char *cprb = base + t86->fmt2.offset1;
+
+	/* Hand the reply CPRB and its length back to the caller. */
+	if (copy_to_user(xcRB->reply_control_blk_addr, cprb, t86->fmt2.count1))
+		return -EFAULT;
+	xcRB->reply_control_blk_length = t86->fmt2.count1;
+
+	/* The data block is optional; a count of 0 means there is none. */
+	if (t86->fmt2.count2 &&
+	    copy_to_user(xcRB->reply_data_addr, base + t86->fmt2.offset2,
+			 t86->fmt2.count2))
+		return -EFAULT;
+	xcRB->reply_data_length = t86->fmt2.count2;
+	return 0;
+}
+
+static int convert_response_ica(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    char __user *outputdata,
+			    unsigned int outputdatalength)
+{
+	struct type86x_reply *t86 = reply->message;
+	/* The response type is the second byte of every reply. */
+	unsigned char rsp_type = ((unsigned char *) reply->message)[1];
+
+	if (rsp_type == TYPE82_RSP_CODE || rsp_type == TYPE88_RSP_CODE)
+		return convert_error(zdev, reply);
+	if (rsp_type == TYPE86_RSP_CODE) {
+		if (t86->hdr.reply_code)
+			return convert_error(zdev, reply);
+		if (t86->cprbx.cprb_ver_id == 0x02)
+			return convert_type86_ica(zdev, reply,
+						  outputdata, outputdatalength);
+		/* A wrong cprb version is treated as an unknown response. */
+	}
+
+	/* Unknown response type, this should NEVER EVER happen. */
+	PRINTK("Unrecognized Message Header: %08x%08x\n",
+	       *(unsigned int *) reply->message,
+	       *(unsigned int *) (reply->message+4));
+	zdev->online = 0;
+	return -EAGAIN;	/* repeat the request on a different device. */
+}
+
+static int convert_response_xcrb(struct zcrypt_device *zdev,
+			    struct ap_message *reply,
+			    struct ica_xcRB *xcRB)
+{
+	struct type86x_reply *t86 = reply->message;
+	/* The response type is the second byte of every reply. */
+	unsigned char rsp_type = ((unsigned char *) reply->message)[1];
+
+	if (rsp_type == TYPE82_RSP_CODE || rsp_type == TYPE88_RSP_CODE) {
+		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
+		return convert_error(zdev, reply);
+	}
+	if (rsp_type == TYPE86_RSP_CODE) {
+		if (t86->hdr.reply_code) {
+			memcpy(&(xcRB->status), t86->fmt2.apfs, sizeof(u32));
+			return convert_error(zdev, reply);
+		}
+		if (t86->cprbx.cprb_ver_id == 0x02)
+			return convert_type86_xcrb(zdev, reply, xcRB);
+		/* A wrong cprb version is treated as an unknown response. */
+	}
+	/* Unknown response type, this should NEVER EVER happen. */
+	PRINTK("Unrecognized Message Header: %08x%08x\n",
+	       *(unsigned int *) reply->message,
+	       *(unsigned int *) (reply->message+4));
+	xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
+	zdev->online = 0;
+	return -EAGAIN;	/* repeat the request on a different device. */
+}
+
+/**
+ * This function is called from the AP bus code after a crypto request
+ * "msg" has finished with the reply message "reply".
+ * It is called from tasklet context.
+ * @ap_dev: pointer to the AP device
+ * @msg: pointer to the AP message
+ * @reply: pointer to the AP reply message, or an ERR_PTR() value
+ */
+static void zcrypt_pcixcc_receive(struct ap_device *ap_dev,
+				  struct ap_message *msg,
+				  struct ap_message *reply)
+{
+	static struct error_hdr error_reply = {
+		.type = TYPE82_RSP_CODE,
+		.reply_code = REP82_ERROR_MACHINE_FAILURE,
+	};
+	struct response_type *resp_type =
+		(struct response_type *) msg->private;
+	/* An ERR_PTR() reply carries no message and must not be dereferenced. */
+	struct type86x_reply *t86r = IS_ERR(reply) ? NULL : reply->message;
+	int length;
+
+	/* Copy the reply message to the request message buffer. */
+	if (IS_ERR(reply))
+		memcpy(msg->message, &error_reply, sizeof(error_reply));
+	else if (t86r->hdr.type == TYPE86_RSP_CODE &&
+		 t86r->cprbx.cprb_ver_id == 0x02) {
+		switch (resp_type->type) {
+		case PCIXCC_RESPONSE_TYPE_ICA:
+			length = sizeof(struct type86x_reply)
+				+ t86r->length - 2;
+			length = min(PCIXCC_MAX_ICA_RESPONSE_SIZE, length);
+			memcpy(msg->message, reply->message, length);
+			break;
+		case PCIXCC_RESPONSE_TYPE_XCRB:
+			length = t86r->fmt2.offset2 + t86r->fmt2.count2;
+			length = min(PCIXCC_MAX_XCRB_RESPONSE_SIZE, length);
+			memcpy(msg->message, reply->message, length);
+			break;
+		default:
+			PRINTK("Invalid internal response type: %i\n",
+			    resp_type->type);
+			memcpy(msg->message, &error_reply, sizeof error_reply);
+		}
+	} else
+		memcpy(msg->message, reply->message, sizeof error_reply);
+	complete(&(resp_type->work));
+}
+
+static atomic_t zcrypt_step = ATOMIC_INIT(0);	/* bumped for every psmid */
+
+/**
+ * The request distributor calls this function if it picked the PCIXCC/CEX2C
+ * device to handle a modexpo request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCIXCC/CEX2C device to the request distributor
+ * @mex: pointer to the modexpo request buffer
+ */
+static long zcrypt_pcixcc_modexpo(struct zcrypt_device *zdev,
+				  struct ica_rsa_modexpo *mex)
+{
+	struct response_type resp_type = {
+		.type = PCIXCC_RESPONSE_TYPE_ICA,
+	};
+	struct ap_message ap_msg;
+	int rc;
+
+	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.private = &resp_type;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	rc = ICAMEX_msg_to_type6MEX_msgX(zdev, &ap_msg, mex);
+	if (rc)
+		goto out_free;
+	/* Queue the request, then sleep until the reply or the timeout. */
+	init_completion(&resp_type.work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&resp_type.work, PCIXCC_CLEANUP_TIME);
+	if (rc <= 0) {
+		/* Signal pending (rc < 0) or message timed out (rc == 0). */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			rc = -ETIME;
+		goto out_free;
+	}
+	rc = convert_response_ica(zdev, &ap_msg, mex->outputdata,
+				  mex->outputdatalength);
+out_free:
+	free_page((unsigned long) ap_msg.message);
+	return rc;
+}
+
+/**
+ * The request distributor calls this function if it picked the PCIXCC/CEX2C
+ * device to handle a modexpo_crt request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCIXCC/CEX2C device to the request distributor
+ * @crt: pointer to the modexpo_crt request buffer
+ */
+static long zcrypt_pcixcc_modexpo_crt(struct zcrypt_device *zdev,
+				      struct ica_rsa_modexpo_crt *crt)
+{
+	struct response_type resp_type = {
+		.type = PCIXCC_RESPONSE_TYPE_ICA,
+	};
+	struct ap_message ap_msg;
+	int rc;
+
+	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.private = &resp_type;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	rc = ICACRT_msg_to_type6CRT_msgX(zdev, &ap_msg, crt);
+	if (rc)
+		goto out_free;
+	/* Queue the request, then sleep until the reply or the timeout. */
+	init_completion(&resp_type.work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&resp_type.work, PCIXCC_CLEANUP_TIME);
+	if (rc <= 0) {
+		/* Signal pending (rc < 0) or message timed out (rc == 0). */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)
+			rc = -ETIME;
+		goto out_free;
+	}
+	rc = convert_response_ica(zdev, &ap_msg, crt->outputdata,
+				  crt->outputdatalength);
+out_free:
+	free_page((unsigned long) ap_msg.message);
+	return rc;
+}
+
+/**
+ * The request distributor calls this function if it picked the PCIXCC/CEX2C
+ * device to handle a send_cprb request.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCIXCC/CEX2C device to the request distributor
+ * @xcRB: pointer to the send_cprb request buffer
+ */
+long zcrypt_pcixcc_send_cprb(struct zcrypt_device *zdev, struct ica_xcRB *xcRB)
+{
+	struct ap_message ap_msg;
+	struct response_type resp_type = {
+		.type = PCIXCC_RESPONSE_TYPE_XCRB,
+	};
+	int rc;
+
+	ap_msg.message = (void *) kmalloc(PCIXCC_MAX_XCRB_MESSAGE_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &resp_type;
+	rc = XCRB_msg_to_type6CPRB_msgX(zdev, &ap_msg, xcRB);
+	if (rc)
+		goto out_free;
+	init_completion(&resp_type.work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible_timeout(
+				&resp_type.work, PCIXCC_CLEANUP_TIME);
+	if (rc > 0)
+		rc = convert_response_xcrb(zdev, &ap_msg, xcRB);
+	else {
+		/* Signal pending or message timed out. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+		if (rc == 0)	/* Message timed out. */
+			rc = -ETIME;
+	}
+out_free:
+	/* Wipe all of it: ap_msg.length may be unchecked or too short here. */
+	memset(ap_msg.message, 0x0, PCIXCC_MAX_XCRB_MESSAGE_SIZE);
+	kfree(ap_msg.message);
+	return rc;
+}
+
+/**
+ * The crypto operations for a PCIXCC/CEX2C card.
+ */
+static struct zcrypt_ops zcrypt_pcixcc_ops = {
+	.rsa_modexpo = zcrypt_pcixcc_modexpo,		/* type6 'PK' request */
+	.rsa_modexpo_crt = zcrypt_pcixcc_modexpo_crt,	/* type6 'PD' request */
+	.send_cprb = zcrypt_pcixcc_send_cprb,		/* caller-built CPRB  */
+};
+
+/**
+ * Micro-code detection function. It sends a message to a pcixcc card
+ * to find out the microcode level.
+ * @ap_dev: pointer to the AP device.
+ */
+static int zcrypt_pcixcc_mcl(struct ap_device *ap_dev)
+{
+	static unsigned char msg[] = {	/* canned test request */
+		0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x43,0x41,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x00,
+		0x00,0x00,0x01,0xC4,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x07,0x24,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0xDC,0x02,0x00,0x00,0x00,0x54,0x32,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xE8,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x24,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x00,0x00,0x00,0x00,0x50,0x4B,0x00,0x0A,
+		0x4D,0x52,0x50,0x20,0x20,0x20,0x20,0x20,
+		0x00,0x42,0x00,0x01,0x02,0x03,0x04,0x05,
+		0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,
+		0x0E,0x0F,0x00,0x11,0x22,0x33,0x44,0x55,
+		0x66,0x77,0x88,0x99,0xAA,0xBB,0xCC,0xDD,
+		0xEE,0xFF,0xFF,0xEE,0xDD,0xCC,0xBB,0xAA,
+		0x99,0x88,0x77,0x66,0x55,0x44,0x33,0x22,
+		0x11,0x00,0x01,0x23,0x45,0x67,0x89,0xAB,
+		0xCD,0xEF,0xFE,0xDC,0xBA,0x98,0x76,0x54,
+		0x32,0x10,0x00,0x9A,0x00,0x98,0x00,0x00,
+		0x1E,0x00,0x00,0x94,0x00,0x00,0x00,0x00,
+		0x04,0x00,0x00,0x8C,0x00,0x00,0x00,0x40,
+		0x02,0x00,0x00,0x40,0xBA,0xE8,0x23,0x3C,
+		0x75,0xF3,0x91,0x61,0xD6,0x73,0x39,0xCF,
+		0x7B,0x6D,0x8E,0x61,0x97,0x63,0x9E,0xD9,
+		0x60,0x55,0xD6,0xC7,0xEF,0xF8,0x1E,0x63,
+		0x95,0x17,0xCC,0x28,0x45,0x60,0x11,0xC5,
+		0xC4,0x4E,0x66,0xC6,0xE6,0xC3,0xDE,0x8A,
+		0x19,0x30,0xCF,0x0E,0xD7,0xAA,0xDB,0x01,
+		0xD8,0x00,0xBB,0x8F,0x39,0x9F,0x64,0x28,
+		0xF5,0x7A,0x77,0x49,0xCC,0x6B,0xA3,0x91,
+		0x97,0x70,0xE7,0x60,0x1E,0x39,0xE1,0xE5,
+		0x33,0xE1,0x15,0x63,0x69,0x08,0x80,0x4C,
+		0x67,0xC4,0x41,0x8F,0x48,0xDF,0x26,0x98,
+		0xF1,0xD5,0x8D,0x88,0xD9,0x6A,0xA4,0x96,
+		0xC5,0x84,0xD9,0x30,0x49,0x67,0x7D,0x19,
+		0xB1,0xB3,0x45,0x4D,0xB2,0x53,0x9A,0x47,
+		0x3C,0x7C,0x55,0xBF,0xCC,0x85,0x00,0x36,
+		0xF1,0x3D,0x93,0x53
+	};
+	unsigned long long psmid;
+	struct CPRBX *cprbx;
+	char *reply;
+	int rc, i;
+
+	reply = (void *) get_zeroed_page(GFP_KERNEL);	/* reply buffer */
+	if (!reply)
+		return -ENOMEM;
+
+	rc = ap_send(ap_dev->qid, 0x0102030405060708ULL, msg, sizeof(msg));
+	if (rc)
+		goto out_free;
+
+	/* Wait for the test message to complete. */
+	for (i = 0; i < 6; i++) {	/* 6 polls, 300 ms apart */
+		mdelay(300);
+		rc = ap_recv(ap_dev->qid, &psmid, reply, 4096);
+		if (rc == 0 && psmid == 0x0102030405060708ULL)
+			break;
+	}
+
+	if (i >= 6) {
+		/* Got no answer. */
+		rc = -ENODEV;
+		goto out_free;
+	}
+
+	cprbx = (struct CPRBX *) (reply + 48);	/* presumably behind hdr+fmt2 */
+	if (cprbx->ccp_rtcode == 8 && cprbx->ccp_rscode == 33)
+		rc = ZCRYPT_PCIXCC_MCL2;	/* this rc/rs pair means MCL2 */
+	else
+		rc = ZCRYPT_PCIXCC_MCL3;
+out_free:
+	free_page((unsigned long) reply);
+	return rc;
+}
+
+/**
+ * Probe function for PCIXCC/CEX2C cards. It always accepts the AP device
+ * since the bus_match already checked the hardware type. The PCIXCC
+ * cards come in two flavours: micro code level 2 and micro code level 3.
+ * This is checked by sending a test message to the device.
+ * @ap_dev: pointer to the AP device.
+ */
+static int zcrypt_pcixcc_probe(struct ap_device *ap_dev)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	zdev = zcrypt_device_alloc(PCIXCC_MAX_RESPONSE_SIZE);
+	if (!zdev)
+		return -ENOMEM;
+	zdev->ap_dev = ap_dev;
+	zdev->ops = &zcrypt_pcixcc_ops;
+	zdev->online = 1;
+	/* Defaults shared by MCL3 and CEX2C; MCL2 overrides the minimum. */
+	zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE;
+	zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE;
+
+	if (ap_dev->device_type != AP_DEVICE_TYPE_PCIXCC) {
+		zdev->user_space_type = ZCRYPT_CEX2C;
+		zdev->type_string = "CEX2C";
+		zdev->speed_rating = CEX2C_SPEED_RATING;
+	} else {
+		/* Ask the card itself for its micro code level. */
+		rc = zcrypt_pcixcc_mcl(ap_dev);
+		if (rc < 0) {
+			zcrypt_device_free(zdev);
+			return rc;
+		}
+		zdev->user_space_type = rc;
+		if (rc == ZCRYPT_PCIXCC_MCL2) {
+			zdev->type_string = "PCIXCC_MCL2";
+			zdev->speed_rating = PCIXCC_MCL2_SPEED_RATING;
+			zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD;
+		} else {
+			zdev->type_string = "PCIXCC_MCL3";
+			zdev->speed_rating = PCIXCC_MCL3_SPEED_RATING;
+		}
+	}
+
+	ap_dev->reply = &zdev->reply;
+	ap_dev->private = zdev;
+	rc = zcrypt_device_register(zdev);
+	if (!rc)
+		return 0;
+
+	/* Registration failed: detach and release the device again. */
+	ap_dev->private = NULL;
+	zcrypt_device_free(zdev);
+	return rc;
+}
+
+/**
+ * This is called to remove the extended PCIXCC/CEX2C driver information
+ * if an AP device is removed.
+ */
+static void zcrypt_pcixcc_remove(struct ap_device *ap_dev)
+{
+	/* ap_dev->private holds the zcrypt device that the probe function
+	 * stored there. */
+	zcrypt_device_unregister(ap_dev->private);
+}
+
+int __init zcrypt_pcixcc_init(void)	/* registers the "pcixcc" AP driver */
+{
+	return ap_driver_register(&zcrypt_pcixcc_driver, THIS_MODULE, "pcixcc");
+}
+
+void zcrypt_pcixcc_exit(void)	/* unregisters the AP driver again */
+{
+	ap_driver_unregister(&zcrypt_pcixcc_driver);
+}
+
+#ifndef CONFIG_ZCRYPT_MONOLITHIC
+module_init(zcrypt_pcixcc_init);
+module_exit(zcrypt_pcixcc_exit);
+#endif
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h
new file mode 100644
index 0000000..a78ff30
--- /dev/null
+++ b/drivers/s390/crypto/zcrypt_pcixcc.h
@@ -0,0 +1,79 @@
+/*
+ *  linux/drivers/s390/crypto/zcrypt_pcixcc.h
+ *
+ *  zcrypt 2.1.0
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _ZCRYPT_PCIXCC_H_
+#define _ZCRYPT_PCIXCC_H_
+
+/**
+ * struct CPRBX - packed CPRB layout (the fields below total 220 bytes)
+ *	  Note that all shorts and ints are big-endian.
+ *	  All pointer fields are 16 bytes long, and mean nothing.
+ *
+ *	  A request CPRB is followed by a request_parameter_block.
+ *
+ *	  The request (or reply) parameter block is organized thus:
+ *	    function code
+ *	    VUD block
+ *	    key block
+ */
+struct CPRBX {
+	unsigned short cprb_len;	/* CPRB length	      220	 */
+	unsigned char  cprb_ver_id;	/* CPRB version id.   0x02	 */
+	unsigned char  pad_000[3];	/* Alignment pad bytes		 */
+	unsigned char  func_id[2];	/* function id	      0x5432	 */
+	unsigned char  cprb_flags[4];	/* Flags			 */
+	unsigned int   req_parml;	/* request parameter buffer len	 */
+	unsigned int   req_datal;	/* request data buffer len	 */
+	unsigned int   rpl_msgbl;	/* reply  message block length	 */
+	unsigned int   rpld_parml;	/* replied parameter block len	 */
+	unsigned int   rpl_datal;	/* reply data block len		 */
+	unsigned int   rpld_datal;	/* replied data block len	 */
+	unsigned int   req_extbl;	/* request extension block len	 */
+	unsigned char  pad_001[4];	/* reserved			 */
+	unsigned int   rpld_extbl;	/* replied extension block len	 */
+	unsigned char  req_parmb[16];	/* request parm block 'address'	 */
+	unsigned char  req_datab[16];	/* request data block 'address'	 */
+	unsigned char  rpl_parmb[16];	/* reply parm block 'address'	 */
+	unsigned char  rpl_datab[16];	/* reply data block 'address'	 */
+	unsigned char  req_extb[16];	/* request extension block 'addr'*/
+	unsigned char  rpl_extb[16];	/* reply extension block 'addr'	 */
+	unsigned short ccp_rtcode;	/* server return code		 */
+	unsigned short ccp_rscode;	/* server reason code		 */
+	unsigned int   mac_data_len;	/* Mac Data Length		 */
+	unsigned char  logon_id[8];	/* Logon Identifier		 */
+	unsigned char  mac_value[8];	/* Mac Value			 */
+	unsigned char  mac_content_flgs;/* Mac content flag byte	 */
+	unsigned char  pad_002;		/* Alignment			 */
+	unsigned short domain;		/* Domain			 */
+	unsigned char  pad_003[12];	/* Domain masks			 */
+	unsigned char  pad_004[36];	/* reserved			 */
+} __attribute__((packed));
+
+int zcrypt_pcixcc_init(void);
+void zcrypt_pcixcc_exit(void);
+
+#endif /* _ZCRYPT_PCIXCC_H_ */
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 5399c5d..a914129 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -19,9 +19,6 @@
 
 #include "s390mach.h"
 
-#define DBG printk
-// #define DBG(args,...) do {} while (0);
-
 static struct semaphore m_sem;
 
 extern int css_process_crw(int, int);
@@ -83,11 +80,11 @@
 		ccode = stcrw(&crw[chain]);
 		if (ccode != 0)
 			break;
-		DBG(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, "
-		    "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
-		    crw[chain].slct, crw[chain].oflw, crw[chain].chn,
-		    crw[chain].rsc, crw[chain].anc, crw[chain].erc,
-		    crw[chain].rsid);
+		printk(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, "
+		       "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
+		       crw[chain].slct, crw[chain].oflw, crw[chain].chn,
+		       crw[chain].rsc, crw[chain].anc, crw[chain].erc,
+		       crw[chain].rsid);
 		/* Check for overflows. */
 		if (crw[chain].oflw) {
 			pr_debug("%s: crw overflow detected!\n", __FUNCTION__);
@@ -117,8 +114,8 @@
 			 * reported to the common I/O layer.
 			 */
 			if (crw[chain].slct) {
-				DBG(KERN_INFO"solicited machine check for "
-				    "channel path %02X\n", crw[0].rsid);
+				pr_debug("solicited machine check for "
+					 "channel path %02X\n", crw[0].rsid);
 				break;
 			}
 			switch (crw[0].erc) {
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index adc9d8f..5d39b2d 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -189,6 +189,10 @@
 	struct zfcp_fsf_req *request, *tmp;
 	unsigned int i;
 
+	/* 0 is reserved as an invalid req_id */
+	if (req_id == 0)
+		return NULL;
+
 	i = req_id % REQUEST_LIST_SIZE;
 
 	list_for_each_entry_safe(request, tmp, &adapter->req_list[i], list)
@@ -299,11 +303,45 @@
 	return;
 }
 
+/* Round size up to the next power of two; a size of 0 yields 0. */
+static int calc_alignment(int size)
+{
+	int pow2;
+
+	if (size == 0)
+		return 0;
+	for (pow2 = 1; pow2 < size; pow2 <<= 1)
+		;	/* keep doubling until size is covered */
+
+	return pow2;
+}
+
 static int __init
 zfcp_module_init(void)
 {
+	int retval = -ENOMEM;
+	int size, align;
 
-	int retval = 0;
+	size = sizeof(struct zfcp_fsf_req_qtcb);
+	align = calc_alignment(size);
+	zfcp_data.fsf_req_qtcb_cache =
+		kmem_cache_create("zfcp_fsf", size, align, 0, NULL, NULL);
+	if (!zfcp_data.fsf_req_qtcb_cache)
+		goto out;
+
+	size = sizeof(struct fsf_status_read_buffer);
+	align = calc_alignment(size);
+	zfcp_data.sr_buffer_cache =
+		kmem_cache_create("zfcp_sr", size, align, 0, NULL, NULL);
+	if (!zfcp_data.sr_buffer_cache)
+		goto out_sr_cache;
+
+	size = sizeof(struct zfcp_gid_pn_data);
+	align = calc_alignment(size);
+	zfcp_data.gid_pn_cache =
+		kmem_cache_create("zfcp_gid", size, align, 0, NULL, NULL);
+	if (!zfcp_data.gid_pn_cache)
+		goto out_gid_cache;
 
 	atomic_set(&zfcp_data.loglevel, loglevel);
 
@@ -313,15 +351,16 @@
 	/* initialize adapters to be removed list head */
 	INIT_LIST_HEAD(&zfcp_data.adapter_remove_lh);
 
-	zfcp_transport_template = fc_attach_transport(&zfcp_transport_functions);
-	if (!zfcp_transport_template)
-		return -ENODEV;
+	zfcp_data.scsi_transport_template =
+		fc_attach_transport(&zfcp_transport_functions);
+	if (!zfcp_data.scsi_transport_template)
+		goto out_transport;
 
 	retval = misc_register(&zfcp_cfdc_misc);
 	if (retval != 0) {
 		ZFCP_LOG_INFO("registration of misc device "
 			      "zfcp_cfdc failed\n");
-		goto out;
+		goto out_misc;
 	}
 
 	ZFCP_LOG_TRACE("major/minor for zfcp_cfdc: %d/%d\n",
@@ -333,9 +372,6 @@
 	/* initialise configuration rw lock */
 	rwlock_init(&zfcp_data.config_lock);
 
-	/* save address of data structure managing the driver module */
-	zfcp_data.scsi_host_template.module = THIS_MODULE;
-
 	/* setup dynamic I/O */
 	retval = zfcp_ccw_register();
 	if (retval) {
@@ -350,6 +386,14 @@
 
  out_ccw_register:
 	misc_deregister(&zfcp_cfdc_misc);
+ out_misc:
+	fc_release_transport(zfcp_data.scsi_transport_template);
+ out_transport:
+	kmem_cache_destroy(zfcp_data.gid_pn_cache);
+ out_gid_cache:
+	kmem_cache_destroy(zfcp_data.sr_buffer_cache);
+ out_sr_cache:
+	kmem_cache_destroy(zfcp_data.fsf_req_qtcb_cache);
  out:
 	return retval;
 }
@@ -935,20 +979,20 @@
 zfcp_allocate_low_mem_buffers(struct zfcp_adapter *adapter)
 {
 	adapter->pool.fsf_req_erp =
-		mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_ERP_NR,
-				sizeof(struct zfcp_fsf_req_pool_element));
+		mempool_create_slab_pool(ZFCP_POOL_FSF_REQ_ERP_NR,
+					 zfcp_data.fsf_req_qtcb_cache);
 	if (!adapter->pool.fsf_req_erp)
 		return -ENOMEM;
 
 	adapter->pool.fsf_req_scsi =
-		mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_SCSI_NR,
-				sizeof(struct zfcp_fsf_req_pool_element));
+		mempool_create_slab_pool(ZFCP_POOL_FSF_REQ_SCSI_NR,
+					 zfcp_data.fsf_req_qtcb_cache);
 	if (!adapter->pool.fsf_req_scsi)
 		return -ENOMEM;
 
 	adapter->pool.fsf_req_abort =
-		mempool_create_kmalloc_pool(ZFCP_POOL_FSF_REQ_ABORT_NR,
-				sizeof(struct zfcp_fsf_req_pool_element));
+		mempool_create_slab_pool(ZFCP_POOL_FSF_REQ_ABORT_NR,
+					 zfcp_data.fsf_req_qtcb_cache);
 	if (!adapter->pool.fsf_req_abort)
 		return -ENOMEM;
 
@@ -959,14 +1003,14 @@
 		return -ENOMEM;
 
 	adapter->pool.data_status_read =
-		mempool_create_kmalloc_pool(ZFCP_POOL_STATUS_READ_NR,
-					sizeof(struct fsf_status_read_buffer));
+		mempool_create_slab_pool(ZFCP_POOL_STATUS_READ_NR,
+					 zfcp_data.sr_buffer_cache);
 	if (!adapter->pool.data_status_read)
 		return -ENOMEM;
 
 	adapter->pool.data_gid_pn =
-		mempool_create_kmalloc_pool(ZFCP_POOL_DATA_GID_PN_NR,
-					    sizeof(struct zfcp_gid_pn_data));
+		mempool_create_slab_pool(ZFCP_POOL_DATA_GID_PN_NR,
+					 zfcp_data.gid_pn_cache);
 	if (!adapter->pool.data_gid_pn)
 		return -ENOMEM;
 
@@ -1091,9 +1135,6 @@
 	/* initialize lock of associated request queue */
 	rwlock_init(&adapter->request_queue.queue_lock);
 
-	/* intitialise SCSI ER timer */
-	init_timer(&adapter->scsi_er_timer);
-
 	/* mark adapter unusable as long as sysfs registration is not complete */
 	atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &adapter->status);
 
@@ -1609,7 +1650,6 @@
 	gid_pn->ct.handler = zfcp_ns_gid_pn_handler;
 	gid_pn->ct.handler_data = (unsigned long) gid_pn;
         gid_pn->ct.timeout = ZFCP_NS_GID_PN_TIMEOUT;
-        gid_pn->ct.timer = &erp_action->timer;
 	gid_pn->port = erp_action->port;
 
 	ret = zfcp_fsf_send_ct(&gid_pn->ct, adapter->pool.fsf_req_erp,
diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c
index fdabade..81680ef 100644
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c
@@ -275,19 +275,6 @@
 }
 
 /**
- * zfcp_ccw_unregister - ccw unregister function
- *
- * Unregisters the driver from common i/o layer. Function will be called at
- * module unload/system shutdown.
- */
-void __exit
-zfcp_ccw_unregister(void)
-{
-	zfcp_sysfs_driver_remove_files(&zfcp_ccw_driver.driver);
-	ccw_driver_unregister(&zfcp_ccw_driver);
-}
-
-/**
  * zfcp_ccw_shutdown - gets called on reboot/shutdown
  *
  * Makes sure that QDIO queues are down when the system gets stopped.
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index c033145..0aa3b1a 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -707,7 +707,7 @@
 			    struct zfcp_adapter *adapter,
 			    struct scsi_cmnd *scsi_cmnd,
 			    struct zfcp_fsf_req *fsf_req,
-			    struct zfcp_fsf_req *old_fsf_req)
+			    unsigned long old_req_id)
 {
 	struct zfcp_scsi_dbf_record *rec = &adapter->scsi_dbf_buf;
 	struct zfcp_dbf_dump *dump = (struct zfcp_dbf_dump *)rec;
@@ -768,8 +768,7 @@
 				rec->fsf_seqno = fsf_req->seq_no;
 				rec->fsf_issued = fsf_req->issued;
 			}
-			rec->type.old_fsf_reqid =
-				    (unsigned long) old_fsf_req;
+			rec->type.old_fsf_reqid = old_req_id;
 		} else {
 			strncpy(dump->tag, "dump", ZFCP_DBF_TAG_SIZE);
 			dump->total_size = buflen;
@@ -794,17 +793,17 @@
 			   struct zfcp_fsf_req *fsf_req)
 {
 	_zfcp_scsi_dbf_event_common("rslt", tag, level,
-			adapter, scsi_cmnd, fsf_req, NULL);
+			adapter, scsi_cmnd, fsf_req, 0);
 }
 
 inline void
 zfcp_scsi_dbf_event_abort(const char *tag, struct zfcp_adapter *adapter,
 			  struct scsi_cmnd *scsi_cmnd,
 			  struct zfcp_fsf_req *new_fsf_req,
-			  struct zfcp_fsf_req *old_fsf_req)
+			  unsigned long old_req_id)
 {
 	_zfcp_scsi_dbf_event_common("abrt", tag, 1,
-			adapter, scsi_cmnd, new_fsf_req, old_fsf_req);
+			adapter, scsi_cmnd, new_fsf_req, old_req_id);
 }
 
 inline void
@@ -814,7 +813,7 @@
 	struct zfcp_adapter *adapter = unit->port->adapter;
 
 	_zfcp_scsi_dbf_event_common(flag == FCP_TARGET_RESET ? "trst" : "lrst",
-			tag, 1, adapter, scsi_cmnd, NULL, NULL);
+			tag, 1, adapter, scsi_cmnd, NULL, 0);
 }
 
 static int
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 94d1b74d..8f88269 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -19,7 +19,6 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
  */ 
 
-
 #ifndef ZFCP_DEF_H
 #define ZFCP_DEF_H
 
@@ -32,6 +31,10 @@
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/timer.h>
+#include <linux/slab.h>
+#include <linux/mempool.h>
+#include <linux/syscalls.h>
+#include <linux/ioctl.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_cmnd.h>
@@ -39,14 +42,11 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_fc.h>
-#include "zfcp_fsf.h"
 #include <asm/ccwdev.h>
 #include <asm/qdio.h>
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
-#include <linux/mempool.h>
-#include <linux/syscalls.h>
-#include <linux/ioctl.h>
+#include "zfcp_fsf.h"
 
 
 /********************* GENERAL DEFINES *********************************/
@@ -137,7 +137,7 @@
 #define ZFCP_EXCHANGE_CONFIG_DATA_RETRIES	7
 
 /* timeout value for "default timer" for fsf requests */
-#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ);
+#define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ)
 
 /*************** FIBRE CHANNEL PROTOCOL SPECIFIC DEFINES ********************/
 
@@ -543,7 +543,7 @@
 } while (0)
 	
 #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_NORMAL
-# define ZFCP_LOG_NORMAL(fmt, args...)
+# define ZFCP_LOG_NORMAL(fmt, args...)	do { } while (0)
 #else
 # define ZFCP_LOG_NORMAL(fmt, args...) \
 do { \
@@ -553,7 +553,7 @@
 #endif
 
 #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_INFO
-# define ZFCP_LOG_INFO(fmt, args...)
+# define ZFCP_LOG_INFO(fmt, args...)	do { } while (0)
 #else
 # define ZFCP_LOG_INFO(fmt, args...) \
 do { \
@@ -563,14 +563,14 @@
 #endif
 
 #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_DEBUG
-# define ZFCP_LOG_DEBUG(fmt, args...)
+# define ZFCP_LOG_DEBUG(fmt, args...)	do { } while (0)
 #else
 # define ZFCP_LOG_DEBUG(fmt, args...) \
 	ZFCP_LOG(ZFCP_LOG_LEVEL_DEBUG, fmt , ##args)
 #endif
 
 #if ZFCP_LOG_LEVEL_LIMIT < ZFCP_LOG_LEVEL_TRACE
-# define ZFCP_LOG_TRACE(fmt, args...)
+# define ZFCP_LOG_TRACE(fmt, args...)	do { } while (0)
 #else
 # define ZFCP_LOG_TRACE(fmt, args...) \
 	ZFCP_LOG(ZFCP_LOG_LEVEL_TRACE, fmt , ##args)
@@ -779,7 +779,6 @@
  * @handler_data: data passed to handler function
  * @pool: pointer to memory pool for ct request structure
  * @timeout: FSF timeout for this request
- * @timer: timer (e.g. for request initiated by erp)
  * @completion: completion for synchronization purposes
  * @status: used to pass error status to calling function
  */
@@ -793,7 +792,6 @@
 	unsigned long handler_data;
 	mempool_t *pool;
 	int timeout;
-	struct timer_list *timer;
 	struct completion *completion;
 	int status;
 };
@@ -821,7 +819,6 @@
  * @resp_count: number of elements in response scatter-gather list
  * @handler: handler function (called for response to the request)
  * @handler_data: data passed to handler function
- * @timer: timer (e.g. for request initiated by erp)
  * @completion: completion for synchronization purposes
  * @ls_code: hex code of ELS command
  * @status: used to pass error status to calling function
@@ -836,7 +833,6 @@
 	unsigned int resp_count;
 	zfcp_send_els_handler_t handler;
 	unsigned long handler_data;
-	struct timer_list *timer;
 	struct completion *completion;
 	int ls_code;
 	int status;
@@ -886,7 +882,6 @@
 	struct list_head        port_remove_lh;    /* head of ports to be
 						      removed */
 	u32			ports;	           /* number of remote ports */
-	struct timer_list	scsi_er_timer;     /* SCSI err recovery watch */
 	atomic_t		reqs_active;	   /* # active FSF reqs */
 	unsigned long		req_no;		   /* unique FSF req number */
 	struct list_head	*req_list;	   /* list of pending reqs */
@@ -1003,6 +998,7 @@
 	struct fsf_qtcb	       *qtcb;	       /* address of associated QTCB */
 	u32		       seq_no;         /* Sequence number of request */
         unsigned long          data;           /* private data of request */ 
+	struct timer_list      timer;	       /* used for erp or scsi er */
 	struct zfcp_erp_action *erp_action;    /* used if this request is
 						  issued on behalf of erp */
 	mempool_t	       *pool;	       /* used if request was alloacted
@@ -1016,6 +1012,7 @@
 /* driver data */
 struct zfcp_data {
 	struct scsi_host_template scsi_host_template;
+	struct scsi_transport_template *scsi_transport_template;
         atomic_t                status;             /* Module status flags */
 	struct list_head	adapter_list_head;  /* head of adapter list */
 	struct list_head	adapter_remove_lh;  /* head of adapters to be
@@ -1031,6 +1028,9 @@
 	wwn_t                   init_wwpn;
 	fcp_lun_t               init_fcp_lun;
 	char 			*driver_version;
+	kmem_cache_t		*fsf_req_qtcb_cache;
+	kmem_cache_t		*sr_buffer_cache;
+	kmem_cache_t		*gid_pn_cache;
 };
 
 /**
@@ -1051,7 +1051,7 @@
 #define ZFCP_POOL_DATA_GID_PN_NR	1
 
 /* struct used by memory pools for fsf_requests */
-struct zfcp_fsf_req_pool_element {
+struct zfcp_fsf_req_qtcb {
 	struct zfcp_fsf_req fsf_req;
 	struct fsf_qtcb qtcb;
 };
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 7f60b6f..862a411 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -64,8 +64,6 @@
 static int zfcp_erp_adapter_strategy(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *, int);
 static int zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *);
-static void zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *);
-static void zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *);
@@ -93,6 +91,7 @@
 static int zfcp_erp_unit_strategy_close(struct zfcp_erp_action *);
 static int zfcp_erp_unit_strategy_open(struct zfcp_erp_action *);
 
+static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *);
 static void zfcp_erp_action_dismiss_port(struct zfcp_port *);
 static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *);
 static void zfcp_erp_action_dismiss(struct zfcp_erp_action *);
@@ -111,8 +110,62 @@
 static inline void zfcp_erp_action_to_running(struct zfcp_erp_action *);
 
 static void zfcp_erp_memwait_handler(unsigned long);
-static void zfcp_erp_timeout_handler(unsigned long);
-static inline void zfcp_erp_timeout_init(struct zfcp_erp_action *);
+
+/**
+ * zfcp_close_qdio - shut down the qdio queues of an adapter, if they are up
+ */
+static void zfcp_close_qdio(struct zfcp_adapter *adapter)
+{
+	struct zfcp_qdio_queue *req_queue;
+	int first, count;
+
+	if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status))
+		return;
+
+	/* clear QDIOUP flag, thus do_QDIO is not called during qdio_shutdown */
+	req_queue = &adapter->request_queue;
+	write_lock_irq(&req_queue->queue_lock);
+	atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
+	write_unlock_irq(&req_queue->queue_lock);
+
+	debug_text_event(adapter->erp_dbf, 3, "qdio_down2a");
+	while (qdio_shutdown(adapter->ccw_device,
+			     QDIO_FLAG_CLEANUP_USING_CLEAR) == -EINPROGRESS)
+		msleep(1000);
+	debug_text_event(adapter->erp_dbf, 3, "qdio_down2b");
+
+	/* zero the request queue sbals that were still in use (not free) */
+	count = atomic_read(&req_queue->free_count);
+	if (count < QDIO_MAX_BUFFERS_PER_Q) {
+		first = (req_queue->free_index+count) % QDIO_MAX_BUFFERS_PER_Q;
+		count = QDIO_MAX_BUFFERS_PER_Q - count;
+		zfcp_qdio_zero_sbals(req_queue->buffer, first, count);
+	}
+	req_queue->free_index = 0;
+	atomic_set(&req_queue->free_count, 0);
+	req_queue->distance_from_int = 0;
+	adapter->response_queue.free_index = 0;
+	atomic_set(&adapter->response_queue.free_count, 0);
+}
+
+/**
+ * zfcp_close_fsf - stop FSF operations for an adapter
+ *
+ * Dismiss and cleanup all pending fsf_reqs (this wakes up all initiators of
+ * requests waiting for completion; especially this returns SCSI commands
+ * with error state).
+ */
+static void zfcp_close_fsf(struct zfcp_adapter *adapter)
+{
+	/* close queues to ensure that buffers are not accessed by adapter */
+	zfcp_close_qdio(adapter);
+	zfcp_fsf_req_dismiss_all(adapter);
+	/* reset FSF request sequence number */
+	adapter->fsf_req_seq_no = 0;
+	/* clear COMMON_OPEN on the adapter: all ports and units are closed */
+	zfcp_erp_modify_adapter_status(adapter,
+				       ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR);
+}
 
 /**
  * zfcp_fsf_request_timeout_handler - called if a request timed out
@@ -121,52 +174,20 @@
  * This function needs to be called if requests (ELS, Generic Service,
  * or SCSI commands) exceed a certain time limit. The assumption is
  * that after the time limit the adapter get stuck. So we trigger a reopen of
- * the adapter. This should not be used for error recovery, SCSI abort
- * commands and SCSI requests from SCSI mid-layer.
+ * the adapter.
  */
-void
-zfcp_fsf_request_timeout_handler(unsigned long data)
+static void zfcp_fsf_request_timeout_handler(unsigned long data)
 {
-	struct zfcp_adapter *adapter;
-
-	adapter = (struct zfcp_adapter *) data;
-
+	struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
 	zfcp_erp_adapter_reopen(adapter, 0);
 }
 
-/**
- * zfcp_fsf_scsi_er_timeout_handler - timeout handler for scsi eh tasks
- *
- * This function needs to be called whenever a SCSI error recovery
- * action (abort/reset) does not return.  Re-opening the adapter means
- * that the abort/reset command can be returned by zfcp. It won't complete
- * via the adapter anymore (because qdio queues are closed). If ERP is
- * already running on this adapter it will be stopped.
- */
-void zfcp_fsf_scsi_er_timeout_handler(unsigned long data)
+void zfcp_fsf_start_timer(struct zfcp_fsf_req *fsf_req, unsigned long timeout)
 {
-	struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
-	unsigned long flags;
-
-	ZFCP_LOG_NORMAL("warning: SCSI error recovery timed out. "
-			"Restarting all operations on the adapter %s\n",
-			zfcp_get_busid_by_adapter(adapter));
-	debug_text_event(adapter->erp_dbf, 1, "eh_lmem_tout");
-
-	write_lock_irqsave(&adapter->erp_lock, flags);
-	if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
-			     &adapter->status)) {
-		zfcp_erp_modify_adapter_status(adapter,
-		       ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN,
-		       ZFCP_CLEAR);
-		zfcp_erp_action_dismiss_adapter(adapter);
-		write_unlock_irqrestore(&adapter->erp_lock, flags);
-		/* dismiss all pending requests including requests for ERP */
-		zfcp_fsf_req_dismiss_all(adapter);
-		adapter->fsf_req_seq_no = 0;
-	} else
-		write_unlock_irqrestore(&adapter->erp_lock, flags);
-	zfcp_erp_adapter_reopen(adapter, 0);
+	fsf_req->timer.function = zfcp_fsf_request_timeout_handler;
+	fsf_req->timer.data = (unsigned long) fsf_req->adapter;
+	fsf_req->timer.expires = timeout;
+	add_timer(&fsf_req->timer);
 }
 
 /*
@@ -282,7 +303,6 @@
 	struct zfcp_ls_adisc *adisc;
 	void *address = NULL;
 	int retval = 0;
-	struct timer_list *timer;
 
 	send_els = kzalloc(sizeof(struct zfcp_send_els), GFP_ATOMIC);
 	if (send_els == NULL)
@@ -329,22 +349,11 @@
 		      (wwn_t) adisc->wwnn, adisc->hard_nport_id,
 		      adisc->nport_id);
 
-	timer = kmalloc(sizeof(struct timer_list), GFP_ATOMIC);
-	if (!timer)
-		goto nomem;
-
-	init_timer(timer);
-	timer->function = zfcp_fsf_request_timeout_handler;
-	timer->data = (unsigned long) adapter;
-	timer->expires = ZFCP_FSF_REQUEST_TIMEOUT;
-	send_els->timer = timer;
-
 	retval = zfcp_fsf_send_els(send_els);
 	if (retval != 0) {
 		ZFCP_LOG_NORMAL("error: initiation of Send ELS failed for port "
 				"0x%08x on adapter %s\n", send_els->d_id,
 				zfcp_get_busid_by_adapter(adapter));
-		del_timer(send_els->timer);
 		goto freemem;
 	}
 
@@ -356,7 +365,6 @@
 	if (address != NULL)
 		__free_pages(send_els->req->page, 0);
 	if (send_els != NULL) {
-		kfree(send_els->timer);
 		kfree(send_els->req);
 		kfree(send_els->resp);
 		kfree(send_els);
@@ -382,9 +390,6 @@
 	struct zfcp_ls_adisc_acc *adisc;
 
 	send_els = (struct zfcp_send_els *) data;
-
-	del_timer(send_els->timer);
-
 	adapter = send_els->adapter;
 	port = send_els->port;
 	d_id = send_els->d_id;
@@ -433,7 +438,6 @@
  out:
 	zfcp_port_put(port);
 	__free_pages(send_els->req->page, 0);
-	kfree(send_els->timer);
 	kfree(send_els->req);
 	kfree(send_els->resp);
 	kfree(send_els);
@@ -909,8 +913,6 @@
 		debug_text_event(adapter->erp_dbf, 2, "a_asyh_ex");
 		debug_event(adapter->erp_dbf, 2, &erp_action->action,
 			    sizeof (int));
-		if (!(set_mask & ZFCP_STATUS_ERP_TIMEDOUT))
-			del_timer(&erp_action->timer);
 		erp_action->status |= set_mask;
 		zfcp_erp_action_ready(erp_action);
 	} else {
@@ -957,8 +959,7 @@
  *		action gets an appropriate flag and will be processed
  *		accordingly
  */
-static void
-zfcp_erp_timeout_handler(unsigned long data)
+void zfcp_erp_timeout_handler(unsigned long data)
 {
 	struct zfcp_erp_action *erp_action = (struct zfcp_erp_action *) data;
 	struct zfcp_adapter *adapter = erp_action->adapter;
@@ -1934,8 +1935,7 @@
 			  &erp_action->adapter->status);
 
  failed_openfcp:
-	zfcp_erp_adapter_strategy_close_qdio(erp_action);
-	zfcp_erp_adapter_strategy_close_fsf(erp_action);
+	zfcp_close_fsf(erp_action->adapter);
  failed_qdio:
  out:
 	return retval;
@@ -2040,59 +2040,6 @@
 	return retval;
 }
 
-/**
- * zfcp_erp_adapter_strategy_close_qdio - close qdio queues for an adapter
- */
-static void
-zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *erp_action)
-{
-	int first_used;
-	int used_count;
-	struct zfcp_adapter *adapter = erp_action->adapter;
-
-	if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status)) {
-		ZFCP_LOG_DEBUG("error: attempt to shut down inactive QDIO "
-			       "queues on adapter %s\n",
-			       zfcp_get_busid_by_adapter(adapter));
-		return;
-	}
-
-	/*
-	 * Get queue_lock and clear QDIOUP flag. Thus it's guaranteed that
-	 * do_QDIO won't be called while qdio_shutdown is in progress.
-	 */
-	write_lock_irq(&adapter->request_queue.queue_lock);
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
-	write_unlock_irq(&adapter->request_queue.queue_lock);
-
-	debug_text_event(adapter->erp_dbf, 3, "qdio_down2a");
-	while (qdio_shutdown(adapter->ccw_device,
-			     QDIO_FLAG_CLEANUP_USING_CLEAR) == -EINPROGRESS)
-		msleep(1000);
-	debug_text_event(adapter->erp_dbf, 3, "qdio_down2b");
-
-	/*
-	 * First we had to stop QDIO operation.
-	 * Now it is safe to take the following actions.
-	 */
-
-	/* Cleanup only necessary when there are unacknowledged buffers */
-	if (atomic_read(&adapter->request_queue.free_count)
-	    < QDIO_MAX_BUFFERS_PER_Q) {
-		first_used = (adapter->request_queue.free_index +
-			      atomic_read(&adapter->request_queue.free_count))
-			% QDIO_MAX_BUFFERS_PER_Q;
-		used_count = QDIO_MAX_BUFFERS_PER_Q -
-			atomic_read(&adapter->request_queue.free_count);
-		zfcp_qdio_zero_sbals(adapter->request_queue.buffer,
-				     first_used, used_count);
-	}
-	adapter->response_queue.free_index = 0;
-	atomic_set(&adapter->response_queue.free_count, 0);
-	adapter->request_queue.free_index = 0;
-	atomic_set(&adapter->request_queue.free_count, 0);
-	adapter->request_queue.distance_from_int = 0;
-}
 
 static int
 zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *erp_action)
@@ -2127,7 +2074,6 @@
 		write_lock_irq(&adapter->erp_lock);
 		zfcp_erp_action_to_running(erp_action);
 		write_unlock_irq(&adapter->erp_lock);
-		zfcp_erp_timeout_init(erp_action);
 		if (zfcp_fsf_exchange_config_data(erp_action)) {
 			retval = ZFCP_ERP_FAILED;
 			debug_text_event(adapter->erp_dbf, 5, "a_fstx_xf");
@@ -2196,7 +2142,6 @@
 	zfcp_erp_action_to_running(erp_action);
 	write_unlock_irq(&adapter->erp_lock);
 
-	zfcp_erp_timeout_init(erp_action);
 	ret = zfcp_fsf_exchange_port_data(erp_action, adapter, NULL);
 	if (ret == -EOPNOTSUPP) {
 		debug_text_event(adapter->erp_dbf, 3, "a_xport_notsupp");
@@ -2248,27 +2193,6 @@
 	return retval;
 }
 
-/**
- * zfcp_erp_adapter_strategy_close_fsf - stop FSF operations for an adapter
- */
-static void
-zfcp_erp_adapter_strategy_close_fsf(struct zfcp_erp_action *erp_action)
-{
-	struct zfcp_adapter *adapter = erp_action->adapter;
-
-	/*
-	 * wake waiting initiators of requests,
-	 * return SCSI commands (with error status),
-	 * clean up all requests (synchronously)
-	 */
-	zfcp_fsf_req_dismiss_all(adapter);
-	/* reset FSF request sequence number */
-	adapter->fsf_req_seq_no = 0;
-	/* all ports and units are closed */
-	zfcp_erp_modify_adapter_status(adapter,
-				       ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR);
-}
-
 /*
  * function:	
  *
@@ -2605,7 +2529,6 @@
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_port *port = erp_action->port;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_close_physical_port(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "o_pfstc_nomem");
@@ -2662,7 +2585,6 @@
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_port *port = erp_action->port;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_close_port(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "p_pstc_nomem");
@@ -2700,7 +2622,6 @@
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_port *port = erp_action->port;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_open_port(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "p_psto_nomem");
@@ -2738,7 +2659,6 @@
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_port *port = erp_action->port;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_ns_gid_pn_request(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "p_pstn_nomem");
@@ -2864,7 +2784,6 @@
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_unit *unit = erp_action->unit;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_close_unit(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "u_ustc_nomem");
@@ -2905,7 +2824,6 @@
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_unit *unit = erp_action->unit;
 
-	zfcp_erp_timeout_init(erp_action);
 	retval = zfcp_fsf_open_unit(erp_action);
 	if (retval == -ENOMEM) {
 		debug_text_event(adapter->erp_dbf, 5, "u_usto_nomem");
@@ -2930,14 +2848,13 @@
 	return retval;
 }
 
-static inline void
-zfcp_erp_timeout_init(struct zfcp_erp_action *erp_action)
+void zfcp_erp_start_timer(struct zfcp_fsf_req *fsf_req)
 {
-	init_timer(&erp_action->timer);
-	erp_action->timer.function = zfcp_erp_timeout_handler;
-	erp_action->timer.data = (unsigned long) erp_action;
-	/* jiffies will be added in zfcp_fsf_req_send */
-	erp_action->timer.expires = ZFCP_ERP_FSFREQ_TIMEOUT;
+	BUG_ON(!fsf_req->erp_action);
+	fsf_req->timer.function = zfcp_erp_timeout_handler;
+	fsf_req->timer.data = (unsigned long) fsf_req->erp_action;
+	fsf_req->timer.expires = jiffies + ZFCP_ERP_FSFREQ_TIMEOUT;
+	add_timer(&fsf_req->timer);
 }
 
 /*
@@ -3241,7 +3158,7 @@
 }
 
 
-void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
+static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
 {
 	struct zfcp_port *port;
 
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index 146d7a2..b8794d7 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -55,7 +55,6 @@
 
 /******************************* S/390 IO ************************************/
 extern int  zfcp_ccw_register(void);
-extern void zfcp_ccw_unregister(void);
 
 extern void zfcp_qdio_zero_sbals(struct qdio_buffer **, int, int);
 extern int  zfcp_qdio_allocate(struct zfcp_adapter *);
@@ -88,8 +87,8 @@
 					struct fsf_qtcb_bottom_port *);
 extern int  zfcp_fsf_control_file(struct zfcp_adapter *, struct zfcp_fsf_req **,
 				  u32, u32, struct zfcp_sg_list *);
-extern void zfcp_fsf_request_timeout_handler(unsigned long);
-extern void zfcp_fsf_scsi_er_timeout_handler(unsigned long);
+extern void zfcp_fsf_start_timer(struct zfcp_fsf_req *, unsigned long);
+extern void zfcp_erp_start_timer(struct zfcp_fsf_req *);
 extern int  zfcp_fsf_req_dismiss_all(struct zfcp_adapter *);
 extern int  zfcp_fsf_status_read(struct zfcp_adapter *, int);
 extern int zfcp_fsf_req_create(struct zfcp_adapter *, u32, int, mempool_t *,
@@ -99,8 +98,7 @@
 extern int zfcp_fsf_send_els(struct zfcp_send_els *);
 extern int  zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *,
 					   struct zfcp_unit *,
-					   struct scsi_cmnd *,
-					   struct timer_list*, int);
+					   struct scsi_cmnd *, int, int);
 extern int  zfcp_fsf_req_complete(struct zfcp_fsf_req *);
 extern void zfcp_fsf_incoming_els(struct zfcp_fsf_req *);
 extern void zfcp_fsf_req_free(struct zfcp_fsf_req *);
@@ -124,14 +122,11 @@
 extern void set_host_byte(u32 *, char);
 extern void set_driver_byte(u32 *, char);
 extern char *zfcp_get_fcp_sns_info_ptr(struct fcp_rsp_iu *);
-extern void zfcp_fsf_start_scsi_er_timer(struct zfcp_adapter *);
 extern fcp_dl_t zfcp_get_fcp_dl(struct fcp_cmnd_iu *);
 
 extern int zfcp_scsi_command_async(struct zfcp_adapter *,struct zfcp_unit *,
-				   struct scsi_cmnd *, struct timer_list *);
-extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *,
-				  struct timer_list *);
-extern struct scsi_transport_template *zfcp_transport_template;
+				   struct scsi_cmnd *, int);
+extern int zfcp_scsi_command_sync(struct zfcp_unit *, struct scsi_cmnd *, int);
 extern struct fc_function_template zfcp_transport_functions;
 
 /******************************** ERP ****************************************/
@@ -139,7 +134,6 @@
 extern int  zfcp_erp_adapter_reopen(struct zfcp_adapter *, int);
 extern int  zfcp_erp_adapter_shutdown(struct zfcp_adapter *, int);
 extern void zfcp_erp_adapter_failed(struct zfcp_adapter *);
-extern void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *);
 
 extern void zfcp_erp_modify_port_status(struct zfcp_port *, u32, int);
 extern int  zfcp_erp_port_reopen(struct zfcp_port *, int);
@@ -187,7 +181,7 @@
 				       struct zfcp_fsf_req *);
 extern void zfcp_scsi_dbf_event_abort(const char *, struct zfcp_adapter *,
 				      struct scsi_cmnd *, struct zfcp_fsf_req *,
-				      struct zfcp_fsf_req *);
+				      unsigned long);
 extern void zfcp_scsi_dbf_event_devreset(const char *, u8, struct zfcp_unit *,
 					 struct scsi_cmnd *);
 extern void zfcp_reqlist_add(struct zfcp_adapter *, struct zfcp_fsf_req *);
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index ff2eacf..277826c 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -42,7 +42,7 @@
 static inline int zfcp_use_one_sbal(
 	struct scatterlist *, int, struct scatterlist *, int);
 static struct zfcp_fsf_req *zfcp_fsf_req_alloc(mempool_t *, int);
-static int zfcp_fsf_req_send(struct zfcp_fsf_req *, struct timer_list *);
+static int zfcp_fsf_req_send(struct zfcp_fsf_req *);
 static int zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *);
 static int zfcp_fsf_fsfstatus_eval(struct zfcp_fsf_req *);
 static int zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *);
@@ -100,14 +100,19 @@
 	if (req_flags & ZFCP_REQ_NO_QTCB)
 		size = sizeof(struct zfcp_fsf_req);
 	else
-		size = sizeof(struct zfcp_fsf_req_pool_element);
+		size = sizeof(struct zfcp_fsf_req_qtcb);
 
-	if (likely(pool != NULL))
+	if (likely(pool))
 		ptr = mempool_alloc(pool, GFP_ATOMIC);
-	else
-		ptr = kmalloc(size, GFP_ATOMIC);
+	else {
+		if (req_flags & ZFCP_REQ_NO_QTCB)
+			ptr = kmalloc(size, GFP_ATOMIC);
+		else
+			ptr = kmem_cache_alloc(zfcp_data.fsf_req_qtcb_cache,
+					       SLAB_ATOMIC);
+	}
 
-	if (unlikely(NULL == ptr))
+	if (unlikely(!ptr))
 		goto out;
 
 	memset(ptr, 0, size);
@@ -115,9 +120,8 @@
 	if (req_flags & ZFCP_REQ_NO_QTCB) {
 		fsf_req = (struct zfcp_fsf_req *) ptr;
 	} else {
-		fsf_req = &((struct zfcp_fsf_req_pool_element *) ptr)->fsf_req;
-		fsf_req->qtcb =
-			&((struct zfcp_fsf_req_pool_element *) ptr)->qtcb;
+		fsf_req = &((struct zfcp_fsf_req_qtcb *) ptr)->fsf_req;
+		fsf_req->qtcb =	&((struct zfcp_fsf_req_qtcb *) ptr)->qtcb;
 	}
 
 	fsf_req->pool = pool;
@@ -139,10 +143,17 @@
 void
 zfcp_fsf_req_free(struct zfcp_fsf_req *fsf_req)
 {
-	if (likely(fsf_req->pool != NULL))
+	if (likely(fsf_req->pool)) {
 		mempool_free(fsf_req, fsf_req->pool);
-	else
-		kfree(fsf_req);
+		return;
+	}
+
+	if (fsf_req->qtcb) {
+		kmem_cache_free(zfcp_data.fsf_req_qtcb_cache, fsf_req);
+		return;
+	}
+
+	kfree(fsf_req);
 }
 
 /**
@@ -214,8 +225,10 @@
 		 */
 		zfcp_fsf_status_read_handler(fsf_req);
 		goto out;
-	} else
+	} else {
+		del_timer(&fsf_req->timer);
 		zfcp_fsf_protstatus_eval(fsf_req);
+	}
 
 	/*
 	 * fsf_req may be deleted due to waking up functions, so 
@@ -774,8 +787,7 @@
 	sbale->addr = (void *) status_buffer;
 	sbale->length = sizeof(struct fsf_status_read_buffer);
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(fsf_req, NULL);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_DEBUG("error: Could not set-up unsolicited status "
 			       "environment.\n");
@@ -1101,8 +1113,8 @@
 			   struct zfcp_unit *unit, int req_flags)
 {
 	volatile struct qdio_buffer_element *sbale;
-	unsigned long lock_flags;
 	struct zfcp_fsf_req *fsf_req = NULL;
+	unsigned long lock_flags;
 	int retval = 0;
 
 	/* setup new FSF request */
@@ -1132,12 +1144,9 @@
 	/* set handle of request which should be aborted */
 	fsf_req->qtcb->bottom.support.req_handle = (u64) old_req_id;
 
-	/* start QDIO request for this FSF request */
-
-	zfcp_fsf_start_scsi_er_timer(adapter);
-	retval = zfcp_fsf_req_send(fsf_req, NULL);
+	zfcp_fsf_start_timer(fsf_req, ZFCP_SCSI_ER_TIMEOUT);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
-		del_timer(&adapter->scsi_er_timer);
 		ZFCP_LOG_INFO("error: Failed to send abort command request "
 			      "on adapter %s, port 0x%016Lx, unit 0x%016Lx\n",
 			      zfcp_get_busid_by_adapter(adapter),
@@ -1173,8 +1182,6 @@
 	unsigned char status_qual =
 	    new_fsf_req->qtcb->header.fsf_status_qual.word[0];
 
-	del_timer(&new_fsf_req->adapter->scsi_er_timer);
-
 	if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
 		/* do not set ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED */
 		goto skip_fsfstatus;
@@ -1380,11 +1387,6 @@
 		goto failed_req;
 	}
 
-        if (erp_action != NULL) {
-                erp_action->fsf_req = fsf_req;
-                fsf_req->erp_action = erp_action;
-        }
-
 	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         if (zfcp_use_one_sbal(ct->req, ct->req_count,
                               ct->resp, ct->resp_count)){
@@ -1451,8 +1453,14 @@
 
 	zfcp_san_dbf_event_ct_request(fsf_req);
 
-	/* start QDIO request for this FSF request */
-	ret = zfcp_fsf_req_send(fsf_req, ct->timer);
+	if (erp_action) {
+		erp_action->fsf_req = fsf_req;
+		fsf_req->erp_action = erp_action;
+		zfcp_erp_start_timer(fsf_req);
+	} else
+		zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
+
+	ret = zfcp_fsf_req_send(fsf_req);
 	if (ret) {
 		ZFCP_LOG_DEBUG("error: initiation of CT request failed "
 			       "(adapter %s, port 0x%016Lx)\n",
@@ -1749,8 +1757,8 @@
 
 	zfcp_san_dbf_event_els_request(fsf_req);
 
-	/* start QDIO request for this FSF request */
-	ret = zfcp_fsf_req_send(fsf_req, els->timer);
+	zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
+	ret = zfcp_fsf_req_send(fsf_req);
 	if (ret) {
 		ZFCP_LOG_DEBUG("error: initiation of ELS request failed "
 			       "(adapter %s, port d_id: 0x%08x)\n",
@@ -1947,6 +1955,7 @@
 zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -1955,7 +1964,7 @@
 				     FSF_QTCB_EXCHANGE_CONFIG_DATA,
 				     ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create exchange configuration "
 			      "data request for adapter %s.\n",
@@ -1963,26 +1972,26 @@
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	erp_action->fsf_req->erp_action = erp_action;
-	erp_action->fsf_req->qtcb->bottom.config.feature_selection =
+	fsf_req->qtcb->bottom.config.feature_selection =
 			FSF_FEATURE_CFDC |
 			FSF_FEATURE_LUN_SHARING |
 			FSF_FEATURE_NOTIFICATION_LOST |
 			FSF_FEATURE_UPDATE_ALERT;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO
 		    ("error: Could not send exchange configuration data "
 		     "command on the adapter %s\n",
 		     zfcp_get_busid_by_adapter(erp_action->adapter));
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -2212,10 +2221,9 @@
 			    struct fsf_qtcb_bottom_port *data)
 {
 	volatile struct qdio_buffer_element *sbale;
-	int retval = 0;
-	unsigned long lock_flags;
         struct zfcp_fsf_req *fsf_req;
-	struct timer_list *timer;
+	unsigned long lock_flags;
+	int retval = 0;
 
 	if (!(adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT)) {
 		ZFCP_LOG_INFO("error: exchange port data "
@@ -2248,22 +2256,11 @@
 	if (erp_action) {
 		erp_action->fsf_req = fsf_req;
 		fsf_req->erp_action = erp_action;
-		timer = &erp_action->timer;
-	} else {
-		timer = kmalloc(sizeof(struct timer_list), GFP_ATOMIC);
-		if (!timer) {
-			write_unlock_irqrestore(&adapter->request_queue.queue_lock,
-						lock_flags);
-			zfcp_fsf_req_free(fsf_req);
-			return -ENOMEM;
-		}
-		init_timer(timer);
-		timer->function = zfcp_fsf_request_timeout_handler;
-		timer->data = (unsigned long) adapter;
-		timer->expires = ZFCP_FSF_REQUEST_TIMEOUT;
-	}
+		zfcp_erp_start_timer(fsf_req);
+	} else
+		zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
 
-	retval = zfcp_fsf_req_send(fsf_req, timer);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send an exchange port data "
                               "command on the adapter %s\n",
@@ -2271,8 +2268,6 @@
 		zfcp_fsf_req_free(fsf_req);
 		if (erp_action)
 			erp_action->fsf_req = NULL;
-		else
-			kfree(timer);
 		write_unlock_irqrestore(&adapter->request_queue.queue_lock,
 					lock_flags);
 		return retval;
@@ -2283,9 +2278,7 @@
 	if (!erp_action) {
 		wait_event(fsf_req->completion_wq,
 			   fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
-		del_timer_sync(timer);
 		zfcp_fsf_req_free(fsf_req);
-		kfree(timer);
 	}
 	return retval;
 }
@@ -2367,6 +2360,7 @@
 zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -2375,7 +2369,7 @@
 				     FSF_QTCB_OPEN_PORT_WITH_DID,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create open port request "
 			      "for port 0x%016Lx on adapter %s.\n",
@@ -2384,24 +2378,24 @@
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	erp_action->fsf_req->qtcb->bottom.support.d_id = erp_action->port->d_id;
+	fsf_req->qtcb->bottom.support.d_id = erp_action->port->d_id;
 	atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->port->status);
-	erp_action->fsf_req->data = (unsigned long) erp_action->port;
-	erp_action->fsf_req->erp_action = erp_action;
+	fsf_req->data = (unsigned long) erp_action->port;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send open port request for "
 			      "port 0x%016Lx on adapter %s.\n",
 			      erp_action->port->wwpn,
 			      zfcp_get_busid_by_adapter(erp_action->adapter));
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -2623,6 +2617,7 @@
 zfcp_fsf_close_port(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -2631,7 +2626,7 @@
 				     FSF_QTCB_CLOSE_PORT,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create a close port request "
 			      "for port 0x%016Lx on adapter %s.\n",
@@ -2640,25 +2635,25 @@
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
 	atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->port->status);
-	erp_action->fsf_req->data = (unsigned long) erp_action->port;
-	erp_action->fsf_req->erp_action = erp_action;
-	erp_action->fsf_req->qtcb->header.port_handle =
-	    erp_action->port->handle;
+	fsf_req->data = (unsigned long) erp_action->port;
+	fsf_req->erp_action = erp_action;
+	fsf_req->qtcb->header.port_handle = erp_action->port->handle;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send a close port request for "
 			      "port 0x%016Lx on adapter %s.\n",
 			      erp_action->port->wwpn,
 			      zfcp_get_busid_by_adapter(erp_action->adapter));
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -2755,16 +2750,17 @@
 int
 zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action)
 {
-	int retval = 0;
-	unsigned long lock_flags;
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
+	unsigned long lock_flags;
+	int retval = 0;
 
 	/* setup new FSF request */
 	retval = zfcp_fsf_req_create(erp_action->adapter,
 				     FSF_QTCB_CLOSE_PHYSICAL_PORT,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &erp_action->fsf_req);
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create close physical port "
 			      "request (adapter %s, port 0x%016Lx)\n",
@@ -2774,8 +2770,7 @@
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-				    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
 	sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
 	sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
@@ -2783,20 +2778,19 @@
 	atomic_set_mask(ZFCP_STATUS_PORT_PHYS_CLOSING,
 			&erp_action->port->status);
 	/* save a pointer to this port */
-	erp_action->fsf_req->data = (unsigned long) erp_action->port;
-	/* port to be closed */
-	erp_action->fsf_req->qtcb->header.port_handle =
-	    erp_action->port->handle;
-	erp_action->fsf_req->erp_action = erp_action;
+	fsf_req->data = (unsigned long) erp_action->port;
+	fsf_req->qtcb->header.port_handle = erp_action->port->handle;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send close physical port "
 			      "request (adapter %s, port 0x%016Lx)\n",
 			      zfcp_get_busid_by_adapter(erp_action->adapter),
 			      erp_action->port->wwpn);
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -2961,6 +2955,7 @@
 zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -2969,7 +2964,7 @@
 				     FSF_QTCB_OPEN_LUN,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create open unit request for "
 			      "unit 0x%016Lx on port 0x%016Lx on adapter %s.\n",
@@ -2979,24 +2974,22 @@
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	erp_action->fsf_req->qtcb->header.port_handle =
-		erp_action->port->handle;
-	erp_action->fsf_req->qtcb->bottom.support.fcp_lun =
-		erp_action->unit->fcp_lun;
+	fsf_req->qtcb->header.port_handle = erp_action->port->handle;
+	fsf_req->qtcb->bottom.support.fcp_lun =	erp_action->unit->fcp_lun;
 	if (!(erp_action->adapter->connection_features & FSF_FEATURE_NPIV_MODE))
-		erp_action->fsf_req->qtcb->bottom.support.option =
+		fsf_req->qtcb->bottom.support.option =
 			FSF_OPEN_LUN_SUPPRESS_BOXING;
 	atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->unit->status);
-	erp_action->fsf_req->data = (unsigned long) erp_action->unit;
-	erp_action->fsf_req->erp_action = erp_action;
+	fsf_req->data = (unsigned long) erp_action->unit;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(erp_action->fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send an open unit request "
 			      "on the adapter %s, port 0x%016Lx for "
@@ -3004,7 +2997,7 @@
 			      zfcp_get_busid_by_adapter(erp_action->adapter),
 			      erp_action->port->wwpn,
 			      erp_action->unit->fcp_lun);
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -3297,6 +3290,7 @@
 zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action)
 {
 	volatile struct qdio_buffer_element *sbale;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long lock_flags;
 	int retval = 0;
 
@@ -3305,7 +3299,7 @@
 				     FSF_QTCB_CLOSE_LUN,
 				     ZFCP_WAIT_FOR_SBAL | ZFCP_REQ_AUTO_CLEANUP,
 				     erp_action->adapter->pool.fsf_req_erp,
-				     &lock_flags, &(erp_action->fsf_req));
+				     &lock_flags, &fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("error: Could not create close unit request for "
 			      "unit 0x%016Lx on port 0x%016Lx on adapter %s.\n",
@@ -3315,27 +3309,26 @@
 		goto out;
 	}
 
-	sbale = zfcp_qdio_sbale_req(erp_action->fsf_req,
-                                    erp_action->fsf_req->sbal_curr, 0);
+	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	erp_action->fsf_req->qtcb->header.port_handle =
-	    erp_action->port->handle;
-	erp_action->fsf_req->qtcb->header.lun_handle = erp_action->unit->handle;
+	fsf_req->qtcb->header.port_handle = erp_action->port->handle;
+	fsf_req->qtcb->header.lun_handle = erp_action->unit->handle;
 	atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->unit->status);
-	erp_action->fsf_req->data = (unsigned long) erp_action->unit;
-	erp_action->fsf_req->erp_action = erp_action;
+	fsf_req->data = (unsigned long) erp_action->unit;
+	fsf_req->erp_action = erp_action;
+	erp_action->fsf_req = fsf_req;
 
-	/* start QDIO request for this FSF request */
-	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
+	zfcp_erp_start_timer(fsf_req);
+	retval = zfcp_fsf_req_send(erp_action->fsf_req);
 	if (retval) {
 		ZFCP_LOG_INFO("error: Could not send a close unit request for "
 			      "unit 0x%016Lx on port 0x%016Lx onadapter %s.\n",
 			      erp_action->unit->fcp_lun,
 			      erp_action->port->wwpn,
 			      zfcp_get_busid_by_adapter(erp_action->adapter));
-		zfcp_fsf_req_free(erp_action->fsf_req);
+		zfcp_fsf_req_free(fsf_req);
 		erp_action->fsf_req = NULL;
 		goto out;
 	}
@@ -3488,7 +3481,7 @@
 zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter,
 			       struct zfcp_unit *unit,
 			       struct scsi_cmnd * scsi_cmnd,
-			       struct timer_list *timer, int req_flags)
+			       int use_timer, int req_flags)
 {
 	struct zfcp_fsf_req *fsf_req = NULL;
 	struct fcp_cmnd_iu *fcp_cmnd_iu;
@@ -3516,7 +3509,7 @@
 	fsf_req->unit = unit;
 
 	/* associate FSF request with SCSI request (for look up on abort) */
-	scsi_cmnd->host_scribble = (char *) fsf_req;
+	scsi_cmnd->host_scribble = (unsigned char *) fsf_req->req_id;
 
 	/* associate SCSI command with FSF request */
 	fsf_req->data = (unsigned long) scsi_cmnd;
@@ -3629,11 +3622,10 @@
 	ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_DEBUG,
 		      (char *) scsi_cmnd->cmnd, scsi_cmnd->cmd_len);
 
-	/*
-	 * start QDIO request for this FSF request
-	 *  covered by an SBALE)
-	 */
-	retval = zfcp_fsf_req_send(fsf_req, timer);
+	if (use_timer)
+		zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
+
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (unlikely(retval < 0)) {
 		ZFCP_LOG_INFO("error: Could not send FCP command request "
 			      "on adapter %s, port 0x%016Lx, unit 0x%016Lx\n",
@@ -3718,11 +3710,9 @@
 	fcp_cmnd_iu->fcp_lun = unit->fcp_lun;
 	fcp_cmnd_iu->task_management_flags = tm_flags;
 
-	/* start QDIO request for this FSF request */
-	zfcp_fsf_start_scsi_er_timer(adapter);
-	retval = zfcp_fsf_req_send(fsf_req, NULL);
+	zfcp_fsf_start_timer(fsf_req, ZFCP_SCSI_ER_TIMEOUT);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval) {
-		del_timer(&adapter->scsi_er_timer);
 		ZFCP_LOG_INFO("error: Could not send an FCP-command (task "
 			      "management) on adapter %s, port 0x%016Lx for "
 			      "unit LUN 0x%016Lx\n",
@@ -4226,7 +4216,6 @@
 	char *fcp_rsp_info = zfcp_get_fcp_rsp_info_ptr(fcp_rsp_iu);
 	struct zfcp_unit *unit = (struct zfcp_unit *) fsf_req->data;
 
-	del_timer(&fsf_req->adapter->scsi_er_timer);
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_TMFUNCFAILED;
 		goto skip_fsfstatus;
@@ -4295,7 +4284,6 @@
 	struct zfcp_fsf_req *fsf_req;
 	struct fsf_qtcb_bottom_support *bottom;
 	volatile struct qdio_buffer_element *sbale;
-	struct timer_list *timer;
 	unsigned long lock_flags;
 	int req_flags = 0;
 	int direction;
@@ -4327,12 +4315,6 @@
 		goto out;
 	}
 
-	timer = kmalloc(sizeof(struct timer_list), GFP_KERNEL);
-	if (!timer) {
-		retval = -ENOMEM;
-		goto out;
- 	}
-
 	retval = zfcp_fsf_req_create(adapter, fsf_command, req_flags,
 				     NULL, &lock_flags, &fsf_req);
 	if (retval < 0) {
@@ -4367,12 +4349,8 @@
 	} else
 		sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	init_timer(timer);
-	timer->function = zfcp_fsf_request_timeout_handler;
-	timer->data = (unsigned long) adapter;
-	timer->expires = ZFCP_FSF_REQUEST_TIMEOUT;
-
-	retval = zfcp_fsf_req_send(fsf_req, timer);
+	zfcp_fsf_start_timer(fsf_req, ZFCP_FSF_REQUEST_TIMEOUT);
+	retval = zfcp_fsf_req_send(fsf_req);
 	if (retval < 0) {
 		ZFCP_LOG_INFO("initiation of cfdc up/download failed"
 			      "(adapter %s)\n",
@@ -4392,15 +4370,12 @@
 	           fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
 
 	*fsf_req_ptr = fsf_req;
-	del_timer_sync(timer);
-	goto free_timer;
+	goto out;
 
  free_fsf_req:
 	zfcp_fsf_req_free(fsf_req);
  unlock_queue_lock:
 	write_unlock_irqrestore(&adapter->request_queue.queue_lock, lock_flags);
- free_timer:
-	kfree(timer);
  out:
 	return retval;
 }
@@ -4656,7 +4631,6 @@
 {
 	volatile struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *fsf_req = NULL;
-	unsigned long flags;
 	int ret = 0;
 	struct zfcp_qdio_queue *req_queue = &adapter->request_queue;
 
@@ -4673,12 +4647,13 @@
 	fsf_req->fsf_command = fsf_cmd;
 	INIT_LIST_HEAD(&fsf_req->list);
 	
-	/* unique request id */
-	spin_lock_irqsave(&adapter->req_list_lock, flags);
+	/* this is serialized (we are holding req_queue-lock of adapter) */
+	if (adapter->req_no == 0)
+		adapter->req_no++;
 	fsf_req->req_id = adapter->req_no++;
-	spin_unlock_irqrestore(&adapter->req_list_lock, flags);
 
-        zfcp_fsf_req_qtcb_init(fsf_req);
+	init_timer(&fsf_req->timer);
+	zfcp_fsf_req_qtcb_init(fsf_req);
 
 	/* initialize waitqueue which may be used to wait on 
 	   this request completion */
@@ -4748,8 +4723,7 @@
  * returns:	0 - request transfer succesfully started
  *		!0 - start of request transfer failed
  */
-static int
-zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
+static int zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req)
 {
 	struct zfcp_adapter *adapter;
 	struct zfcp_qdio_queue *req_queue;
@@ -4777,12 +4751,6 @@
 
 	inc_seq_no = (fsf_req->qtcb != NULL);
 
-	/* figure out expiration time of timeout and start timeout */
-	if (unlikely(timer)) {
-		timer->expires += jiffies;
-		add_timer(timer);
-	}
-
 	ZFCP_LOG_TRACE("request queue of adapter %s: "
 		       "next free SBAL is %i, %i free SBALs\n",
 		       zfcp_get_busid_by_adapter(adapter),
@@ -4819,12 +4787,7 @@
 	if (unlikely(retval)) {
 		/* Queues are down..... */
 		retval = -EIO;
-		/*
-		 * FIXME(potential race):
-		 * timer might be expired (absolutely unlikely)
-		 */
-		if (timer)
-			del_timer(timer);
+		del_timer(&fsf_req->timer);
 		spin_lock(&adapter->req_list_lock);
 		zfcp_reqlist_remove(adapter, fsf_req->req_id);
 		spin_unlock(&adapter->req_list_lock);
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 1bb5508..7cafa34 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -39,11 +39,10 @@
 
 static struct device_attribute *zfcp_sysfs_sdev_attrs[];
 
-struct scsi_transport_template *zfcp_transport_template;
-
 struct zfcp_data zfcp_data = {
 	.scsi_host_template = {
 		.name			= ZFCP_NAME,
+		.module			= THIS_MODULE,
 		.proc_name		= "zfcp",
 		.slave_alloc		= zfcp_scsi_slave_alloc,
 		.slave_configure	= zfcp_scsi_slave_configure,
@@ -232,7 +231,7 @@
  */
 int
 zfcp_scsi_command_async(struct zfcp_adapter *adapter, struct zfcp_unit *unit,
-			struct scsi_cmnd *scpnt, struct timer_list *timer)
+			struct scsi_cmnd *scpnt, int use_timer)
 {
 	int tmp;
 	int retval;
@@ -268,7 +267,7 @@
 		goto out;
 	}
 
-	tmp = zfcp_fsf_send_fcp_command_task(adapter, unit, scpnt, timer,
+	tmp = zfcp_fsf_send_fcp_command_task(adapter, unit, scpnt, use_timer,
 					     ZFCP_REQ_AUTO_CLEANUP);
 
 	if (unlikely(tmp < 0)) {
@@ -292,21 +291,22 @@
  * zfcp_scsi_command_sync - send a SCSI command and wait for completion
  * @unit: unit where command is sent to
  * @scpnt: scsi command to be sent
- * @timer: timer to be started if request is successfully initiated
+ * @use_timer: indicates whether timer should be set up or not
  * Return: 0
  *
  * Errors are indicated in scpnt->result
  */
 int
 zfcp_scsi_command_sync(struct zfcp_unit *unit, struct scsi_cmnd *scpnt,
-		       struct timer_list *timer)
+		       int use_timer)
 {
 	int ret;
 	DECLARE_COMPLETION(wait);
 
 	scpnt->SCp.ptr = (void *) &wait;  /* silent re-use */
 	scpnt->scsi_done = zfcp_scsi_command_sync_handler;
-	ret = zfcp_scsi_command_async(unit->port->adapter, unit, scpnt, timer);
+	ret = zfcp_scsi_command_async(unit->port->adapter, unit, scpnt,
+				      use_timer);
 	if (ret == 0)
 		wait_for_completion(&wait);
 
@@ -342,7 +342,7 @@
 	adapter = (struct zfcp_adapter *) scpnt->device->host->hostdata[0];
 	unit = (struct zfcp_unit *) scpnt->device->hostdata;
 
-	return zfcp_scsi_command_async(adapter, unit, scpnt, NULL);
+	return zfcp_scsi_command_async(adapter, unit, scpnt, 0);
 }
 
 static struct zfcp_unit *
@@ -379,16 +379,15 @@
  * will handle late commands.  (Usually, the normal completion of late
  * commands is ignored with respect to the running abort operation.)
  */
-int
-zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
+int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
 {
  	struct Scsi_Host *scsi_host;
  	struct zfcp_adapter *adapter;
 	struct zfcp_unit *unit;
-	int retval = SUCCESS;
-	struct zfcp_fsf_req *new_fsf_req = NULL;
-	struct zfcp_fsf_req *old_fsf_req;
+	struct zfcp_fsf_req *fsf_req;
 	unsigned long flags;
+	unsigned long old_req_id;
+	int retval = SUCCESS;
 
 	scsi_host = scpnt->device->host;
 	adapter = (struct zfcp_adapter *) scsi_host->hostdata[0];
@@ -400,55 +399,47 @@
 	/* avoid race condition between late normal completion and abort */
 	write_lock_irqsave(&adapter->abort_lock, flags);
 
-	/*
-	 * Check whether command has just completed and can not be aborted.
-	 * Even if the command has just been completed late, we can access
-	 * scpnt since the SCSI stack does not release it at least until
-	 * this routine returns. (scpnt is parameter passed to this routine
-	 * and must not disappear during abort even on late completion.)
-	 */
-	old_fsf_req = (struct zfcp_fsf_req *) scpnt->host_scribble;
-	if (!old_fsf_req) {
+	/* Check whether corresponding fsf_req is still pending */
+	spin_lock(&adapter->req_list_lock);
+	fsf_req = zfcp_reqlist_ismember(adapter, (unsigned long)
+					scpnt->host_scribble);
+	spin_unlock(&adapter->req_list_lock);
+	if (!fsf_req) {
 		write_unlock_irqrestore(&adapter->abort_lock, flags);
-		zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, NULL);
+		zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, 0);
 		retval = SUCCESS;
 		goto out;
 	}
-	old_fsf_req->data = 0;
-	old_fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTING;
+	fsf_req->data = 0;
+	fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTING;
+	old_req_id = fsf_req->req_id;
 
-	/* don't access old_fsf_req after releasing the abort_lock */
+	/* don't access old fsf_req after releasing the abort_lock */
 	write_unlock_irqrestore(&adapter->abort_lock, flags);
-	/* call FSF routine which does the abort */
-	new_fsf_req = zfcp_fsf_abort_fcp_command((unsigned long) old_fsf_req,
-						 adapter, unit, 0);
-	if (!new_fsf_req) {
+
+	fsf_req = zfcp_fsf_abort_fcp_command(old_req_id, adapter, unit, 0);
+	if (!fsf_req) {
 		ZFCP_LOG_INFO("error: initiation of Abort FCP Cmnd failed\n");
 		zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL,
-					  old_fsf_req);
+					  old_req_id);
 		retval = FAILED;
 		goto out;
 	}
 
-	/* wait for completion of abort */
-	__wait_event(new_fsf_req->completion_wq,
-		     new_fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
+	__wait_event(fsf_req->completion_wq,
+		     fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
 
-	/* status should be valid since signals were not permitted */
-	if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) {
-		zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, new_fsf_req,
-					  NULL);
+	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) {
+		zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, fsf_req, 0);
 		retval = SUCCESS;
-	} else if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) {
-		zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, new_fsf_req,
-					  NULL);
+	} else if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) {
+		zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, fsf_req, 0);
 		retval = SUCCESS;
 	} else {
-		zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, new_fsf_req,
-					  NULL);
+		zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, fsf_req, 0);
 		retval = FAILED;
 	}
-	zfcp_fsf_req_free(new_fsf_req);
+	zfcp_fsf_req_free(fsf_req);
  out:
 	return retval;
 }
@@ -548,14 +539,11 @@
 
 /**
  * zfcp_scsi_eh_host_reset_handler - handler for host and bus reset
- *
- * If ERP is already running it will be stopped.
  */
 int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
 {
 	struct zfcp_unit *unit;
 	struct zfcp_adapter *adapter;
-	unsigned long flags;
 
 	unit = (struct zfcp_unit*) scpnt->device->hostdata;
 	adapter = unit->port->adapter;
@@ -563,22 +551,8 @@
 	ZFCP_LOG_NORMAL("host/bus reset because of problems with "
 			"unit 0x%016Lx\n", unit->fcp_lun);
 
-	write_lock_irqsave(&adapter->erp_lock, flags);
-	if (atomic_test_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
-			     &adapter->status)) {
-		zfcp_erp_modify_adapter_status(adapter,
-		       ZFCP_STATUS_COMMON_UNBLOCKED|ZFCP_STATUS_COMMON_OPEN,
-		       ZFCP_CLEAR);
-		zfcp_erp_action_dismiss_adapter(adapter);
-		write_unlock_irqrestore(&adapter->erp_lock, flags);
-		zfcp_fsf_req_dismiss_all(adapter);
-		adapter->fsf_req_seq_no = 0;
-		zfcp_erp_adapter_reopen(adapter, 0);
-	} else {
-		write_unlock_irqrestore(&adapter->erp_lock, flags);
-		zfcp_erp_adapter_reopen(adapter, 0);
-		zfcp_erp_wait(adapter);
-	}
+	zfcp_erp_adapter_reopen(adapter, 0);
+	zfcp_erp_wait(adapter);
 
 	return SUCCESS;
 }
@@ -607,7 +581,7 @@
 	adapter->scsi_host->max_channel = 0;
 	adapter->scsi_host->unique_id = unique_id++;	/* FIXME */
 	adapter->scsi_host->max_cmd_len = ZFCP_MAX_SCSI_CMND_LENGTH;
-	adapter->scsi_host->transportt = zfcp_transport_template;
+	adapter->scsi_host->transportt = zfcp_data.scsi_transport_template;
 
 	/*
 	 * save a pointer to our own adapter data structure within
@@ -648,16 +622,6 @@
 	return;
 }
 
-
-void
-zfcp_fsf_start_scsi_er_timer(struct zfcp_adapter *adapter)
-{
-	adapter->scsi_er_timer.function = zfcp_fsf_scsi_er_timeout_handler;
-	adapter->scsi_er_timer.data = (unsigned long) adapter;
-	adapter->scsi_er_timer.expires = jiffies + ZFCP_SCSI_ER_TIMEOUT;
-	add_timer(&adapter->scsi_er_timer);
-}
-
 /*
  * Support functions for FC transport class
  */
diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c
index d1c1e75..1e788e8 100644
--- a/drivers/s390/sysinfo.c
+++ b/drivers/s390/sysinfo.c
@@ -11,19 +11,18 @@
 #include <linux/init.h>
 #include <asm/ebcdic.h>
 
-struct sysinfo_1_1_1
-{
+struct sysinfo_1_1_1 {
 	char reserved_0[32];
 	char manufacturer[16];
 	char type[4];
 	char reserved_1[12];
-	char model[16];
+	char model_capacity[16];
 	char sequence[16];
 	char plant[4];
+	char model[16];
 };
 
-struct sysinfo_1_2_1
-{
+struct sysinfo_1_2_1 {
 	char reserved_0[80];
 	char sequence[16];
 	char plant[4];
@@ -31,9 +30,12 @@
 	unsigned short cpu_address;
 };
 
-struct sysinfo_1_2_2
-{
-	char reserved_0[32];
+struct sysinfo_1_2_2 {
+	char format;
+	char reserved_0[1];
+	unsigned short acc_offset;
+	char reserved_1[24];
+	unsigned int secondary_capability;
 	unsigned int capability;
 	unsigned short cpus_total;
 	unsigned short cpus_configured;
@@ -42,8 +44,12 @@
 	unsigned short adjustment[0];
 };
 
-struct sysinfo_2_2_1
-{
+struct sysinfo_1_2_2_extension {
+	unsigned int alt_capability;
+	unsigned short alt_adjustment[0];
+};
+
+struct sysinfo_2_2_1 {
 	char reserved_0[80];
 	char sequence[16];
 	char plant[4];
@@ -51,15 +57,11 @@
 	unsigned short cpu_address;
 };
 
-struct sysinfo_2_2_2
-{
+struct sysinfo_2_2_2 {
 	char reserved_0[32];
 	unsigned short lpar_number;
 	char reserved_1;
 	unsigned char characteristics;
-	#define LPAR_CHAR_DEDICATED	(1 << 7)
-	#define LPAR_CHAR_SHARED	(1 << 6)
-	#define LPAR_CHAR_LIMITED	(1 << 5)
 	unsigned short cpus_total;
 	unsigned short cpus_configured;
 	unsigned short cpus_standby;
@@ -71,12 +73,14 @@
 	unsigned short cpus_shared;
 };
 
-struct sysinfo_3_2_2
-{
+#define LPAR_CHAR_DEDICATED	(1 << 7)
+#define LPAR_CHAR_SHARED	(1 << 6)
+#define LPAR_CHAR_LIMITED	(1 << 5)
+
+struct sysinfo_3_2_2 {
 	char reserved_0[31];
 	unsigned char count;
-	struct
-	{
+	struct {
 		char reserved_0[4];
 		unsigned short cpus_total;
 		unsigned short cpus_configured;
@@ -90,136 +94,223 @@
 	} vm[8];
 };
 
-union s390_sysinfo
+static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
 {
-	struct sysinfo_1_1_1 sysinfo_1_1_1;
-	struct sysinfo_1_2_1 sysinfo_1_2_1;
-	struct sysinfo_1_2_2 sysinfo_1_2_2;
-	struct sysinfo_2_2_1 sysinfo_2_2_1;
-	struct sysinfo_2_2_2 sysinfo_2_2_2;
-	struct sysinfo_3_2_2 sysinfo_3_2_2;
-};
+	register int r0 asm("0") = (fc << 28) | sel1;
+	register int r1 asm("1") = sel2;
 
-static inline int stsi (void *sysinfo, 
-                        int fc, int sel1, int sel2)
-{
-	int cc, retv;
-
-#ifndef CONFIG_64BIT
-	__asm__ __volatile__ (	"lr\t0,%2\n"
-				"\tlr\t1,%3\n"
-				"\tstsi\t0(%4)\n"
-				"0:\tipm\t%0\n"
-				"\tsrl\t%0,28\n"
-				"1:lr\t%1,0\n"
-				".section .fixup,\"ax\"\n"
-				"2:\tlhi\t%0,3\n"
-				"\tbras\t1,3f\n"
-				"\t.long 1b\n"
-				"3:\tl\t1,0(1)\n"
-				"\tbr\t1\n"
-				".previous\n"
-				".section __ex_table,\"a\"\n"
-				"\t.align 4\n"
-				"\t.long 0b,2b\n"
-				".previous\n"
-				: "=d" (cc), "=d" (retv)
-				: "d" ((fc << 28) | sel1), "d" (sel2), "a" (sysinfo) 
-				: "cc", "memory", "0", "1" );
-#else
-	__asm__ __volatile__ (	"lr\t0,%2\n"
-				"lr\t1,%3\n"
-				"\tstsi\t0(%4)\n"
-				"0:\tipm\t%0\n"
-				"\tsrl\t%0,28\n"
-				"1:lr\t%1,0\n"
-				".section .fixup,\"ax\"\n"
-				"2:\tlhi\t%0,3\n"
-				"\tjg\t1b\n"
-				".previous\n"
-				".section __ex_table,\"a\"\n"
-				"\t.align 8\n"
-				"\t.quad 0b,2b\n"
-				".previous\n"
-				: "=d" (cc), "=d" (retv)
-				: "d" ((fc << 28) | sel1), "d" (sel2), "a" (sysinfo) 
-				: "cc", "memory", "0", "1" );
-#endif
-
-	return cc? -1 : retv;
+	asm volatile(
+		"   stsi 0(%2)\n"
+		"0: jz   2f\n"
+		"1: lhi  %0,%3\n"
+		"2:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS)
+		: "cc", "memory" );
+	return r0;
 }
 
-static inline int stsi_0 (void)
+static inline int stsi_0(void)
 {
 	int rc = stsi (NULL, 0, 0, 0);
-	return rc == -1 ? rc : (((unsigned int)rc) >> 28);
+	return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28);
 }
 
-static inline int stsi_1_1_1 (struct sysinfo_1_1_1 *info)
+static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len)
 {
-	int rc = stsi (info, 1, 1, 1);
-	if (rc != -1)
-	{
-		EBCASC (info->manufacturer, sizeof(info->manufacturer));
-		EBCASC (info->type, sizeof(info->type));
-		EBCASC (info->model, sizeof(info->model));
-		EBCASC (info->sequence, sizeof(info->sequence));
-		EBCASC (info->plant, sizeof(info->plant));
+	if (stsi(info, 1, 1, 1) == -ENOSYS)
+		return len;
+
+	EBCASC(info->manufacturer, sizeof(info->manufacturer));
+	EBCASC(info->type, sizeof(info->type));
+	EBCASC(info->model, sizeof(info->model));
+	EBCASC(info->sequence, sizeof(info->sequence));
+	EBCASC(info->plant, sizeof(info->plant));
+	EBCASC(info->model_capacity, sizeof(info->model_capacity));
+	len += sprintf(page + len, "Manufacturer:         %-16.16s\n",
+		       info->manufacturer);
+	len += sprintf(page + len, "Type:                 %-4.4s\n",
+		       info->type);
+	if (info->model[0] != '\0')
+		/*
+		 * Sigh: the model field has been renamed with System z9
+		 * to model_capacity and a new model field has been added
+		 * after the plant field. To avoid confusing older programs
+		 * the "Model:" line prints "model_capacity model" or just
+		 * "model_capacity" if the model string is empty.
+		 */
+		len += sprintf(page + len,
+			       "Model:                %-16.16s %-16.16s\n",
+			       info->model_capacity, info->model);
+	else
+		len += sprintf(page + len, "Model:                %-16.16s\n",
+			       info->model_capacity);
+	len += sprintf(page + len, "Sequence Code:        %-16.16s\n",
+		       info->sequence);
+	len += sprintf(page + len, "Plant:                %-4.4s\n",
+		       info->plant);
+	len += sprintf(page + len, "Model Capacity:       %-16.16s\n",
+		       info->model_capacity);
+	return len;
+}
+
+#if 0 /* Currently unused */
+static int stsi_1_2_1(struct sysinfo_1_2_1 *info, char *page, int len)
+{
+	if (stsi(info, 1, 2, 1) == -ENOSYS)
+		return len;
+
+	len += sprintf(page + len, "\n");
+	EBCASC(info->sequence, sizeof(info->sequence));
+	EBCASC(info->plant, sizeof(info->plant));
+	len += sprintf(page + len, "Sequence Code of CPU: %-16.16s\n",
+		       info->sequence);
+	len += sprintf(page + len, "Plant of CPU:         %-16.16s\n",
+		       info->plant);
+	return len;
+}
+#endif
+
+static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len)
+{
+	struct sysinfo_1_2_2_extension *ext;
+	int i;
+
+	if (stsi(info, 1, 2, 2) == -ENOSYS)
+		return len;
+	ext = (struct sysinfo_1_2_2_extension *)
+		((unsigned long) info + info->acc_offset);
+
+	len += sprintf(page + len, "\n");
+	len += sprintf(page + len, "CPUs Total:           %d\n",
+		       info->cpus_total);
+	len += sprintf(page + len, "CPUs Configured:      %d\n",
+		       info->cpus_configured);
+	len += sprintf(page + len, "CPUs Standby:         %d\n",
+		       info->cpus_standby);
+	len += sprintf(page + len, "CPUs Reserved:        %d\n",
+		       info->cpus_reserved);
+
+	if (info->format == 1) {
+		/*
+		 * Sigh 2. According to the specification the alternate
+		 * capability field is a 32 bit floating point number
+		 * if the higher order 8 bits are not zero. Printing
+		 * a floating point number in the kernel is a no-no,
+		 * always print the number as 32 bit unsigned integer.
+		 * The user-space needs to know about the strange
+		 * encoding of the alternate cpu capability.
+		 */
+		len += sprintf(page + len, "Capability:           %u %u\n",
+			       info->capability, ext->alt_capability);
+		for (i = 2; i <= info->cpus_total; i++)
+			len += sprintf(page + len,
+				       "Adjustment %02d-way:    %u %u\n",
+				       i, info->adjustment[i-2],
+				       ext->alt_adjustment[i-2]);
+
+	} else {
+		len += sprintf(page + len, "Capability:           %u\n",
+			       info->capability);
+		for (i = 2; i <= info->cpus_total; i++)
+			len += sprintf(page + len,
+				       "Adjustment %02d-way:    %u\n",
+				       i, info->adjustment[i-2]);
 	}
-	return rc == -1 ? rc : 0;
+
+	if (info->secondary_capability != 0)
+		len += sprintf(page + len, "Secondary Capability: %d\n",
+			       info->secondary_capability);
+
+	return len;
 }
 
-static inline int stsi_1_2_1 (struct sysinfo_1_2_1 *info)
+#if 0 /* Currently unused */
+static int stsi_2_2_1(struct sysinfo_2_2_1 *info, char *page, int len)
 {
-	int rc = stsi (info, 1, 2, 1);
-	if (rc != -1)
-	{
-		EBCASC (info->sequence, sizeof(info->sequence));
-		EBCASC (info->plant, sizeof(info->plant));
+	if (stsi(info, 2, 2, 1) == -ENOSYS)
+		return len;
+
+	len += sprintf(page + len, "\n");
+	EBCASC (info->sequence, sizeof(info->sequence));
+	EBCASC (info->plant, sizeof(info->plant));
+	len += sprintf(page + len, "Sequence Code of logical CPU: %-16.16s\n",
+		       info->sequence);
+	len += sprintf(page + len, "Plant of logical CPU: %-16.16s\n",
+		       info->plant);
+	return len;
+}
+#endif
+
+static int stsi_2_2_2(struct sysinfo_2_2_2 *info, char *page, int len)
+{
+	if (stsi(info, 2, 2, 2) == -ENOSYS)
+		return len;
+
+	EBCASC (info->name, sizeof(info->name));
+
+	len += sprintf(page + len, "\n");
+	len += sprintf(page + len, "LPAR Number:          %d\n",
+		       info->lpar_number);
+
+	len += sprintf(page + len, "LPAR Characteristics: ");
+	if (info->characteristics & LPAR_CHAR_DEDICATED)
+		len += sprintf(page + len, "Dedicated ");
+	if (info->characteristics & LPAR_CHAR_SHARED)
+		len += sprintf(page + len, "Shared ");
+	if (info->characteristics & LPAR_CHAR_LIMITED)
+		len += sprintf(page + len, "Limited ");
+	len += sprintf(page + len, "\n");
+
+	len += sprintf(page + len, "LPAR Name:            %-8.8s\n",
+		       info->name);
+
+	len += sprintf(page + len, "LPAR Adjustment:      %d\n",
+		       info->caf);
+
+	len += sprintf(page + len, "LPAR CPUs Total:      %d\n",
+		       info->cpus_total);
+	len += sprintf(page + len, "LPAR CPUs Configured: %d\n",
+		       info->cpus_configured);
+	len += sprintf(page + len, "LPAR CPUs Standby:    %d\n",
+		       info->cpus_standby);
+	len += sprintf(page + len, "LPAR CPUs Reserved:   %d\n",
+		       info->cpus_reserved);
+	len += sprintf(page + len, "LPAR CPUs Dedicated:  %d\n",
+		       info->cpus_dedicated);
+	len += sprintf(page + len, "LPAR CPUs Shared:     %d\n",
+		       info->cpus_shared);
+	return len;
+}
+
+static int stsi_3_2_2(struct sysinfo_3_2_2 *info, char *page, int len)
+{
+	int i;
+
+	if (stsi(info, 3, 2, 2) == -ENOSYS)
+		return len;
+	for (i = 0; i < info->count; i++) {
+		EBCASC (info->vm[i].name, sizeof(info->vm[i].name));
+		EBCASC (info->vm[i].cpi, sizeof(info->vm[i].cpi));
+		len += sprintf(page + len, "\n");
+		len += sprintf(page + len, "VM%02d Name:            %-8.8s\n",
+			       i, info->vm[i].name);
+		len += sprintf(page + len, "VM%02d Control Program: %-16.16s\n",
+			       i, info->vm[i].cpi);
+
+		len += sprintf(page + len, "VM%02d Adjustment:      %d\n",
+			       i, info->vm[i].caf);
+
+		len += sprintf(page + len, "VM%02d CPUs Total:      %d\n",
+			       i, info->vm[i].cpus_total);
+		len += sprintf(page + len, "VM%02d CPUs Configured: %d\n",
+			       i, info->vm[i].cpus_configured);
+		len += sprintf(page + len, "VM%02d CPUs Standby:    %d\n",
+			       i, info->vm[i].cpus_standby);
+		len += sprintf(page + len, "VM%02d CPUs Reserved:   %d\n",
+			       i, info->vm[i].cpus_reserved);
 	}
-	return rc == -1 ? rc : 0;
-}
-
-static inline int stsi_1_2_2 (struct sysinfo_1_2_2 *info)
-{
-	int rc = stsi (info, 1, 2, 2);
-	return rc == -1 ? rc : 0;
-}
-
-static inline int stsi_2_2_1 (struct sysinfo_2_2_1 *info)
-{
-	int rc = stsi (info, 2, 2, 1);
-	if (rc != -1)
-	{
-		EBCASC (info->sequence, sizeof(info->sequence));
-		EBCASC (info->plant, sizeof(info->plant));
-	}
-	return rc == -1 ? rc : 0;
-}
-
-static inline int stsi_2_2_2 (struct sysinfo_2_2_2 *info)
-{
-	int rc = stsi (info, 2, 2, 2);
-	if (rc != -1)
-	{
-		EBCASC (info->name, sizeof(info->name));
-  	}
-	return rc == -1 ? rc : 0;
-}
-
-static inline int stsi_3_2_2 (struct sysinfo_3_2_2 *info)
-{
-	int rc = stsi (info, 3, 2, 2);
-	if (rc != -1)
-	{
-		int i;
-		for (i = 0; i < info->count; i++)
-		{
-			EBCASC (info->vm[i].name, sizeof(info->vm[i].name));
-			EBCASC (info->vm[i].cpi, sizeof(info->vm[i].cpi));
-		}
-	}
-	return rc == -1 ? rc : 0;
+	return len;
 }
 
 
@@ -227,118 +318,34 @@
                              off_t off, int count,
                              int *eof, void *data)
 {
-	unsigned long info_page = get_zeroed_page (GFP_KERNEL); 
-	union s390_sysinfo *info = (union s390_sysinfo *) info_page;
-	int len = 0;
-	int level;
-	int i;
+	unsigned long info = get_zeroed_page (GFP_KERNEL);
+	int level, len;
 	
 	if (!info)
 		return 0;
 
-	level = stsi_0 ();
+	len = 0;
+	level = stsi_0();
+	if (level >= 1)
+		len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len);
 
-	if (level >= 1 && stsi_1_1_1 (&info->sysinfo_1_1_1) == 0)
-	{
-		len += sprintf (page+len, "Manufacturer:         %-16.16s\n",
-				info->sysinfo_1_1_1.manufacturer);
-		len += sprintf (page+len, "Type:                 %-4.4s\n",
-				info->sysinfo_1_1_1.type);
-		len += sprintf (page+len, "Model:                %-16.16s\n",
-				info->sysinfo_1_1_1.model);
-		len += sprintf (page+len, "Sequence Code:        %-16.16s\n",
-				info->sysinfo_1_1_1.sequence);
-		len += sprintf (page+len, "Plant:                %-4.4s\n",
-				info->sysinfo_1_1_1.plant);
-	}
+	if (level >= 1)
+		len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len);
 
-	if (level >= 1 && stsi_1_2_2 (&info->sysinfo_1_2_2) == 0)
-	{
-		len += sprintf (page+len, "\n");
-		len += sprintf (page+len, "CPUs Total:           %d\n",
-				info->sysinfo_1_2_2.cpus_total);
-		len += sprintf (page+len, "CPUs Configured:      %d\n",
-				info->sysinfo_1_2_2.cpus_configured);
-		len += sprintf (page+len, "CPUs Standby:         %d\n",
-				info->sysinfo_1_2_2.cpus_standby);
-		len += sprintf (page+len, "CPUs Reserved:        %d\n",
-				info->sysinfo_1_2_2.cpus_reserved);
-	
-		len += sprintf (page+len, "Capability:           %d\n",
-				info->sysinfo_1_2_2.capability);
+	if (level >= 2)
+		len = stsi_2_2_2((struct sysinfo_2_2_2 *) info, page, len);
 
-		for (i = 2; i <= info->sysinfo_1_2_2.cpus_total; i++)
-			len += sprintf (page+len, "Adjustment %02d-way:    %d\n",
-					i, info->sysinfo_1_2_2.adjustment[i-2]);
-	}
+	if (level >= 3)
+		len = stsi_3_2_2((struct sysinfo_3_2_2 *) info, page, len);
 
-	if (level >= 2 && stsi_2_2_2 (&info->sysinfo_2_2_2) == 0)
-	{
-		len += sprintf (page+len, "\n");
-		len += sprintf (page+len, "LPAR Number:          %d\n",
-				info->sysinfo_2_2_2.lpar_number);
-
-		len += sprintf (page+len, "LPAR Characteristics: ");
-		if (info->sysinfo_2_2_2.characteristics & LPAR_CHAR_DEDICATED)
-			len += sprintf (page+len, "Dedicated ");
-		if (info->sysinfo_2_2_2.characteristics & LPAR_CHAR_SHARED)
-			len += sprintf (page+len, "Shared ");
-		if (info->sysinfo_2_2_2.characteristics & LPAR_CHAR_LIMITED)
-			len += sprintf (page+len, "Limited ");
-		len += sprintf (page+len, "\n");
-	
-		len += sprintf (page+len, "LPAR Name:            %-8.8s\n",
-				info->sysinfo_2_2_2.name);
-	
-		len += sprintf (page+len, "LPAR Adjustment:      %d\n",
-				info->sysinfo_2_2_2.caf);
-	
-		len += sprintf (page+len, "LPAR CPUs Total:      %d\n",
-				info->sysinfo_2_2_2.cpus_total);
-		len += sprintf (page+len, "LPAR CPUs Configured: %d\n",
-				info->sysinfo_2_2_2.cpus_configured);
-		len += sprintf (page+len, "LPAR CPUs Standby:    %d\n",
-				info->sysinfo_2_2_2.cpus_standby);
-		len += sprintf (page+len, "LPAR CPUs Reserved:   %d\n",
-				info->sysinfo_2_2_2.cpus_reserved);
-		len += sprintf (page+len, "LPAR CPUs Dedicated:  %d\n",
-				info->sysinfo_2_2_2.cpus_dedicated);
-		len += sprintf (page+len, "LPAR CPUs Shared:     %d\n",
-				info->sysinfo_2_2_2.cpus_shared);
-	}
-
-	if (level >= 3 && stsi_3_2_2 (&info->sysinfo_3_2_2) == 0)
-	{
-		for (i = 0; i < info->sysinfo_3_2_2.count; i++)
-		{
-			len += sprintf (page+len, "\n");
-			len += sprintf (page+len, "VM%02d Name:            %-8.8s\n",
-					i, info->sysinfo_3_2_2.vm[i].name);
-			len += sprintf (page+len, "VM%02d Control Program: %-16.16s\n",
-					i, info->sysinfo_3_2_2.vm[i].cpi);
-	
-			len += sprintf (page+len, "VM%02d Adjustment:      %d\n",
-					i, info->sysinfo_3_2_2.vm[i].caf);
-	
-			len += sprintf (page+len, "VM%02d CPUs Total:      %d\n",
-					i, info->sysinfo_3_2_2.vm[i].cpus_total);
-			len += sprintf (page+len, "VM%02d CPUs Configured: %d\n",
-					i, info->sysinfo_3_2_2.vm[i].cpus_configured);
-			len += sprintf (page+len, "VM%02d CPUs Standby:    %d\n",
-					i, info->sysinfo_3_2_2.vm[i].cpus_standby);
-			len += sprintf (page+len, "VM%02d CPUs Reserved:   %d\n",
-					i, info->sysinfo_3_2_2.vm[i].cpus_reserved);
-		}
-	}
-
-	free_page (info_page);
+	free_page (info);
         return len;
 }
 
 static __init int create_proc_sysinfo(void)
 {
-	create_proc_read_entry ("sysinfo", 0444, NULL, 
-				proc_read_sysinfo, NULL);
+	create_proc_read_entry("sysinfo", 0444, NULL,
+			       proc_read_sysinfo, NULL);
 	return 0;
 }
 
diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 16a12a3..4ea49fd 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -662,7 +662,7 @@
 	   particular standard ISA I/O Address need not be probed.
 	 */
 	PrimaryProbeInfo->IO_Address = 0;
-	while ((PCI_Device = pci_find_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER, PCI_Device)) != NULL) {
+	while ((PCI_Device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER, PCI_Device)) != NULL) {
 		struct BusLogic_HostAdapter *HostAdapter = PrototypeHostAdapter;
 		struct BusLogic_PCIHostAdapterInformation PCIHostAdapterInformation;
 		enum BusLogic_ISACompatibleIOPort ModifyIOAddressRequest;
@@ -762,7 +762,7 @@
 			PrimaryProbeInfo->Bus = Bus;
 			PrimaryProbeInfo->Device = Device;
 			PrimaryProbeInfo->IRQ_Channel = IRQ_Channel;
-			PrimaryProbeInfo->PCI_Device = PCI_Device;
+			PrimaryProbeInfo->PCI_Device = pci_dev_get(PCI_Device);
 			PCIMultiMasterCount++;
 		} else if (BusLogic_ProbeInfoCount < BusLogic_MaxHostAdapters) {
 			struct BusLogic_ProbeInfo *ProbeInfo = &BusLogic_ProbeInfoList[BusLogic_ProbeInfoCount++];
@@ -773,7 +773,7 @@
 			ProbeInfo->Bus = Bus;
 			ProbeInfo->Device = Device;
 			ProbeInfo->IRQ_Channel = IRQ_Channel;
-			ProbeInfo->PCI_Device = PCI_Device;
+			ProbeInfo->PCI_Device = pci_dev_get(PCI_Device);
 			NonPrimaryPCIMultiMasterCount++;
 			PCIMultiMasterCount++;
 		} else
@@ -823,7 +823,7 @@
 	   noting the PCI bus location and assigned IRQ Channel.
 	 */
 	PCI_Device = NULL;
-	while ((PCI_Device = pci_find_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC, PCI_Device)) != NULL) {
+	while ((PCI_Device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC, PCI_Device)) != NULL) {
 		unsigned char Bus;
 		unsigned char Device;
 		unsigned int IRQ_Channel;
@@ -850,7 +850,7 @@
 				ProbeInfo->Bus = Bus;
 				ProbeInfo->Device = Device;
 				ProbeInfo->IRQ_Channel = IRQ_Channel;
-				ProbeInfo->PCI_Device = PCI_Device;
+				ProbeInfo->PCI_Device = pci_dev_get(PCI_Device);
 				break;
 			}
 		}
@@ -874,7 +874,7 @@
 	/*
 	   Interrogate PCI Configuration Space for any FlashPoint Host Adapters.
 	 */
-	while ((PCI_Device = pci_find_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT, PCI_Device)) != NULL) {
+	while ((PCI_Device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT, PCI_Device)) != NULL) {
 		unsigned char Bus;
 		unsigned char Device;
 		unsigned int IRQ_Channel;
@@ -923,7 +923,7 @@
 			ProbeInfo->Bus = Bus;
 			ProbeInfo->Device = Device;
 			ProbeInfo->IRQ_Channel = IRQ_Channel;
-			ProbeInfo->PCI_Device = PCI_Device;
+			ProbeInfo->PCI_Device = pci_dev_get(PCI_Device);
 			FlashPointCount++;
 		} else
 			BusLogic_Warning("BusLogic: Too many Host Adapters " "detected\n", NULL);
@@ -1890,6 +1890,7 @@
 	 */
 	if (HostAdapter->MailboxSpace)
 		pci_free_consistent(HostAdapter->PCI_Device, HostAdapter->MailboxSize, HostAdapter->MailboxSpace, HostAdapter->MailboxSpaceHandle);
+	pci_dev_put(HostAdapter->PCI_Device);
 	HostAdapter->MailboxSpace = NULL;
 	HostAdapter->MailboxSpaceHandle = 0;
 	HostAdapter->MailboxSize = 0;
@@ -2176,6 +2177,7 @@
 {
 	int BusLogicHostAdapterCount = 0, DriverOptionsIndex = 0, ProbeIndex;
 	struct BusLogic_HostAdapter *PrototypeHostAdapter;
+	int ret = 0;
 
 #ifdef MODULE
 	if (BusLogic)
@@ -2282,25 +2284,49 @@
 		   perform Target Device Inquiry.
 		 */
 		if (BusLogic_ReadHostAdapterConfiguration(HostAdapter) &&
-		    BusLogic_ReportHostAdapterConfiguration(HostAdapter) && BusLogic_AcquireResources(HostAdapter) && BusLogic_CreateInitialCCBs(HostAdapter) && BusLogic_InitializeHostAdapter(HostAdapter) && BusLogic_TargetDeviceInquiry(HostAdapter)) {
+		    BusLogic_ReportHostAdapterConfiguration(HostAdapter) &&
+		    BusLogic_AcquireResources(HostAdapter) &&
+		    BusLogic_CreateInitialCCBs(HostAdapter) &&
+		    BusLogic_InitializeHostAdapter(HostAdapter) &&
+		    BusLogic_TargetDeviceInquiry(HostAdapter)) {
 			/*
 			   Initialization has been completed successfully.  Release and
 			   re-register usage of the I/O Address range so that the Model
 			   Name of the Host Adapter will appear, and initialize the SCSI
 			   Host structure.
 			 */
-			release_region(HostAdapter->IO_Address, HostAdapter->AddressCount);
-			if (!request_region(HostAdapter->IO_Address, HostAdapter->AddressCount, HostAdapter->FullModelName)) {
-				printk(KERN_WARNING "BusLogic: Release and re-register of " "port 0x%04lx failed \n", (unsigned long) HostAdapter->IO_Address);
+			release_region(HostAdapter->IO_Address,
+				       HostAdapter->AddressCount);
+			if (!request_region(HostAdapter->IO_Address,
+					    HostAdapter->AddressCount,
+					    HostAdapter->FullModelName)) {
+				printk(KERN_WARNING
+					"BusLogic: Release and re-register of "
+					"port 0x%04lx failed \n",
+					(unsigned long)HostAdapter->IO_Address);
 				BusLogic_DestroyCCBs(HostAdapter);
 				BusLogic_ReleaseResources(HostAdapter);
 				list_del(&HostAdapter->host_list);
 				scsi_host_put(Host);
+				ret = -ENOMEM;
 			} else {
-				BusLogic_InitializeHostStructure(HostAdapter, Host);
-				scsi_add_host(Host, HostAdapter->PCI_Device ? &HostAdapter->PCI_Device->dev : NULL);
-				scsi_scan_host(Host);
-				BusLogicHostAdapterCount++;
+				BusLogic_InitializeHostStructure(HostAdapter,
+								 Host);
+				if (scsi_add_host(Host, HostAdapter->PCI_Device
+						? &HostAdapter->PCI_Device->dev
+						  : NULL)) {
+					printk(KERN_WARNING
+					       "BusLogic: scsi_add_host()"
+					       "failed!\n");
+					BusLogic_DestroyCCBs(HostAdapter);
+					BusLogic_ReleaseResources(HostAdapter);
+					list_del(&HostAdapter->host_list);
+					scsi_host_put(Host);
+					ret = -ENODEV;
+				} else {
+					scsi_scan_host(Host);
+					BusLogicHostAdapterCount++;
+				}
 			}
 		} else {
 			/*
@@ -2315,12 +2341,13 @@
 			BusLogic_ReleaseResources(HostAdapter);
 			list_del(&HostAdapter->host_list);
 			scsi_host_put(Host);
+			ret = -ENODEV;
 		}
 	}
 	kfree(PrototypeHostAdapter);
 	kfree(BusLogic_ProbeInfoList);
 	BusLogic_ProbeInfoList = NULL;
-	return 0;
+	return ret;
 }
 
 
@@ -2954,6 +2981,7 @@
 }
 
 
+#if 0
 /*
   BusLogic_AbortCommand aborts Command if possible.
 */
@@ -3024,6 +3052,7 @@
 	return SUCCESS;
 }
 
+#endif
 /*
   BusLogic_ResetHostAdapter resets Host Adapter if possible, marking all
   currently executing SCSI Commands as having been Reset.
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 2df4d15..c4dfcc9 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -27,6 +27,11 @@
 	  However, do not compile this as a module if your root file system
 	  (the one containing the directory /) is located on a SCSI device.
 
+config SCSI_NETLINK
+	bool
+	default	n
+	select NET
+
 config SCSI_PROC_FS
 	bool "legacy /proc/scsi/ support"
 	depends on SCSI && PROC_FS
@@ -209,7 +214,7 @@
 	  there should be no noticeable performance impact as long as you have
 	  logging turned off.
 
-menu "SCSI Transport Attributes"
+menu "SCSI Transports"
 	depends on SCSI
 
 config SCSI_SPI_ATTRS
@@ -222,6 +227,7 @@
 config SCSI_FC_ATTRS
 	tristate "FiberChannel Transport Attributes"
 	depends on SCSI
+	select SCSI_NETLINK
 	help
 	  If you wish to export transport-specific information about
 	  each attached FiberChannel device to sysfs, say Y.
@@ -242,6 +248,8 @@
 	  If you wish to export transport-specific information about
 	  each attached SAS device to sysfs, say Y.
 
+source "drivers/scsi/libsas/Kconfig"
+
 endmenu
 
 menu "SCSI low-level drivers"
@@ -431,6 +439,7 @@
 	  module will be called aic7xxx_old.
 
 source "drivers/scsi/aic7xxx/Kconfig.aic79xx"
+source "drivers/scsi/aic94xx/Kconfig"
 
 # All the I2O code and drivers do not seem to be 64bit safe.
 config SCSI_DPT_I2O
@@ -469,6 +478,20 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called in2000.
 
+config SCSI_ARCMSR
+	tristate "ARECA ARC11X0[PCI-X]/ARC12X0[PCI-EXPRESS] SATA-RAID support"
+	depends on PCI && SCSI
+	help
+	  This driver supports all of ARECA's SATA RAID controller cards.
+	  This is an ARECA-maintained driver by Erich Chen.
+	  If you have any problems, please mail to: < erich@areca.com.tw >
+	  Areca supports Linux RAID config tools.
+
+	  < http://www.areca.com.tw >
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called arcmsr (modprobe arcmsr).
+
 source "drivers/scsi/megaraid/Kconfig.megaraid"
 
 config SCSI_HPTIOP
@@ -915,6 +938,13 @@
 	depends on SCSI_LASI700
 	default y
 
+config SCSI_STEX
+	tristate "Promise SuperTrak EX Series support"
+	depends on PCI && SCSI
+	---help---
+	  This driver supports Promise SuperTrak EX8350/8300/16350/16300
+	  Storage controllers.
+
 config SCSI_SYM53C8XX_2
 	tristate "SYM53C8XX Version 2 SCSI support"
 	depends on PCI && SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index b678f95..1ef951b 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -32,6 +32,7 @@
 obj-$(CONFIG_SCSI_FC_ATTRS) 	+= scsi_transport_fc.o
 obj-$(CONFIG_SCSI_ISCSI_ATTRS)	+= scsi_transport_iscsi.o
 obj-$(CONFIG_SCSI_SAS_ATTRS)	+= scsi_transport_sas.o
+obj-$(CONFIG_SCSI_SAS_LIBSAS)	+= libsas/
 
 obj-$(CONFIG_ISCSI_TCP) 	+= libiscsi.o	iscsi_tcp.o
 obj-$(CONFIG_INFINIBAND_ISER) 	+= libiscsi.o
@@ -59,6 +60,7 @@
 obj-$(CONFIG_SCSI_BUSLOGIC)	+= BusLogic.o
 obj-$(CONFIG_SCSI_DPT_I2O)	+= dpt_i2o.o
 obj-$(CONFIG_SCSI_U14_34F)	+= u14-34f.o
+obj-$(CONFIG_SCSI_ARCMSR)	+= arcmsr/
 obj-$(CONFIG_SCSI_ULTRASTOR)	+= ultrastor.o
 obj-$(CONFIG_SCSI_AHA152X)	+= aha152x.o
 obj-$(CONFIG_SCSI_AHA1542)	+= aha1542.o
@@ -67,6 +69,7 @@
 obj-$(CONFIG_SCSI_AIC79XX)	+= aic7xxx/
 obj-$(CONFIG_SCSI_AACRAID)	+= aacraid/
 obj-$(CONFIG_SCSI_AIC7XXX_OLD)	+= aic7xxx_old.o
+obj-$(CONFIG_SCSI_AIC94XX)	+= aic94xx/
 obj-$(CONFIG_SCSI_IPS)		+= ips.o
 obj-$(CONFIG_SCSI_FD_MCS)	+= fd_mcs.o
 obj-$(CONFIG_SCSI_FUTURE_DOMAIN)+= fdomain.o
@@ -123,6 +126,7 @@
 obj-$(CONFIG_SCSI_IPR)		+= ipr.o
 obj-$(CONFIG_SCSI_IBMVSCSI)	+= ibmvscsi/
 obj-$(CONFIG_SCSI_HPTIOP)	+= hptiop.o
+obj-$(CONFIG_SCSI_STEX)		+= stex.o
 
 obj-$(CONFIG_ARM)		+= arm/
 
@@ -140,6 +144,7 @@
 				   scsicam.o scsi_error.o scsi_lib.o \
 				   scsi_scan.o scsi_sysfs.o \
 				   scsi_devinfo.o
+scsi_mod-$(CONFIG_SCSI_NETLINK)	+= scsi_netlink.o
 scsi_mod-$(CONFIG_SYSCTL)	+= scsi_sysctl.o
 scsi_mod-$(CONFIG_SCSI_PROC_FS)	+= scsi_proc.o
 
diff --git a/drivers/scsi/a2091.c b/drivers/scsi/a2091.c
index fddfa2e..0854069 100644
--- a/drivers/scsi/a2091.c
+++ b/drivers/scsi/a2091.c
@@ -40,7 +40,7 @@
     return IRQ_HANDLED;
 }
 
-static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
+static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
     unsigned short cntr = CNTR_PDMD | CNTR_INTEN;
     unsigned long addr = virt_to_bus(cmd->SCp.ptr);
@@ -115,7 +115,7 @@
     return 0;
 }
 
-static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt, 
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
 		      int status)
 {
     /* disable SCSI interrupts */
@@ -217,7 +217,7 @@
     return num_a2091;
 }
 
-static int a2091_bus_reset(Scsi_Cmnd *cmd)
+static int a2091_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 
diff --git a/drivers/scsi/a2091.h b/drivers/scsi/a2091.h
index 22d6a13..fe809bc 100644
--- a/drivers/scsi/a2091.h
+++ b/drivers/scsi/a2091.h
@@ -13,10 +13,6 @@
 
 int a2091_detect(struct scsi_host_template *);
 int a2091_release(struct Scsi_Host *);
-const char *wd33c93_info(void);
-int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-int wd33c93_abort(Scsi_Cmnd *);
-int wd33c93_reset(Scsi_Cmnd *, unsigned int);
 
 #ifndef CMD_PER_LUN
 #define CMD_PER_LUN 2
diff --git a/drivers/scsi/a3000.c b/drivers/scsi/a3000.c
index ae9ab4b..7bf46d4 100644
--- a/drivers/scsi/a3000.c
+++ b/drivers/scsi/a3000.c
@@ -44,7 +44,7 @@
 	return IRQ_NONE;
 }
 
-static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
+static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
     unsigned short cntr = CNTR_PDMD | CNTR_INTEN;
     unsigned long addr = virt_to_bus(cmd->SCp.ptr);
@@ -110,8 +110,8 @@
     return 0;
 }
 
-static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt,
-		      int status)
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
+		     int status)
 {
     /* disable SCSI interrupts */
     unsigned short cntr = CNTR_PDMD;
@@ -205,7 +205,7 @@
     return 0;
 }
 
-static int a3000_bus_reset(Scsi_Cmnd *cmd)
+static int a3000_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 	
diff --git a/drivers/scsi/a3000.h b/drivers/scsi/a3000.h
index 5535a65..44a4ec7 100644
--- a/drivers/scsi/a3000.h
+++ b/drivers/scsi/a3000.h
@@ -13,10 +13,6 @@
 
 int a3000_detect(struct scsi_host_template *);
 int a3000_release(struct Scsi_Host *);
-const char *wd33c93_info(void);
-int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-int wd33c93_abort(Scsi_Cmnd *);
-int wd33c93_reset(Scsi_Cmnd *, unsigned int);
 
 #ifndef CMD_PER_LUN
 #define CMD_PER_LUN 2
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 83b5c7d..ac108f9 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -169,13 +169,17 @@
 int acbsize = -1;
 module_param(acbsize, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. Default is to use suggestion from Firmware.");
+
+int expose_physicals = 0;
+module_param(expose_physicals, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. 0=off, 1=on");
 /**
  *	aac_get_config_status	-	check the adapter configuration
  *	@common: adapter to query
  *
  *	Query config status, and commit the configuration if needed.
  */
-int aac_get_config_status(struct aac_dev *dev)
+int aac_get_config_status(struct aac_dev *dev, int commit_flag)
 {
 	int status = 0;
 	struct fib * fibptr;
@@ -219,7 +223,7 @@
 	aac_fib_complete(fibptr);
 	/* Send a CT_COMMIT_CONFIG to enable discovery of devices */
 	if (status >= 0) {
-		if (commit == 1) {
+		if ((commit == 1) || commit_flag) {
 			struct aac_commit_config * dinfo;
 			aac_fib_init(fibptr);
 			dinfo = (struct aac_commit_config *) fib_data(fibptr);
@@ -489,6 +493,8 @@
 	unsigned instance;
 
 	fsa_dev_ptr = dev->fsa_dev;
+	if (!fsa_dev_ptr)
+		return -ENOMEM;
 	instance = dev->scsi_host_ptr->unique_id;
 
 	if (!(fibptr = aac_fib_alloc(dev)))
@@ -782,8 +788,9 @@
 		dev->maximum_num_channels = le32_to_cpu(bus_info->BusCount);
 	}
 
-	tmp = le32_to_cpu(dev->adapter_info.kernelrev);
-	printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n", 
+	if (!dev->in_reset) {
+		tmp = le32_to_cpu(dev->adapter_info.kernelrev);
+		printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n",
 			dev->name, 
 			dev->id,
 			tmp>>24,
@@ -792,20 +799,21 @@
 			le32_to_cpu(dev->adapter_info.kernelbuild),
 			(int)sizeof(dev->supplement_adapter_info.BuildDate),
 			dev->supplement_adapter_info.BuildDate);
-	tmp = le32_to_cpu(dev->adapter_info.monitorrev);
-	printk(KERN_INFO "%s%d: monitor %d.%d-%d[%d]\n", 
+		tmp = le32_to_cpu(dev->adapter_info.monitorrev);
+		printk(KERN_INFO "%s%d: monitor %d.%d-%d[%d]\n",
 			dev->name, dev->id,
 			tmp>>24,(tmp>>16)&0xff,tmp&0xff,
 			le32_to_cpu(dev->adapter_info.monitorbuild));
-	tmp = le32_to_cpu(dev->adapter_info.biosrev);
-	printk(KERN_INFO "%s%d: bios %d.%d-%d[%d]\n", 
+		tmp = le32_to_cpu(dev->adapter_info.biosrev);
+		printk(KERN_INFO "%s%d: bios %d.%d-%d[%d]\n",
 			dev->name, dev->id,
 			tmp>>24,(tmp>>16)&0xff,tmp&0xff,
 			le32_to_cpu(dev->adapter_info.biosbuild));
-	if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
-		printk(KERN_INFO "%s%d: serial %x\n",
-			dev->name, dev->id,
-			le32_to_cpu(dev->adapter_info.serial[0]));
+		if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
+			printk(KERN_INFO "%s%d: serial %x\n",
+				dev->name, dev->id,
+				le32_to_cpu(dev->adapter_info.serial[0]));
+	}
 
 	dev->nondasd_support = 0;
 	dev->raid_scsi_mode = 0;
@@ -1392,6 +1400,7 @@
 	struct scsi_cmnd *cmd;
 	struct scsi_device *sdev = scsicmd->device;
 	int active = 0;
+	struct aac_dev *aac;
 	unsigned long flags;
 
 	/*
@@ -1413,11 +1422,14 @@
 	if (active)
 		return SCSI_MLQUEUE_DEVICE_BUSY;
 
+	aac = (struct aac_dev *)scsicmd->device->host->hostdata;
+	if (aac->in_reset)
+		return SCSI_MLQUEUE_HOST_BUSY;
+
 	/*
 	 *	Allocate and initialize a Fib
 	 */
-	if (!(cmd_fibcontext = 
-	    aac_fib_alloc((struct aac_dev *)scsicmd->device->host->hostdata)))
+	if (!(cmd_fibcontext = aac_fib_alloc(aac)))
 		return SCSI_MLQUEUE_HOST_BUSY;
 
 	aac_fib_init(cmd_fibcontext);
@@ -1470,6 +1482,8 @@
 	struct aac_dev *dev = (struct aac_dev *)host->hostdata;
 	struct fsa_dev_info *fsa_dev_ptr = dev->fsa_dev;
 	
+	if (fsa_dev_ptr == NULL)
+		return -1;
 	/*
 	 *	If the bus, id or lun is out of range, return fail
 	 *	Test does not apply to ID 16, the pseudo id for the controller
@@ -1499,6 +1513,8 @@
 				case INQUIRY:
 				case READ_CAPACITY:
 				case TEST_UNIT_READY:
+					if (dev->in_reset)
+						return -1;
 					spin_unlock_irq(host->host_lock);
 					aac_probe_container(dev, cid);
 					if ((fsa_dev_ptr[cid].valid & 1) == 0)
@@ -1523,7 +1539,9 @@
 				return 0;
 			}
 		} else {  /* check for physical non-dasd devices */
-			if(dev->nondasd_support == 1){
+			if ((dev->nondasd_support == 1) || expose_physicals) {
+				if (dev->in_reset)
+					return -1;
 				return aac_send_srb_fib(scsicmd);
 			} else {
 				scsicmd->result = DID_NO_CONNECT << 16;
@@ -1579,6 +1597,8 @@
 			scsicmd->scsi_done(scsicmd);
 			return 0;
 		}
+		if (dev->in_reset)
+			return -1;
 		setinqstr(dev, (void *) (inq_data.inqd_vid), fsa_dev_ptr[cid].type);
 		inq_data.inqd_pdt = INQD_PDT_DA;	/* Direct/random access device */
 		aac_internal_transfer(scsicmd, &inq_data, 0, sizeof(inq_data));
@@ -1734,6 +1754,8 @@
 		case READ_10:
 		case READ_12:
 		case READ_16:
+			if (dev->in_reset)
+				return -1;
 			/*
 			 *	Hack to keep track of ordinal number of the device that
 			 *	corresponds to a container. Needed to convert
@@ -1752,6 +1774,8 @@
 		case WRITE_10:
 		case WRITE_12:
 		case WRITE_16:
+			if (dev->in_reset)
+				return -1;
 			return aac_write(scsicmd, cid);
 
 		case SYNCHRONIZE_CACHE:
@@ -1782,6 +1806,8 @@
 	struct fsa_dev_info *fsa_dev_ptr;
 
 	fsa_dev_ptr = dev->fsa_dev;
+	if (!fsa_dev_ptr)
+		return -EBUSY;
 	if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk)))
 		return -EFAULT;
 	if (qd.cnum == -1)
@@ -1820,6 +1846,8 @@
 	struct fsa_dev_info *fsa_dev_ptr;
 
 	fsa_dev_ptr = dev->fsa_dev;
+	if (!fsa_dev_ptr)
+		return -EBUSY;
 
 	if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
 		return -EFAULT;
@@ -1843,6 +1871,8 @@
 	struct fsa_dev_info *fsa_dev_ptr;
 
 	fsa_dev_ptr = dev->fsa_dev;
+	if (!fsa_dev_ptr)
+		return -EBUSY;
 
 	if (copy_from_user(&dd, arg, sizeof (struct aac_delete_disk)))
 		return -EFAULT;
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index d0eecd4..eb3ed91 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -494,6 +494,7 @@
 	int  (*adapter_sync_cmd)(struct aac_dev *dev, u32 command, u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6, u32 *status, u32 *r1, u32 *r2, u32 *r3, u32 *r4);
 	int  (*adapter_check_health)(struct aac_dev *dev);
 	int  (*adapter_send)(struct fib * fib);
+	int  (*adapter_ioremap)(struct aac_dev * dev, u32 size);
 };
 
 /*
@@ -682,14 +683,6 @@
 	__le32	Mailbox[8];
 };
 
-#define	InboundMailbox0		IndexRegs.Mailbox[0]
-#define	InboundMailbox1		IndexRegs.Mailbox[1]
-#define	InboundMailbox2		IndexRegs.Mailbox[2]
-#define	InboundMailbox3		IndexRegs.Mailbox[3]
-#define	InboundMailbox4		IndexRegs.Mailbox[4]
-#define	InboundMailbox5		IndexRegs.Mailbox[5]
-#define	InboundMailbox6		IndexRegs.Mailbox[6]
-
 #define	INBOUNDDOORBELL_0	0x00000001
 #define INBOUNDDOORBELL_1	0x00000002
 #define INBOUNDDOORBELL_2	0x00000004
@@ -1010,6 +1003,8 @@
 		struct rx_registers __iomem *rx;
 		struct rkt_registers __iomem *rkt;
 	} regs;
+	volatile void __iomem *base;
+	volatile struct rx_inbound __iomem *IndexRegs;
 	u32			OIMR; /* Mask Register Cache */
 	/*
 	 *	AIF thread states
@@ -1029,6 +1024,7 @@
 	  init->InitStructRevision==cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION_4)
 	u8			raw_io_64;
 	u8			printf_enabled;
+	u8			in_reset;
 };
 
 #define aac_adapter_interrupt(dev) \
@@ -1049,6 +1045,9 @@
 #define aac_adapter_send(fib) \
 	((fib)->dev)->a_ops.adapter_send(fib)
 
+#define aac_adapter_ioremap(dev, size) \
+	(dev)->a_ops.adapter_ioremap(dev, size)
+
 #define FIB_CONTEXT_FLAG_TIMED_OUT		(0x00000001)
 
 /*
@@ -1524,7 +1523,6 @@
 	__le32		count;	/* sizeof(((struct aac_get_name_resp *)NULL)->data) */
 };
 
-#define CT_OK        218
 struct aac_get_name_resp {
 	__le32		dummy0;
 	__le32		dummy1;
@@ -1670,6 +1668,7 @@
 #define RCV_TEMP_READINGS		0x00000025
 #define GET_COMM_PREFERRED_SETTINGS	0x00000026
 #define IOP_RESET			0x00001000
+#define IOP_RESET_ALWAYS		0x00001001
 #define RE_INIT_ADAPTER			0x000000ee
 
 /*
@@ -1788,7 +1787,7 @@
 int aac_fib_complete(struct fib * context);
 #define fib_data(fibctx) ((void *)(fibctx)->hw_fib->data)
 struct aac_dev *aac_init_adapter(struct aac_dev *dev);
-int aac_get_config_status(struct aac_dev *dev);
+int aac_get_config_status(struct aac_dev *dev, int commit_flag);
 int aac_get_containers(struct aac_dev *dev);
 int aac_scsi_cmd(struct scsi_cmnd *cmd);
 int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg);
@@ -1799,6 +1798,7 @@
 unsigned int aac_response_normal(struct aac_queue * q);
 unsigned int aac_command_normal(struct aac_queue * q);
 unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index);
+int aac_check_health(struct aac_dev * dev);
 int aac_command_thread(void *data);
 int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx);
 int aac_fib_adapter_complete(struct fib * fibptr, unsigned short size);
diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c
index 255421d..da1d3a9 100644
--- a/drivers/scsi/aacraid/commctrl.c
+++ b/drivers/scsi/aacraid/commctrl.c
@@ -38,7 +38,7 @@
 #include <linux/completion.h>
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
+#include <linux/delay.h> /* ssleep prototype */
 #include <linux/kthread.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -140,7 +140,8 @@
 		fibptr->hw_fib_pa = hw_fib_pa;
 		fibptr->hw_fib = hw_fib;
 	}
-	aac_fib_free(fibptr);
+	if (retval != -EINTR)
+		aac_fib_free(fibptr);
 	return retval;
 }
 
@@ -297,7 +298,7 @@
 		spin_unlock_irqrestore(&dev->fib_lock, flags);
 		/* If someone killed the AIF aacraid thread, restart it */
 		status = !dev->aif_thread;
-		if (status && dev->queues && dev->fsa_dev) {
+		if (status && !dev->in_reset && dev->queues && dev->fsa_dev) {
 			/* Be paranoid, be very paranoid! */
 			kthread_stop(dev->thread);
 			ssleep(1);
@@ -621,7 +622,13 @@
 
 		actual_fibsize = sizeof (struct aac_srb) + (((user_srbcmd->sg.count & 0xff) - 1) * sizeof (struct sgentry));
 		if(actual_fibsize != fibsize){ // User made a mistake - should not continue
-			dprintk((KERN_DEBUG"aacraid: Bad Size specified in Raw SRB command\n"));
+			dprintk((KERN_DEBUG"aacraid: Bad Size specified in "
+			  "Raw SRB command calculated fibsize=%d "
+			  "user_srbcmd->sg.count=%d aac_srb=%d sgentry=%d "
+			  "issued fibsize=%d\n",
+			  actual_fibsize, user_srbcmd->sg.count,
+			  sizeof(struct aac_srb), sizeof(struct sgentry),
+			  fibsize));
 			rcode = -EINVAL;
 			goto cleanup;
 		}
@@ -663,6 +670,10 @@
 		psg->count = cpu_to_le32(sg_indx+1);
 		status = aac_fib_send(ScsiPortCommand, srbfib, actual_fibsize, FsaNormal, 1, 1, NULL, NULL);
 	}
+	if (status == -EINTR) {
+		rcode = -EINTR;
+		goto cleanup;
+	}
 
 	if (status != 0){
 		dprintk((KERN_DEBUG"aacraid: Could not send raw srb fib to hba\n")); 
@@ -696,8 +707,10 @@
 	for(i=0; i <= sg_indx; i++){
 		kfree(sg_list[i]);
 	}
-	aac_fib_complete(srbfib);
-	aac_fib_free(srbfib);
+	if (rcode != -EINTR) {
+		aac_fib_complete(srbfib);
+		aac_fib_free(srbfib);
+	}
 
 	return rcode;
 }
diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index 1cd3584..d5cf8b9 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -180,7 +180,7 @@
 			  -2 /* Timeout silently */, 1,
 			  NULL, NULL);
 
-	if (status == 0)
+	if (status >= 0)
 		aac_fib_complete(fibctx);
 	aac_fib_free(fibctx);
 	return status;
@@ -307,17 +307,12 @@
 		if (status[1] & AAC_OPT_NEW_COMM)
 			dev->new_comm_interface = dev->a_ops.adapter_send != 0;
 		if (dev->new_comm_interface && (status[2] > dev->base_size)) {
-			iounmap(dev->regs.sa);
+			aac_adapter_ioremap(dev, 0);
 			dev->base_size = status[2];
-			dprintk((KERN_DEBUG "ioremap(%lx,%d)\n",
-			  host->base, status[2]));
-			dev->regs.sa = ioremap(host->base, status[2]);
-			if (dev->regs.sa == NULL) {
+			if (aac_adapter_ioremap(dev, status[2])) {
 				/* remap failed, go back ... */
 				dev->new_comm_interface = 0;
-				dev->regs.sa = ioremap(host->base, 
-						AAC_MIN_FOOTPRINT_SIZE);
-				if (dev->regs.sa == NULL) {	
+				if (aac_adapter_ioremap(dev, AAC_MIN_FOOTPRINT_SIZE)) {
 					printk(KERN_WARNING
 					  "aacraid: unable to map adapter.\n");
 					return NULL;
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 3f27419..8734a04 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -40,8 +40,10 @@
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
 #include <asm/semaphore.h>
 
 #include "aacraid.h"
@@ -464,6 +466,8 @@
 	dprintk((KERN_DEBUG "  hw_fib pa being sent=%lx\n",(ulong)fibptr->hw_fib_pa));
 	dprintk((KERN_DEBUG "  fib being sent=%p\n",fibptr));
 
+	if (!dev->queues)
+		return -EBUSY;
 	q = &dev->queues->queue[AdapNormCmdQueue];
 
 	if(wait)
@@ -527,8 +531,15 @@
 				}
 				udelay(5);
 			}
-		} else
-			down(&fibptr->event_wait);
+		} else if (down_interruptible(&fibptr->event_wait)) {
+			spin_lock_irqsave(&fibptr->event_lock, flags);
+			if (fibptr->done == 0) {
+				fibptr->done = 2; /* Tell interrupt we aborted */
+				spin_unlock_irqrestore(&fibptr->event_lock, flags);
+				return -EINTR;
+			}
+			spin_unlock_irqrestore(&fibptr->event_lock, flags);
+		}
 		BUG_ON(fibptr->done == 0);
 			
 		if((fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT)){
@@ -795,7 +806,7 @@
 
 	/* Sniff for container changes */
 
-	if (!dev)
+	if (!dev || !dev->fsa_dev)
 		return;
 	container = (u32)-1;
 
@@ -1022,13 +1033,7 @@
 	if (device) {
 		switch (device_config_needed) {
 		case DELETE:
-			scsi_remove_device(device);
-			break;
 		case CHANGE:
-			if (!dev->fsa_dev[container].valid) {
-				scsi_remove_device(device);
-				break;
-			}
 			scsi_rescan_device(&device->sdev_gendev);
 
 		default:
@@ -1045,6 +1050,262 @@
 
 }
 
+static int _aac_reset_adapter(struct aac_dev *aac)
+{
+	int index, quirks;
+	u32 ret;
+	int retval;
+	struct Scsi_Host *host;
+	struct scsi_device *dev;
+	struct scsi_cmnd *command;
+	struct scsi_cmnd *command_list;
+
+	/*
+	 * Assumptions:
+	 *	- host is locked.
+	 *	- in_reset is asserted, so no new i/o is getting to the
+	 *	  card.
+	 *	- The card is dead.
+	 */
+	host = aac->scsi_host_ptr;
+	scsi_block_requests(host);
+	aac_adapter_disable_int(aac);
+	spin_unlock_irq(host->host_lock);
+	kthread_stop(aac->thread);
+
+	/*
+	 *	A positive health value means the adapter is in a known DEAD PANIC
+	 * state and could be reset to `try again'.
+	 */
+	retval = aac_adapter_check_health(aac);
+	if (retval == 0)
+		retval = aac_adapter_sync_cmd(aac, IOP_RESET_ALWAYS,
+		  0, 0, 0, 0, 0, 0, &ret, NULL, NULL, NULL, NULL);
+	if (retval)
+		retval = aac_adapter_sync_cmd(aac, IOP_RESET,
+		  0, 0, 0, 0, 0, 0, &ret, NULL, NULL, NULL, NULL);
+
+	if (retval)
+		goto out;
+	if (ret != 0x00000001) {
+		retval = -ENODEV;
+		goto out;
+	}
+
+	index = aac->cardtype;
+
+	/*
+	 * Re-initialize the adapter, first free resources, then carefully
+	 * apply the initialization sequence to come back again. Only risk
+	 * is a change in Firmware dropping cache, it is assumed the caller
+	 * will ensure that i/o is quiesced and the card is flushed in that
+	 * case.
+	 */
+	aac_fib_map_free(aac);
+	aac->hw_fib_va = NULL;
+	aac->hw_fib_pa = 0;
+	pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys);
+	aac->comm_addr = NULL;
+	aac->comm_phys = 0;
+	kfree(aac->queues);
+	aac->queues = NULL;
+	free_irq(aac->pdev->irq, aac);
+	kfree(aac->fsa_dev);
+	aac->fsa_dev = NULL;
+	if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT) {
+		if (((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK))) ||
+		  ((retval = pci_set_consistent_dma_mask(aac->pdev, DMA_32BIT_MASK))))
+			goto out;
+	} else {
+		if (((retval = pci_set_dma_mask(aac->pdev, 0x7FFFFFFFULL))) ||
+		  ((retval = pci_set_consistent_dma_mask(aac->pdev, 0x7FFFFFFFULL))))
+			goto out;
+	}
+	if ((retval = (*(aac_get_driver_ident(index)->init))(aac)))
+		goto out;
+	if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT)
+		if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK)))
+			goto out;
+	aac->thread = kthread_run(aac_command_thread, aac, aac->name);
+	if (IS_ERR(aac->thread)) {
+		retval = PTR_ERR(aac->thread);
+		goto out;
+	}
+	(void)aac_get_adapter_info(aac);
+	quirks = aac_get_driver_ident(index)->quirks;
+	if ((quirks & AAC_QUIRK_34SG) && (host->sg_tablesize > 34)) {
+ 		host->sg_tablesize = 34;
+ 		host->max_sectors = (host->sg_tablesize * 8) + 112;
+ 	}
+ 	if ((quirks & AAC_QUIRK_17SG) && (host->sg_tablesize > 17)) {
+ 		host->sg_tablesize = 17;
+ 		host->max_sectors = (host->sg_tablesize * 8) + 112;
+ 	}
+	aac_get_config_status(aac, 1);
+	aac_get_containers(aac);
+	/*
+	 * This is where the assumption that the Adapter is quiesced
+	 * is important.
+	 */
+	command_list = NULL;
+	__shost_for_each_device(dev, host) {
+		unsigned long flags;
+		spin_lock_irqsave(&dev->list_lock, flags);
+		list_for_each_entry(command, &dev->cmd_list, list)
+			if (command->SCp.phase == AAC_OWNER_FIRMWARE) {
+				command->SCp.buffer = (struct scatterlist *)command_list;
+				command_list = command;
+			}
+		spin_unlock_irqrestore(&dev->list_lock, flags);
+	}
+	while ((command = command_list)) {
+		command_list = (struct scsi_cmnd *)command->SCp.buffer;
+		command->SCp.buffer = NULL;
+		command->result = DID_OK << 16
+		  | COMMAND_COMPLETE << 8
+		  | SAM_STAT_TASK_SET_FULL;
+		command->SCp.phase = AAC_OWNER_ERROR_HANDLER;
+		command->scsi_done(command);
+	}
+	retval = 0;
+
+out:
+	aac->in_reset = 0;
+	scsi_unblock_requests(host);
+	spin_lock_irq(host->host_lock);
+	return retval;
+}
+
+int aac_check_health(struct aac_dev * aac)
+{
+	int BlinkLED;
+	unsigned long time_now, flagv = 0;
+	struct list_head * entry;
+	struct Scsi_Host * host;
+
+	/* Extending the scope of fib_lock slightly to protect aac->in_reset */
+	if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0)
+		return 0;
+
+	if (aac->in_reset || !(BlinkLED = aac_adapter_check_health(aac))) {
+		spin_unlock_irqrestore(&aac->fib_lock, flagv);
+		return 0; /* OK */
+	}
+
+	aac->in_reset = 1;
+
+	/* Fake up an AIF:
+	 *	aac_aifcmd.command = AifCmdEventNotify = 1
+	 *	aac_aifcmd.seqnum = 0xFFFFFFFF
+	 *	aac_aifcmd.data[0] = AifEnExpEvent = 23
+	 *	aac_aifcmd.data[1] = AifExeFirmwarePanic = 3
+	 *	aac_aifcmd.data[2] = AifHighPriority = 3
+	 *	aac_aifcmd.data[3] = BlinkLED
+	 */
+
+	time_now = jiffies/HZ;
+	entry = aac->fib_list.next;
+
+	/*
+	 * For each Context that is on the
+	 * fibctxList, make a copy of the
+	 * fib, and then set the event to wake up the
+	 * thread that is waiting for it.
+	 */
+	while (entry != &aac->fib_list) {
+		/*
+		 * Extract the fibctx
+		 */
+		struct aac_fib_context *fibctx = list_entry(entry, struct aac_fib_context, next);
+		struct hw_fib * hw_fib;
+		struct fib * fib;
+		/*
+		 * Check if the queue is getting
+		 * backlogged
+		 */
+		if (fibctx->count > 20) {
+			/*
+			 * It's *not* jiffies folks,
+			 * but jiffies / HZ, so do not
+			 * panic ...
+			 */
+			u32 time_last = fibctx->jiffies;
+			/*
+			 * Has it been > 2 minutes
+			 * since the last read off
+			 * the queue?
+			 */
+			if ((time_now - time_last) > aif_timeout) {
+				entry = entry->next;
+				aac_close_fib_context(aac, fibctx);
+				continue;
+			}
+		}
+		/*
+		 * Warning: no sleep allowed while
+		 * holding spinlock
+		 */
+		hw_fib = kmalloc(sizeof(struct hw_fib), GFP_ATOMIC);
+		fib = kmalloc(sizeof(struct fib), GFP_ATOMIC);
+		if (fib && hw_fib) {
+			struct aac_aifcmd * aif;
+
+			memset(hw_fib, 0, sizeof(struct hw_fib));
+			memset(fib, 0, sizeof(struct fib));
+			fib->hw_fib = hw_fib;
+			fib->dev = aac;
+			aac_fib_init(fib);
+			fib->type = FSAFS_NTC_FIB_CONTEXT;
+			fib->size = sizeof (struct fib);
+			fib->data = hw_fib->data;
+			aif = (struct aac_aifcmd *)hw_fib->data;
+			aif->command = cpu_to_le32(AifCmdEventNotify);
+		 	aif->seqnum = cpu_to_le32(0xFFFFFFFF);
+		 	aif->data[0] = cpu_to_le32(AifEnExpEvent);
+			aif->data[1] = cpu_to_le32(AifExeFirmwarePanic);
+		 	aif->data[2] = cpu_to_le32(AifHighPriority);
+			aif->data[3] = cpu_to_le32(BlinkLED);
+
+			/*
+			 * Put the FIB onto the
+			 * fibctx's fibs
+			 */
+			list_add_tail(&fib->fiblink, &fibctx->fib_list);
+			fibctx->count++;
+			/*
+			 * Set the event to wake up the
+			 * thread that is waiting.
+			 */
+			up(&fibctx->wait_sem);
+		} else {
+			printk(KERN_WARNING "aifd: didn't allocate NewFib.\n");
+			kfree(fib);
+			kfree(hw_fib);
+		}
+		entry = entry->next;
+	}
+
+	spin_unlock_irqrestore(&aac->fib_lock, flagv);
+
+	if (BlinkLED < 0) {
+		printk(KERN_ERR "%s: Host adapter dead %d\n", aac->name, BlinkLED);
+		goto out;
+	}
+
+	printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED);
+
+	host = aac->scsi_host_ptr;
+	spin_lock_irqsave(host->host_lock, flagv);
+	BlinkLED = _aac_reset_adapter(aac);
+	spin_unlock_irqrestore(host->host_lock, flagv);
+	return BlinkLED;
+
+out:
+	aac->in_reset = 0;
+	return BlinkLED;
+}
+
+
 /**
  *	aac_command_thread	-	command processing thread
  *	@dev: Adapter to monitor
diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c
index b2a5c72..8335f07 100644
--- a/drivers/scsi/aacraid/dpcsup.c
+++ b/drivers/scsi/aacraid/dpcsup.c
@@ -124,10 +124,15 @@
 		} else {
 			unsigned long flagv;
 			spin_lock_irqsave(&fib->event_lock, flagv);
-			fib->done = 1;
+			if (!fib->done)
+				fib->done = 1;
 			up(&fib->event_wait);
 			spin_unlock_irqrestore(&fib->event_lock, flagv);
 			FIB_COUNTER_INCREMENT(aac_config.NormalRecved);
+			if (fib->done == 2) {
+				aac_fib_complete(fib);
+				aac_fib_free(fib);
+			}
 		}
 		consumed++;
 		spin_lock_irqsave(q->lock, flags);
@@ -316,7 +321,8 @@
 			unsigned long flagv;
 	  		dprintk((KERN_INFO "event_wait up\n"));
 			spin_lock_irqsave(&fib->event_lock, flagv);
-			fib->done = 1;
+			if (!fib->done)
+				fib->done = 1;
 			up(&fib->event_wait);
 			spin_unlock_irqrestore(&fib->event_lock, flagv);
 			FIB_COUNTER_INCREMENT(aac_config.NormalRecved);
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index e42a479..359e7dd 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -82,6 +82,8 @@
 static int aac_cfg_major = -1;
 char aac_driver_version[] = AAC_DRIVER_FULL_VERSION;
 
+extern int expose_physicals;
+
 /*
  * Because of the way Linux names scsi devices, the order in this table has
  * become important.  Check for on-board Raid first, add-in cards second.
@@ -394,6 +396,7 @@
 		sdev->skip_ms_page_3f = 1;
 	}
 	if ((sdev->type == TYPE_DISK) &&
+			!expose_physicals &&
 			(sdev_channel(sdev) != CONTAINER_CHANNEL)) {
 		struct aac_dev *aac = (struct aac_dev *)sdev->host->hostdata;
 		if (!aac->raid_scsi_mode || (sdev_channel(sdev) != 2))
@@ -454,17 +457,17 @@
 	printk(KERN_ERR "%s: Host adapter reset request. SCSI hang ?\n", 
 					AAC_DRIVERNAME);
 	aac = (struct aac_dev *)host->hostdata;
-	if (aac_adapter_check_health(aac)) {
-		printk(KERN_ERR "%s: Host adapter appears dead\n", 
-				AAC_DRIVERNAME);
-		return -ENODEV;
-	}
+
+	if ((count = aac_check_health(aac)))
+		return count;
 	/*
 	 * Wait for all commands to complete to this specific
 	 * target (block maximum 60 seconds).
 	 */
 	for (count = 60; count; --count) {
-		int active = 0;
+		int active = aac->in_reset;
+
+		if (active == 0)
 		__shost_for_each_device(dev, host) {
 			spin_lock_irqsave(&dev->list_lock, flags);
 			list_for_each_entry(command, &dev->cmd_list, list) {
@@ -864,13 +867,6 @@
 	 *	Map in the registers from the adapter.
 	 */
 	aac->base_size = AAC_MIN_FOOTPRINT_SIZE;
-	if ((aac->regs.sa = ioremap(
-	  (unsigned long)aac->scsi_host_ptr->base, AAC_MIN_FOOTPRINT_SIZE))
-	  == NULL) {	
-		printk(KERN_WARNING "%s: unable to map adapter.\n",
-		  AAC_DRIVERNAME);
-		goto out_free_fibs;
-	}
 	if ((*aac_drivers[index].init)(aac))
 		goto out_unmap;
 
@@ -928,12 +924,12 @@
 	 * all containers are on the virtual channel 0 (CONTAINER_CHANNEL)
 	 * physical channels are address by their actual physical number+1
 	 */
-	if (aac->nondasd_support == 1)
+	if ((aac->nondasd_support == 1) || expose_physicals)
 		shost->max_channel = aac->maximum_num_channels;
 	else
 		shost->max_channel = 0;
 
-	aac_get_config_status(aac);
+	aac_get_config_status(aac, 0);
 	aac_get_containers(aac);
 	list_add(&aac->entry, insert);
 
@@ -969,8 +965,7 @@
 	aac_fib_map_free(aac);
 	pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys);
 	kfree(aac->queues);
-	iounmap(aac->regs.sa);
- out_free_fibs:
+	aac_adapter_ioremap(aac, 0);
 	kfree(aac->fibs);
 	kfree(aac->fsa_dev);
  out_free_host:
@@ -1005,7 +1000,7 @@
 	kfree(aac->queues);
 
 	free_irq(pdev->irq, aac);
-	iounmap(aac->regs.sa);
+	aac_adapter_ioremap(aac, 0);
 	
 	kfree(aac->fibs);
 	kfree(aac->fsa_dev);
@@ -1013,6 +1008,10 @@
 	list_del(&aac->entry);
 	scsi_host_put(shost);
 	pci_disable_device(pdev);
+	if (list_empty(&aac_devices)) {
+		unregister_chrdev(aac_cfg_major, "aac");
+		aac_cfg_major = -1;
+	}
 }
 
 static struct pci_driver aac_pci_driver = {
diff --git a/drivers/scsi/aacraid/rkt.c b/drivers/scsi/aacraid/rkt.c
index 458ea89..643f23b 100644
--- a/drivers/scsi/aacraid/rkt.c
+++ b/drivers/scsi/aacraid/rkt.c
@@ -28,370 +28,27 @@
  *
  */
 
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
 #include <linux/blkdev.h>
-#include <linux/delay.h>
-#include <linux/completion.h>
-#include <linux/time.h>
-#include <linux/interrupt.h>
-#include <asm/semaphore.h>
 
 #include <scsi/scsi_host.h>
 
 #include "aacraid.h"
 
-static irqreturn_t aac_rkt_intr(int irq, void *dev_id, struct pt_regs *regs)
+/**
+ *	aac_rkt_ioremap
+ *	@size: mapping resize request
+ *
+ */
+static int aac_rkt_ioremap(struct aac_dev * dev, u32 size)
 {
-	struct aac_dev *dev = dev_id;
-
-	if (dev->new_comm_interface) {
-		u32 Index = rkt_readl(dev, MUnit.OutboundQueue);
-		if (Index == 0xFFFFFFFFL)
-			Index = rkt_readl(dev, MUnit.OutboundQueue);
-		if (Index != 0xFFFFFFFFL) {
-			do {
-				if (aac_intr_normal(dev, Index)) {
-					rkt_writel(dev, MUnit.OutboundQueue, Index);
-					rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormRespReady);
-				}
-				Index = rkt_readl(dev, MUnit.OutboundQueue);
-			} while (Index != 0xFFFFFFFFL);
-			return IRQ_HANDLED;
-		}
-	} else {
-		unsigned long bellbits;
-		u8 intstat;
-		intstat = rkt_readb(dev, MUnit.OISR);
-		/*
-		 *	Read mask and invert because drawbridge is reversed.
-		 *	This allows us to only service interrupts that have 
-		 *	been enabled.
-		 *	Check to see if this is our interrupt.  If it isn't just return
-		 */
-		if (intstat & ~(dev->OIMR))
-		{
-			bellbits = rkt_readl(dev, OutboundDoorbellReg);
-			if (bellbits & DoorBellPrintfReady) {
-				aac_printf(dev, rkt_readl (dev, IndexRegs.Mailbox[5]));
-				rkt_writel(dev, MUnit.ODR,DoorBellPrintfReady);
-				rkt_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
-			}
-			else if (bellbits & DoorBellAdapterNormCmdReady) {
-				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
-				aac_command_normal(&dev->queues->queue[HostNormCmdQueue]);
-//				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdReady);
-			}
-			else if (bellbits & DoorBellAdapterNormRespReady) {
-				rkt_writel(dev, MUnit.ODR,DoorBellAdapterNormRespReady);
-				aac_response_normal(&dev->queues->queue[HostNormRespQueue]);
-			}
-			else if (bellbits & DoorBellAdapterNormCmdNotFull) {
-				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
-			}
-			else if (bellbits & DoorBellAdapterNormRespNotFull) {
-				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormCmdNotFull);
-				rkt_writel(dev, MUnit.ODR, DoorBellAdapterNormRespNotFull);
-			}
-			return IRQ_HANDLED;
-		}
+	if (!size) {
+		iounmap(dev->regs.rkt);
+		return 0;
 	}
-	return IRQ_NONE;
-}
-
-/**
- *	aac_rkt_disable_interrupt	-	Disable interrupts
- *	@dev: Adapter
- */
-
-static void aac_rkt_disable_interrupt(struct aac_dev *dev)
-{
-	rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
-}
-
-/**
- *	rkt_sync_cmd	-	send a command and wait
- *	@dev: Adapter
- *	@command: Command to execute
- *	@p1: first parameter
- *	@ret: adapter status
- *
- *	This routine will send a synchronous command to the adapter and wait 
- *	for its	completion.
- */
-
-static int rkt_sync_cmd(struct aac_dev *dev, u32 command,
-	u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6,
-	u32 *status, u32 *r1, u32 *r2, u32 *r3, u32 *r4)
-{
-	unsigned long start;
-	int ok;
-	/*
-	 *	Write the command into Mailbox 0
-	 */
-	rkt_writel(dev, InboundMailbox0, command);
-	/*
-	 *	Write the parameters into Mailboxes 1 - 6
-	 */
-	rkt_writel(dev, InboundMailbox1, p1);
-	rkt_writel(dev, InboundMailbox2, p2);
-	rkt_writel(dev, InboundMailbox3, p3);
-	rkt_writel(dev, InboundMailbox4, p4);
-	/*
-	 *	Clear the synch command doorbell to start on a clean slate.
-	 */
-	rkt_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
-	/*
-	 *	Disable doorbell interrupts
-	 */
-	rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
-	/*
-	 *	Force the completion of the mask register write before issuing
-	 *	the interrupt.
-	 */
-	rkt_readb (dev, MUnit.OIMR);
-	/*
-	 *	Signal that there is a new synch command
-	 */
-	rkt_writel(dev, InboundDoorbellReg, INBOUNDDOORBELL_0);
-
-	ok = 0;
-	start = jiffies;
-
-	/*
-	 *	Wait up to 30 seconds
-	 */
-	while (time_before(jiffies, start+30*HZ)) 
-	{
-		udelay(5);	/* Delay 5 microseconds to let Mon960 get info. */
-		/*
-		 *	Mon960 will set doorbell0 bit when it has completed the command.
-		 */
-		if (rkt_readl(dev, OutboundDoorbellReg) & OUTBOUNDDOORBELL_0) {
-			/*
-			 *	Clear the doorbell.
-			 */
-			rkt_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
-			ok = 1;
-			break;
-		}
-		/*
-		 *	Yield the processor in case we are slow 
-		 */
-		msleep(1);
-	}
-	if (ok != 1) {
-		/*
-		 *	Restore interrupt mask even though we timed out
-		 */
-		if (dev->new_comm_interface)
-			rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
-		else
-			rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
-		return -ETIMEDOUT;
-	}
-	/*
-	 *	Pull the synch status from Mailbox 0.
-	 */
-	if (status)
-		*status = rkt_readl(dev, IndexRegs.Mailbox[0]);
-	if (r1)
-		*r1 = rkt_readl(dev, IndexRegs.Mailbox[1]);
-	if (r2)
-		*r2 = rkt_readl(dev, IndexRegs.Mailbox[2]);
-	if (r3)
-		*r3 = rkt_readl(dev, IndexRegs.Mailbox[3]);
-	if (r4)
-		*r4 = rkt_readl(dev, IndexRegs.Mailbox[4]);
-	/*
-	 *	Clear the synch command doorbell.
-	 */
-	rkt_writel(dev, OutboundDoorbellReg, OUTBOUNDDOORBELL_0);
-	/*
-	 *	Restore interrupt mask
-	 */
-	if (dev->new_comm_interface)
-		rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
-	else
-		rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
-	return 0;
-
-}
-
-/**
- *	aac_rkt_interrupt_adapter	-	interrupt adapter
- *	@dev: Adapter
- *
- *	Send an interrupt to the i960 and breakpoint it.
- */
-
-static void aac_rkt_interrupt_adapter(struct aac_dev *dev)
-{
-	rkt_sync_cmd(dev, BREAKPOINT_REQUEST, 0, 0, 0, 0, 0, 0,
-	  NULL, NULL, NULL, NULL, NULL);
-}
-
-/**
- *	aac_rkt_notify_adapter		-	send an event to the adapter
- *	@dev: Adapter
- *	@event: Event to send
- *
- *	Notify the i960 that something it probably cares about has
- *	happened.
- */
-
-static void aac_rkt_notify_adapter(struct aac_dev *dev, u32 event)
-{
-	switch (event) {
-
-	case AdapNormCmdQue:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_1);
-		break;
-	case HostNormRespNotFull:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_4);
-		break;
-	case AdapNormRespQue:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_2);
-		break;
-	case HostNormCmdNotFull:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3);
-		break;
-	case HostShutdown:
-//		rkt_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, 0, 0,
-//		  NULL, NULL, NULL, NULL, NULL);
-		break;
-	case FastIo:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6);
-		break;
-	case AdapPrintfDone:
-		rkt_writel(dev, MUnit.IDR,INBOUNDDOORBELL_5);
-		break;
-	default:
-		BUG();
-		break;
-	}
-}
-
-/**
- *	aac_rkt_start_adapter		-	activate adapter
- *	@dev:	Adapter
- *
- *	Start up processing on an i960 based AAC adapter
- */
-
-static void aac_rkt_start_adapter(struct aac_dev *dev)
-{
-	struct aac_init *init;
-
-	init = dev->init;
-	init->HostElapsedSeconds = cpu_to_le32(get_seconds());
-	// We can only use a 32 bit address here
-	rkt_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa,
-	  0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
-}
-
-/**
- *	aac_rkt_check_health
- *	@dev: device to check if healthy
- *
- *	Will attempt to determine if the specified adapter is alive and
- *	capable of handling requests, returning 0 if alive.
- */
-static int aac_rkt_check_health(struct aac_dev *dev)
-{
-	u32 status = rkt_readl(dev, MUnit.OMRx[0]);
-
-	/*
-	 *	Check to see if the board failed any self tests.
-	 */
-	if (status & SELF_TEST_FAILED)
+	dev->base = dev->regs.rkt = ioremap(dev->scsi_host_ptr->base, size);
+	if (dev->base == NULL)
 		return -1;
-	/*
-	 *	Check to see if the board panic'd.
-	 */
-	if (status & KERNEL_PANIC) {
-		char * buffer;
-		struct POSTSTATUS {
-			__le32 Post_Command;
-			__le32 Post_Address;
-		} * post;
-		dma_addr_t paddr, baddr;
-		int ret;
-
-		if ((status & 0xFF000000L) == 0xBC000000L)
-			return (status >> 16) & 0xFF;
-		buffer = pci_alloc_consistent(dev->pdev, 512, &baddr);
-		ret = -2;
-		if (buffer == NULL)
-			return ret;
-		post = pci_alloc_consistent(dev->pdev,
-		  sizeof(struct POSTSTATUS), &paddr);
-		if (post == NULL) {
-			pci_free_consistent(dev->pdev, 512, buffer, baddr);
-			return ret;
-		}
-                memset(buffer, 0, 512);
-		post->Post_Command = cpu_to_le32(COMMAND_POST_RESULTS);
-                post->Post_Address = cpu_to_le32(baddr);
-                rkt_writel(dev, MUnit.IMRx[0], paddr);
-                rkt_sync_cmd(dev, COMMAND_POST_RESULTS, baddr, 0, 0, 0, 0, 0,
-		  NULL, NULL, NULL, NULL, NULL);
-		pci_free_consistent(dev->pdev, sizeof(struct POSTSTATUS),
-		  post, paddr);
-                if ((buffer[0] == '0') && ((buffer[1] == 'x') || (buffer[1] == 'X'))) {
-                        ret = (buffer[2] <= '9') ? (buffer[2] - '0') : (buffer[2] - 'A' + 10);
-                        ret <<= 4;
-                        ret += (buffer[3] <= '9') ? (buffer[3] - '0') : (buffer[3] - 'A' + 10);
-                }
-		pci_free_consistent(dev->pdev, 512, buffer, baddr);
-                return ret;
-        }
-	/*
-	 *	Wait for the adapter to be up and running.
-	 */
-	if (!(status & KERNEL_UP_AND_RUNNING))
-		return -3;
-	/*
-	 *	Everything is OK
-	 */
-	return 0;
-}
-
-/**
- *	aac_rkt_send
- *	@fib: fib to issue
- *
- *	Will send a fib, returning 0 if successful.
- */
-static int aac_rkt_send(struct fib * fib)
-{
-	u64 addr = fib->hw_fib_pa;
-	struct aac_dev *dev = fib->dev;
-	volatile void __iomem *device = dev->regs.rkt;
-	u32 Index;
-
-	dprintk((KERN_DEBUG "%p->aac_rkt_send(%p->%llx)\n", dev, fib, addr));
-	Index = rkt_readl(dev, MUnit.InboundQueue);
-	if (Index == 0xFFFFFFFFL)
-		Index = rkt_readl(dev, MUnit.InboundQueue);
-	dprintk((KERN_DEBUG "Index = 0x%x\n", Index));
-	if (Index == 0xFFFFFFFFL)
-		return Index;
-	device += Index;
-	dprintk((KERN_DEBUG "entry = %x %x %u\n", (u32)(addr & 0xffffffff),
-	  (u32)(addr >> 32), (u32)le16_to_cpu(fib->hw_fib->header.Size)));
-	writel((u32)(addr & 0xffffffff), device);
-	device += sizeof(u32);
-	writel((u32)(addr >> 32), device);
-	device += sizeof(u32);
-	writel(le16_to_cpu(fib->hw_fib->header.Size), device);
-	rkt_writel(dev, MUnit.InboundQueue, Index);
-	dprintk((KERN_DEBUG "aac_rkt_send - return 0\n"));
+	dev->IndexRegs = &dev->regs.rkt->IndexRegs;
 	return 0;
 }
 
@@ -406,78 +63,18 @@
 
 int aac_rkt_init(struct aac_dev *dev)
 {
-	unsigned long start;
-	unsigned long status;
-	int instance;
-	const char * name;
+	int retval;
+	extern int _aac_rx_init(struct aac_dev *dev);
+	extern void aac_rx_start_adapter(struct aac_dev *dev);
 
-	instance = dev->id;
-	name     = dev->name;
-
-	/*
-	 *	Check to see if the board panic'd while booting.
-	 */
-	/*
-	 *	Check to see if the board failed any self tests.
-	 */
-	if (rkt_readl(dev, MUnit.OMRx[0]) & SELF_TEST_FAILED) {
-		printk(KERN_ERR "%s%d: adapter self-test failed.\n", dev->name, instance);
-		goto error_iounmap;
-	}
-	/*
-	 *	Check to see if the monitor panic'd while booting.
-	 */
-	if (rkt_readl(dev, MUnit.OMRx[0]) & MONITOR_PANIC) {
-		printk(KERN_ERR "%s%d: adapter monitor panic.\n", dev->name, instance);
-		goto error_iounmap;
-	}
-	/*
-	 *	Check to see if the board panic'd while booting.
-	 */
-	if (rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) {
-		printk(KERN_ERR "%s%d: adapter kernel panic'd.\n", dev->name, instance);
-		goto error_iounmap;
-	}
-	start = jiffies;
-	/*
-	 *	Wait for the adapter to be up and running. Wait up to 3 minutes
-	 */
-	while (!(rkt_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING))
-	{
-		if(time_after(jiffies, start+startup_timeout*HZ))
-		{
-			status = rkt_readl(dev, MUnit.OMRx[0]);
-			printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n", 
-					dev->name, instance, status);
-			goto error_iounmap;
-		}
-		msleep(1);
-	}
-	if (request_irq(dev->scsi_host_ptr->irq, aac_rkt_intr, IRQF_SHARED|IRQF_DISABLED, "aacraid", (void *)dev)<0)
-	{
-		printk(KERN_ERR "%s%d: Interrupt unavailable.\n", name, instance);
-		goto error_iounmap;
-	}
 	/*
 	 *	Fill in the function dispatch table.
 	 */
-	dev->a_ops.adapter_interrupt = aac_rkt_interrupt_adapter;
-	dev->a_ops.adapter_disable_int = aac_rkt_disable_interrupt;
-	dev->a_ops.adapter_notify = aac_rkt_notify_adapter;
-	dev->a_ops.adapter_sync_cmd = rkt_sync_cmd;
-	dev->a_ops.adapter_check_health = aac_rkt_check_health;
-	dev->a_ops.adapter_send = aac_rkt_send;
+	dev->a_ops.adapter_ioremap = aac_rkt_ioremap;
 
-	/*
-	 *	First clear out all interrupts.  Then enable the one's that we
-	 *	can handle.
-	 */
-	rkt_writeb(dev, MUnit.OIMR, 0xff);
-	rkt_writel(dev, MUnit.ODR, 0xffffffff);
-	rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xfb);
-
-	if (aac_init_adapter(dev) == NULL)
-		goto error_irq;
+	retval = _aac_rx_init(dev);
+	if (retval)
+		return retval;
 	if (dev->new_comm_interface) {
 		/*
 		 * FIB Setup has already been done, but we can minimize the
@@ -494,20 +91,11 @@
 			dev->init->MaxIoCommands = cpu_to_le32(246);
 			dev->scsi_host_ptr->can_queue = 246 - AAC_NUM_MGT_FIB;
 		}
-		rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
 	}
 	/*
 	 *	Tell the adapter that all is configured, and it can start
 	 *	accepting requests
 	 */
-	aac_rkt_start_adapter(dev);
+	aac_rx_start_adapter(dev);
 	return 0;
-
-error_irq:
-	rkt_writeb(dev, MUnit.OIMR, dev->OIMR = 0xff);
-	free_irq(dev->scsi_host_ptr->irq, (void *)dev);
-
-error_iounmap:
-
-	return -1;
 }
diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c
index 035018d..a1d214d 100644
--- a/drivers/scsi/aacraid/rx.c
+++ b/drivers/scsi/aacraid/rx.c
@@ -79,7 +79,7 @@
 		{
 			bellbits = rx_readl(dev, OutboundDoorbellReg);
 			if (bellbits & DoorBellPrintfReady) {
-				aac_printf(dev, rx_readl (dev, IndexRegs.Mailbox[5]));
+				aac_printf(dev, readl (&dev->IndexRegs->Mailbox[5]));
 				rx_writel(dev, MUnit.ODR,DoorBellPrintfReady);
 				rx_writel(dev, InboundDoorbellReg,DoorBellPrintfDone);
 			}
@@ -134,14 +134,14 @@
 	/*
 	 *	Write the command into Mailbox 0
 	 */
-	rx_writel(dev, InboundMailbox0, command);
+	writel(command, &dev->IndexRegs->Mailbox[0]);
 	/*
 	 *	Write the parameters into Mailboxes 1 - 6
 	 */
-	rx_writel(dev, InboundMailbox1, p1);
-	rx_writel(dev, InboundMailbox2, p2);
-	rx_writel(dev, InboundMailbox3, p3);
-	rx_writel(dev, InboundMailbox4, p4);
+	writel(p1, &dev->IndexRegs->Mailbox[1]);
+	writel(p2, &dev->IndexRegs->Mailbox[2]);
+	writel(p3, &dev->IndexRegs->Mailbox[3]);
+	writel(p4, &dev->IndexRegs->Mailbox[4]);
 	/*
 	 *	Clear the synch command doorbell to start on a clean slate.
 	 */
@@ -199,15 +199,15 @@
 	 *	Pull the synch status from Mailbox 0.
 	 */
 	if (status)
-		*status = rx_readl(dev, IndexRegs.Mailbox[0]);
+		*status = readl(&dev->IndexRegs->Mailbox[0]);
 	if (r1)
-		*r1 = rx_readl(dev, IndexRegs.Mailbox[1]);
+		*r1 = readl(&dev->IndexRegs->Mailbox[1]);
 	if (r2)
-		*r2 = rx_readl(dev, IndexRegs.Mailbox[2]);
+		*r2 = readl(&dev->IndexRegs->Mailbox[2]);
 	if (r3)
-		*r3 = rx_readl(dev, IndexRegs.Mailbox[3]);
+		*r3 = readl(&dev->IndexRegs->Mailbox[3]);
 	if (r4)
-		*r4 = rx_readl(dev, IndexRegs.Mailbox[4]);
+		*r4 = readl(&dev->IndexRegs->Mailbox[4]);
 	/*
 	 *	Clear the synch command doorbell.
 	 */
@@ -261,8 +261,6 @@
 		rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_3);
 		break;
 	case HostShutdown:
-//		rx_sync_cmd(dev, HOST_CRASHING, 0, 0, 0, 0, 0, 0,
-//		  NULL, NULL, NULL, NULL, NULL);
 		break;
 	case FastIo:
 		rx_writel(dev, MUnit.IDR,INBOUNDDOORBELL_6);
@@ -283,7 +281,7 @@
  *	Start up processing on an i960 based AAC adapter
  */
 
-static void aac_rx_start_adapter(struct aac_dev *dev)
+void aac_rx_start_adapter(struct aac_dev *dev)
 {
 	struct aac_init *init;
 
@@ -381,7 +379,7 @@
 	dprintk((KERN_DEBUG "Index = 0x%x\n", Index));
 	if (Index == 0xFFFFFFFFL)
 		return Index;
-	device += Index;
+	device = dev->base + Index;
 	dprintk((KERN_DEBUG "entry = %x %x %u\n", (u32)(addr & 0xffffffff),
 	  (u32)(addr >> 32), (u32)le16_to_cpu(fib->hw_fib->header.Size)));
 	writel((u32)(addr & 0xffffffff), device);
@@ -395,6 +393,43 @@
 }
 
 /**
+ *	aac_rx_ioremap	-	map or unmap the adapter register window
+ *	@size: bytes to ioremap(); zero means iounmap() dev->regs.rx instead
+ *	Returns 0 on success or unmap, -1 if ioremap() returned NULL.
+ */
+static int aac_rx_ioremap(struct aac_dev * dev, u32 size)
+{
+	if (!size) {
+		iounmap(dev->regs.rx);
+		return 0;
+	}
+	dev->base = dev->regs.rx = ioremap(dev->scsi_host_ptr->base, size);
+	if (dev->base == NULL)
+		return -1;
+	dev->IndexRegs = &dev->regs.rx->IndexRegs;	/* mailbox accesses go through this */
+	return 0;
+}
+
+static int aac_rx_restart_adapter(struct aac_dev *dev)
+{
+	u32 var;	/* filled in by the IOP_RESET sync command below */
+
+	printk(KERN_ERR "%s%d: adapter kernel panic'd.\n",
+			dev->name, dev->id);
+
+	if (aac_rx_check_health(dev) <= 0)	/* NOTE(review): presumably >0 is a panic code - confirm */
+		return 1;
+	if (rx_sync_cmd(dev, IOP_RESET, 0, 0, 0, 0, 0, 0,
+			&var, NULL, NULL, NULL, NULL))
+		return 1;
+	if (var != 0x00000001)	/* any other value is treated as a failed reset */
+		 return 1;
+	if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC)	/* still panic'd after the reset */
+		return 1;
+	return 0;	/* 0: restart worked; 1 (all paths above): give up */
+}
+
+/**
  *	aac_rx_init	-	initialize an i960 based AAC card
  *	@dev: device to configure
  *
@@ -403,7 +438,7 @@
  *	to the comm region.
  */
 
-int aac_rx_init(struct aac_dev *dev)
+int _aac_rx_init(struct aac_dev *dev)
 {
 	unsigned long start;
 	unsigned long status;
@@ -413,27 +448,30 @@
 	instance = dev->id;
 	name     = dev->name;
 
+	if (aac_adapter_ioremap(dev, dev->base_size)) {
+		printk(KERN_WARNING "%s: unable to map adapter.\n", name);
+		goto error_iounmap;
+	}
+
 	/*
 	 *	Check to see if the board panic'd while booting.
 	 */
+	status = rx_readl(dev, MUnit.OMRx[0]);
+	if (status & KERNEL_PANIC)
+		if (aac_rx_restart_adapter(dev))
+			goto error_iounmap;
 	/*
 	 *	Check to see if the board failed any self tests.
 	 */
-	if (rx_readl(dev, MUnit.OMRx[0]) & SELF_TEST_FAILED) {
+	status = rx_readl(dev, MUnit.OMRx[0]);
+	if (status & SELF_TEST_FAILED) {
 		printk(KERN_ERR "%s%d: adapter self-test failed.\n", dev->name, instance);
 		goto error_iounmap;
 	}
 	/*
-	 *	Check to see if the board panic'd while booting.
-	 */
-	if (rx_readl(dev, MUnit.OMRx[0]) & KERNEL_PANIC) {
-		printk(KERN_ERR "%s%d: adapter kernel panic.\n", dev->name, instance);
-		goto error_iounmap;
-	}
-	/*
 	 *	Check to see if the monitor panic'd while booting.
 	 */
-	if (rx_readl(dev, MUnit.OMRx[0]) & MONITOR_PANIC) {
+	if (status & MONITOR_PANIC) {
 		printk(KERN_ERR "%s%d: adapter monitor panic.\n", dev->name, instance);
 		goto error_iounmap;
 	}
@@ -441,12 +479,10 @@
 	/*
 	 *	Wait for the adapter to be up and running. Wait up to 3 minutes
 	 */
-	while ((!(rx_readl(dev, IndexRegs.Mailbox[7]) & KERNEL_UP_AND_RUNNING))
-		|| (!(rx_readl(dev, MUnit.OMRx[0]) & KERNEL_UP_AND_RUNNING)))
+	while (!((status = rx_readl(dev, MUnit.OMRx[0])) & KERNEL_UP_AND_RUNNING))
 	{
 		if(time_after(jiffies, start+startup_timeout*HZ))
 		{
-			status = rx_readl(dev, IndexRegs.Mailbox[7]);
 			printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n", 
 					dev->name, instance, status);
 			goto error_iounmap;
@@ -481,11 +517,6 @@
 	if (dev->new_comm_interface)
 		rx_writeb(dev, MUnit.OIMR, dev->OIMR = 0xf7);
 
-	/*
-	 *	Tell the adapter that all is configured, and it can start
-	 *	accepting requests
-	 */
-	aac_rx_start_adapter(dev);
 	return 0;
 
 error_irq:
@@ -496,3 +527,23 @@
 
 	return -1;
 }
+
+int aac_rx_init(struct aac_dev *dev)
+{
+	int retval;
+
+	/*
+	 *	Set the ioremap hook before _aac_rx_init() maps the adapter.
+	 */
+	dev->a_ops.adapter_ioremap = aac_rx_ioremap;
+
+	retval = _aac_rx_init(dev);
+	if (!retval) {
+		/*
+		 *	Tell the adapter that all is configured, and it can
+		 *	start accepting requests
+		 */
+		aac_rx_start_adapter(dev);
+	}
+	return retval;
+}
diff --git a/drivers/scsi/aacraid/sa.c b/drivers/scsi/aacraid/sa.c
index cd586cc..f906ead 100644
--- a/drivers/scsi/aacraid/sa.c
+++ b/drivers/scsi/aacraid/sa.c
@@ -281,6 +281,21 @@
 }
 
 /**
+ *	aac_sa_ioremap	-	map or unmap the adapter register window
+ *	@size: bytes to ioremap(); zero means iounmap() dev->regs.sa instead
+ *	Returns 0 on success or unmap, -1 if ioremap() returned NULL.
+ */
+static int aac_sa_ioremap(struct aac_dev * dev, u32 size)
+{
+	if (!size) {
+		iounmap(dev->regs.sa);
+		return 0;
+	}
+	dev->base = dev->regs.sa = ioremap(dev->scsi_host_ptr->base, size);
+	return (dev->base == NULL) ? -1 : 0;
+}
+
+/**
  *	aac_sa_init	-	initialize an ARM based AAC card
  *	@dev: device to configure
  *
@@ -299,6 +314,11 @@
 	instance = dev->id;
 	name     = dev->name;
 
+	if (aac_sa_ioremap(dev, dev->base_size)) {
+		printk(KERN_WARNING "%s: unable to map adapter.\n", name);
+		goto error_iounmap;
+	}
+
 	/*
 	 *	Check to see if the board failed any self tests.
 	 */
@@ -341,6 +361,7 @@
 	dev->a_ops.adapter_notify = aac_sa_notify_adapter;
 	dev->a_ops.adapter_sync_cmd = sa_sync_cmd;
 	dev->a_ops.adapter_check_health = aac_sa_check_health;
+	dev->a_ops.adapter_ioremap = aac_sa_ioremap;
 
 	/*
 	 *	First clear out all interrupts.  Then enable the one's that 
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index e32b4ab..773f02e 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -888,10 +888,6 @@
 #define ASC_PCI_ID2DEV(id)    (((id) >> 11) & 0x1F)
 #define ASC_PCI_ID2FUNC(id)   (((id) >> 8) & 0x7)
 #define ASC_PCI_MKID(bus, dev, func) ((((dev) & 0x1F) << 11) | (((func) & 0x7) << 8) | ((bus) & 0xFF))
-#define ASC_PCI_VENDORID                  0x10CD
-#define ASC_PCI_DEVICEID_1200A            0x1100
-#define ASC_PCI_DEVICEID_1200B            0x1200
-#define ASC_PCI_DEVICEID_ULTRA            0x1300
 #define ASC_PCI_REVISION_3150             0x02
 #define ASC_PCI_REVISION_3050             0x03
 
@@ -899,6 +895,14 @@
 #define  ASC_DVCLIB_CALL_FAILED   (0)
 #define  ASC_DVCLIB_CALL_ERROR    (-1)
 
+#define PCI_VENDOR_ID_ASP		0x10cd
+#define PCI_DEVICE_ID_ASP_1200A		0x1100
+#define PCI_DEVICE_ID_ASP_ABP940	0x1200
+#define PCI_DEVICE_ID_ASP_ABP940U	0x1300
+#define PCI_DEVICE_ID_ASP_ABP940UW	0x2300
+#define PCI_DEVICE_ID_38C0800_REV1	0x2500
+#define PCI_DEVICE_ID_38C1600_REV1	0x2700
+
 /*
  * Enable CC_VERY_LONG_SG_LIST to support up to 64K element SG lists.
  * The SRB structure will have to be changed and the ASC_SRB2SCSIQ()
@@ -1492,8 +1496,6 @@
 #define ASC_INIT_STATE_END_INQUIRY   0x0080
 #define ASC_INIT_RESET_SCSI_DONE     0x0100
 #define ASC_INIT_STATE_WITHOUT_EEP   0x8000
-#define ASC_PCI_DEVICE_ID_REV_A      0x1100
-#define ASC_PCI_DEVICE_ID_REV_B      0x1200
 #define ASC_BUG_FIX_IF_NOT_DWB       0x0001
 #define ASC_BUG_FIX_ASYN_USE_SYN     0x0002
 #define ASYN_SDTR_DATA_FIX_PCI_REV_AB 0x41
@@ -2100,12 +2102,6 @@
 #define ADV_NUM_PAGE_CROSSING \
     ((ADV_SG_TOTAL_MEM_SIZE + (ADV_PAGE_SIZE - 1))/ADV_PAGE_SIZE)
 
-/* a_condor.h */
-#define ADV_PCI_VENDOR_ID               0x10CD
-#define ADV_PCI_DEVICE_ID_REV_A         0x2300
-#define ADV_PCI_DEVID_38C0800_REV1      0x2500
-#define ADV_PCI_DEVID_38C1600_REV1      0x2700
-
 #define ADV_EEP_DVC_CFG_BEGIN           (0x00)
 #define ADV_EEP_DVC_CFG_END             (0x15)
 #define ADV_EEP_DVC_CTL_BEGIN           (0x16)  /* location of OEM name */
@@ -3569,14 +3565,7 @@
 #define PCI_MAX_SLOT            0x1F
 #define PCI_MAX_BUS             0xFF
 #define PCI_IOADDRESS_MASK      0xFFFE
-#define ASC_PCI_VENDORID        0x10CD
 #define ASC_PCI_DEVICE_ID_CNT   6       /* PCI Device ID count. */
-#define ASC_PCI_DEVICE_ID_1100  0x1100
-#define ASC_PCI_DEVICE_ID_1200  0x1200
-#define ASC_PCI_DEVICE_ID_1300  0x1300
-#define ASC_PCI_DEVICE_ID_2300  0x2300  /* ASC-3550 */
-#define ASC_PCI_DEVICE_ID_2500  0x2500  /* ASC-38C0800 */
-#define ASC_PCI_DEVICE_ID_2700  0x2700  /* ASC-38C1600 */
 
 #ifndef ADVANSYS_STATS
 #define ASC_STATS(shp, counter)
@@ -4330,12 +4319,12 @@
     struct pci_dev      *pci_devp = NULL;
     int                 pci_device_id_cnt = 0;
     unsigned int        pci_device_id[ASC_PCI_DEVICE_ID_CNT] = {
-                                    ASC_PCI_DEVICE_ID_1100,
-                                    ASC_PCI_DEVICE_ID_1200,
-                                    ASC_PCI_DEVICE_ID_1300,
-                                    ASC_PCI_DEVICE_ID_2300,
-                                    ASC_PCI_DEVICE_ID_2500,
-                                    ASC_PCI_DEVICE_ID_2700
+                                    PCI_DEVICE_ID_ASP_1200A,
+                                    PCI_DEVICE_ID_ASP_ABP940,
+                                    PCI_DEVICE_ID_ASP_ABP940U,
+                                    PCI_DEVICE_ID_ASP_ABP940UW,
+                                    PCI_DEVICE_ID_38C0800_REV1,
+                                    PCI_DEVICE_ID_38C1600_REV1
                         };
     ADV_PADDR           pci_memory_address;
 #endif /* CONFIG_PCI */
@@ -4471,7 +4460,7 @@
 
                     /* Find all PCI cards. */
                     while (pci_device_id_cnt < ASC_PCI_DEVICE_ID_CNT) {
-                        if ((pci_devp = pci_find_device(ASC_PCI_VENDORID,
+                        if ((pci_devp = pci_find_device(PCI_VENDOR_ID_ASP,
                             pci_device_id[pci_device_id_cnt], pci_devp)) ==
                             NULL) {
                             pci_device_id_cnt++;
@@ -4575,9 +4564,9 @@
              */
 #ifdef CONFIG_PCI
             if (asc_bus[bus] == ASC_IS_PCI &&
-                (pci_devp->device == ASC_PCI_DEVICE_ID_2300 ||
-                 pci_devp->device == ASC_PCI_DEVICE_ID_2500 ||
-                 pci_devp->device == ASC_PCI_DEVICE_ID_2700))
+                (pci_devp->device == PCI_DEVICE_ID_ASP_ABP940UW ||
+                 pci_devp->device == PCI_DEVICE_ID_38C0800_REV1 ||
+                 pci_devp->device == PCI_DEVICE_ID_38C1600_REV1))
             {
                 boardp->flags |= ASC_IS_WIDE_BOARD;
             }
@@ -4600,11 +4589,11 @@
                 adv_dvc_varp->isr_callback = adv_isr_callback;
                 adv_dvc_varp->async_callback = adv_async_callback;
 #ifdef CONFIG_PCI
-                if (pci_devp->device == ASC_PCI_DEVICE_ID_2300)
+                if (pci_devp->device == PCI_DEVICE_ID_ASP_ABP940UW)
                 {
                     ASC_DBG(1, "advansys_detect: ASC-3550\n");
                     adv_dvc_varp->chip_type = ADV_CHIP_ASC3550;
-                } else if (pci_devp->device == ASC_PCI_DEVICE_ID_2500)
+                } else if (pci_devp->device == PCI_DEVICE_ID_38C0800_REV1)
                 {
                     ASC_DBG(1, "advansys_detect: ASC-38C0800\n");
                     adv_dvc_varp->chip_type = ADV_CHIP_ASC38C0800;
@@ -11922,7 +11911,7 @@
         PCIRevisionID = DvcReadPCIConfigByte(asc_dvc,
                                     AscPCIConfigRevisionIDRegister);
 
-        if (PCIVendorID != ASC_PCI_VENDORID) {
+        if (PCIVendorID != PCI_VENDOR_ID_ASP) {
             warn_code |= ASC_WARN_SET_PCI_CONFIG_SPACE;
         }
         prevCmdRegBits = DvcReadPCIConfigByte(asc_dvc,
@@ -11942,15 +11931,15 @@
                 warn_code |= ASC_WARN_SET_PCI_CONFIG_SPACE;
             }
         }
-        if ((PCIDeviceID == ASC_PCI_DEVICEID_1200A) ||
-            (PCIDeviceID == ASC_PCI_DEVICEID_1200B)) {
+        if ((PCIDeviceID == PCI_DEVICE_ID_ASP_1200A) ||
+            (PCIDeviceID == PCI_DEVICE_ID_ASP_ABP940)) {
             DvcWritePCIConfigByte(asc_dvc,
                             AscPCIConfigLatencyTimer, 0x00);
             if (DvcReadPCIConfigByte(asc_dvc, AscPCIConfigLatencyTimer)
                 != 0x00) {
                 warn_code |= ASC_WARN_SET_PCI_CONFIG_SPACE;
             }
-        } else if (PCIDeviceID == ASC_PCI_DEVICEID_ULTRA) {
+        } else if (PCIDeviceID == PCI_DEVICE_ID_ASP_ABP940U) {
             if (DvcReadPCIConfigByte(asc_dvc,
                                 AscPCIConfigLatencyTimer) < 0x20) {
                 DvcWritePCIConfigByte(asc_dvc,
@@ -12037,8 +12026,8 @@
         AscSetChipCfgMsw(iop_base, cfg_msw);
         if ((asc_dvc->bus_type & ASC_IS_PCI_ULTRA) == ASC_IS_PCI_ULTRA) {
         } else {
-            if ((pci_device_id == ASC_PCI_DEVICE_ID_REV_A) ||
-                (pci_device_id == ASC_PCI_DEVICE_ID_REV_B)) {
+            if ((pci_device_id == PCI_DEVICE_ID_ASP_1200A) ||
+                (pci_device_id == PCI_DEVICE_ID_ASP_ABP940)) {
                 asc_dvc->bug_fix_cntl |= ASC_BUG_FIX_IF_NOT_DWB;
                 asc_dvc->bug_fix_cntl |= ASC_BUG_FIX_ASYN_USE_SYN;
             }
@@ -14275,8 +14264,8 @@
     0,                          /* 55 reserved */
     0,                          /* 56 cisptr_lsw */
     0,                          /* 57 cisprt_msw */
-    ADV_PCI_VENDOR_ID,          /* 58 subsysvid */
-    ADV_PCI_DEVID_38C0800_REV1, /* 59 subsysid */
+    PCI_VENDOR_ID_ASP,          /* 58 subsysvid */
+    PCI_DEVICE_ID_38C0800_REV1, /* 59 subsysid */
     0,                          /* 60 reserved */
     0,                          /* 61 reserved */
     0,                          /* 62 reserved */
@@ -14405,8 +14394,8 @@
     0,                          /* 55 reserved */
     0,                          /* 56 cisptr_lsw */
     0,                          /* 57 cisprt_msw */
-    ADV_PCI_VENDOR_ID,          /* 58 subsysvid */
-    ADV_PCI_DEVID_38C1600_REV1, /* 59 subsysid */
+    PCI_VENDOR_ID_ASP,          /* 58 subsysvid */
+    PCI_DEVICE_ID_38C1600_REV1, /* 59 subsysid */
     0,                          /* 60 reserved */
     0,                          /* 61 reserved */
     0,                          /* 62 reserved */
@@ -18225,3 +18214,22 @@
     }
 }
 MODULE_LICENSE("Dual BSD/GPL");
+
+/* PCI devices supported by this driver, published via MODULE_DEVICE_TABLE() */
+static struct pci_device_id advansys_pci_tbl[] __devinitdata = {
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_1200A,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_ABP940,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_ABP940U,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_ASP_ABP940UW,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_38C0800_REV1,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ASP, PCI_DEVICE_ID_38C1600_REV1,
+	PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, advansys_pci_tbl);
+
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index f974869..fb6a476 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -253,6 +253,7 @@
 #include <linux/isapnp.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <linux/list.h>
 #include <asm/semaphore.h>
 #include <scsi/scsicam.h>
 
@@ -262,6 +263,8 @@
 #include <scsi/scsi_transport_spi.h>
 #include "aha152x.h"
 
+static LIST_HEAD(aha152x_host_list);
+
 
 /* DEFINES */
 
@@ -423,8 +426,6 @@
 
 #endif /* !PCMCIA */
 
-static int registered_count=0;
-static struct Scsi_Host *aha152x_host[2];
 static struct scsi_host_template aha152x_driver_template;
 
 /*
@@ -541,6 +542,7 @@
 #ifdef __ISAPNP__
 	struct pnp_dev *pnpdev;
 #endif
+	struct list_head host_list;
 };
 
 
@@ -755,20 +757,9 @@
 	return ptr;
 }
 
-static inline struct Scsi_Host *lookup_irq(int irqno)
-{
-	int i;
-
-	for(i=0; i<ARRAY_SIZE(aha152x_host); i++)
-		if(aha152x_host[i] && aha152x_host[i]->irq==irqno)
-			return aha152x_host[i];
-
-	return NULL;
-}
-
 static irqreturn_t swintr(int irqno, void *dev_id, struct pt_regs *regs)
 {
-	struct Scsi_Host *shpnt = lookup_irq(irqno);
+	struct Scsi_Host *shpnt = (struct Scsi_Host *)dev_id;
 
 	if (!shpnt) {
         	printk(KERN_ERR "aha152x: catched software interrupt %d for unknown controller.\n", irqno);
@@ -791,10 +782,11 @@
 		return NULL;
 	}
 
-	/* need to have host registered before triggering any interrupt */
-	aha152x_host[registered_count] = shpnt;
-
 	memset(HOSTDATA(shpnt), 0, sizeof *HOSTDATA(shpnt));
+	INIT_LIST_HEAD(&HOSTDATA(shpnt)->host_list);
+
+	/* need to have host registered before triggering any interrupt */
+	list_add_tail(&HOSTDATA(shpnt)->host_list, &aha152x_host_list);
 
 	shpnt->io_port   = setup->io_port;
 	shpnt->n_io_port = IO_RANGE;
@@ -907,12 +899,10 @@
 
 	scsi_scan_host(shpnt);
 
-	registered_count++;
-
 	return shpnt;
 
 out_host_put:
-	aha152x_host[registered_count]=NULL;
+	list_del(&HOSTDATA(shpnt)->host_list);
 	scsi_host_put(shpnt);
 
 	return NULL;
@@ -937,6 +927,7 @@
 #endif
 
 	scsi_remove_host(shpnt);
+	list_del(&HOSTDATA(shpnt)->host_list);
 	scsi_host_put(shpnt);
 }
 
@@ -1459,9 +1450,12 @@
  */
 static void run(void)
 {
-	int i;
-	for (i = 0; i<ARRAY_SIZE(aha152x_host); i++) {
-		is_complete(aha152x_host[i]);
+	struct aha152x_hostdata *hd;
+
+	list_for_each_entry(hd, &aha152x_host_list, host_list) {
+		struct Scsi_Host *shost = container_of((void *)hd, struct Scsi_Host, hostdata);
+
+		is_complete(shost);
 	}
 }
 
@@ -1471,7 +1465,7 @@
  */
 static irqreturn_t intr(int irqno, void *dev_id, struct pt_regs *regs)
 {
-	struct Scsi_Host *shpnt = lookup_irq(irqno);
+	struct Scsi_Host *shpnt = (struct Scsi_Host *)dev_id;
 	unsigned long flags;
 	unsigned char rev, dmacntrl0;
 
@@ -3953,16 +3947,17 @@
 #endif
 	}
 
-	return registered_count>0;
+	return 1;
 }
 
 static void __exit aha152x_exit(void)
 {
-	int i;
+	struct aha152x_hostdata *hd, *tmp;	/* tmp: next entry, fetched before hd goes away */
 
-	for(i=0; i<ARRAY_SIZE(setup); i++) {
-		aha152x_release(aha152x_host[i]);
-		aha152x_host[i]=NULL;
+	list_for_each_entry_safe(hd, tmp, &aha152x_host_list, host_list) {
+		struct Scsi_Host *shost = container_of((void *)hd, struct Scsi_Host, hostdata);
+
+		aha152x_release(shost);	/* list_del()s hd and puts the host: hd is dead after this */
 	}
 }
 
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index 998999c..c7eeace 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -321,7 +321,7 @@
 MODULE_VERSION(AIC79XX_DRIVER_VERSION);
 module_param(aic79xx, charp, 0444);
 MODULE_PARM_DESC(aic79xx,
-"period delimited, options string.\n"
+"period-delimited options string:\n"
 "	verbose			Enable verbose/diagnostic logging\n"
 "	allow_memio		Allow device registers to be memory mapped\n"
 "	debug			Bitmask of debug values to enable\n"
@@ -346,7 +346,7 @@
 "		Shorten the selection timeout to 128ms\n"
 "\n"
 "	options aic79xx 'aic79xx=verbose.tag_info:{{}.{}.{..10}}.seltime:1'\n"
-"\n");
+);
 
 static void ahd_linux_handle_scsi_status(struct ahd_softc *,
 					 struct scsi_device *,
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index aa4be8a..64c8b88 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -341,7 +341,7 @@
 MODULE_VERSION(AIC7XXX_DRIVER_VERSION);
 module_param(aic7xxx, charp, 0444);
 MODULE_PARM_DESC(aic7xxx,
-"period delimited, options string.\n"
+"period-delimited options string:\n"
 "	verbose			Enable verbose/diagnostic logging\n"
 "	allow_memio		Allow device registers to be memory mapped\n"
 "	debug			Bitmask of debug values to enable\n"
@@ -2539,15 +2539,28 @@
 static void ahc_linux_get_signalling(struct Scsi_Host *shost)
 {
 	struct ahc_softc *ahc = *(struct ahc_softc **)shost->hostdata;
-	u8 mode = ahc_inb(ahc, SBLKCTL);
+	unsigned long flags;
+	u8 mode;
 
-	if (mode & ENAB40)
-		spi_signalling(shost) = SPI_SIGNAL_LVD;
-	else if (mode & ENAB20)
+	if (!(ahc->features & AHC_ULTRA2)) {
+		/* non-LVD chipset, may not have SBLKCTL reg */
 		spi_signalling(shost) = 
 			ahc->features & AHC_HVD ?
 			SPI_SIGNAL_HVD :
 			SPI_SIGNAL_SE;
+		return;
+	}
+
+	ahc_lock(ahc, &flags);
+	ahc_pause(ahc);
+	mode = ahc_inb(ahc, SBLKCTL);
+	ahc_unpause(ahc);
+	ahc_unlock(ahc, &flags);
+
+	if (mode & ENAB40)
+		spi_signalling(shost) = SPI_SIGNAL_LVD;
+	else if (mode & ENAB20)
+		spi_signalling(shost) = SPI_SIGNAL_SE;
 	else
 		spi_signalling(shost) = SPI_SIGNAL_UNKNOWN;
 }
diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c
index 3f85b5e..5dcef48 100644
--- a/drivers/scsi/aic7xxx_old.c
+++ b/drivers/scsi/aic7xxx_old.c
@@ -249,8 +249,6 @@
 #include <linux/stat.h>
 #include <linux/slab.h>        /* for kmalloc() */
 
-#include <linux/config.h>        /* for CONFIG_PCI */
-
 #define AIC7XXX_C_VERSION  "5.2.6"
 
 #define ALL_TARGETS -1
@@ -9196,7 +9194,7 @@
     for (i = 0; i < ARRAY_SIZE(aic_pdevs); i++)
     {
       pdev = NULL;
-      while ((pdev = pci_find_device(aic_pdevs[i].vendor_id,
+      while ((pdev = pci_get_device(aic_pdevs[i].vendor_id,
                                      aic_pdevs[i].device_id,
                                      pdev))) {
 	if (pci_enable_device(pdev))
@@ -9653,6 +9651,9 @@
            */
           aic7xxx_configure_bugs(temp_p);
 
+          /* Hold a pci device reference */
+          pci_dev_get(temp_p->pdev);
+
           if ( list_p == NULL )
           {
             list_p = current_p = temp_p;
@@ -10989,8 +10990,10 @@
   if(!p->pdev)
     release_region(p->base, MAXREG - MINREG);
 #ifdef CONFIG_PCI
-  else
+  else {
     pci_release_regions(p->pdev);
+    pci_dev_put(p->pdev);
+  }
 #endif
   prev = NULL;
   next = first_aic7xxx;
diff --git a/drivers/scsi/aic94xx/Kconfig b/drivers/scsi/aic94xx/Kconfig
new file mode 100644
index 0000000..0ed391d
--- /dev/null
+++ b/drivers/scsi/aic94xx/Kconfig
@@ -0,0 +1,41 @@
+#
+# Kernel configuration file for aic94xx SAS/SATA driver.
+#
+# Copyright (c) 2005 Adaptec, Inc.  All rights reserved.
+# Copyright (c) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+#
+# This file is licensed under GPLv2.
+#
+# This file is part of the aic94xx driver.
+#
+# The aic94xx driver is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# The aic94xx driver is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Aic94xx Driver; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#
+#
+
+config SCSI_AIC94XX
+	tristate "Adaptec AIC94xx SAS/SATA support"
+	depends on PCI
+	select SCSI_SAS_LIBSAS
+	help
+		This driver supports Adaptec's SAS/SATA 3Gb/s 64 bit PCI-X
+		AIC94xx chip based host adapters.
+
+config AIC94XX_DEBUG
+	bool "Compile in debug mode"
+	default y
+	depends on SCSI_AIC94XX
+	help
+		Compiles the aic94xx driver in debug mode.  In debug mode,
+		the driver prints some messages to the console.
diff --git a/drivers/scsi/aic94xx/Makefile b/drivers/scsi/aic94xx/Makefile
new file mode 100644
index 0000000..e6b7012
--- /dev/null
+++ b/drivers/scsi/aic94xx/Makefile
@@ -0,0 +1,39 @@
+#
+# Makefile for Adaptec aic94xx SAS/SATA driver.
+#
+# Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+# Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+#
+# This file is licensed under GPLv2.
+#
+# This file is part of the aic94xx driver.
+#
+# The aic94xx driver is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# The aic94xx driver is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the aic94xx driver; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+ifeq ($(CONFIG_AIC94XX_DEBUG),y)
+	EXTRA_CFLAGS += -DASD_DEBUG -DASD_ENTER_EXIT
+endif
+
+obj-$(CONFIG_SCSI_AIC94XX) += aic94xx.o
+aic94xx-y += aic94xx_init.o \
+	     aic94xx_hwi.o  \
+	     aic94xx_reg.o  \
+	     aic94xx_sds.o  \
+	     aic94xx_seq.o  \
+	     aic94xx_dump.o \
+	     aic94xx_scb.o  \
+	     aic94xx_dev.o  \
+	     aic94xx_tmf.o  \
+	     aic94xx_task.o
diff --git a/drivers/scsi/aic94xx/aic94xx.h b/drivers/scsi/aic94xx/aic94xx.h
new file mode 100644
index 0000000..1bd5b4e
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx.h
@@ -0,0 +1,114 @@
+/*
+ * Aic94xx SAS/SATA driver header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * $Id: //depot/aic94xx/aic94xx.h#31 $
+ */
+
+#ifndef _AIC94XX_H_
+#define _AIC94XX_H_
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <scsi/libsas.h>
+
+#define ASD_DRIVER_NAME		"aic94xx"
+#define ASD_DRIVER_DESCRIPTION	"Adaptec aic94xx SAS/SATA driver"
+
+#define asd_printk(fmt, ...)	printk(KERN_NOTICE ASD_DRIVER_NAME ": " fmt, ## __VA_ARGS__)
+
+#ifdef ASD_ENTER_EXIT	/* function entry/exit tracing at KERN_NOTICE */
+#define ENTER  printk(KERN_NOTICE "%s: ENTER %s\n", ASD_DRIVER_NAME, \
+		__FUNCTION__)
+#define EXIT   printk(KERN_NOTICE "%s: --EXIT %s\n", ASD_DRIVER_NAME, \
+		__FUNCTION__)
+#else			/* tracing off: ENTER and EXIT expand to nothing */
+#define ENTER
+#define EXIT
+#endif			/* ASD_ENTER_EXIT */
+
+#ifdef ASD_DEBUG	/* debug messages are printed through asd_printk */
+#define ASD_DPRINTK asd_printk
+#else			/* debug messages and their arguments are dropped */
+#define ASD_DPRINTK(fmt, ...)
+#endif			/* ASD_DEBUG */
+
+/* 2*ITNL timeout + 1 second */
+#define AIC94XX_SCB_TIMEOUT  (5*HZ)
+
+extern kmem_cache_t *asd_dma_token_cache;
+extern kmem_cache_t *asd_ascb_cache;
+extern char sas_addr_str[2*SAS_ADDR_SIZE + 1];
+
+static inline void asd_stringify_sas_addr(char *p, const u8 *sas_addr)
+{
+	const u8 *end = sas_addr + SAS_ADDR_SIZE; /* one past the last byte */
+	for (; sas_addr < end; sas_addr++, p += 2)
+		snprintf(p, 3, "%02X", *sas_addr); /* two hex digits + NUL */
+	*p = '\0';
+}
+
+/* Parse hex digit pairs from p into sas_addr; accepts either letter case. */
+static inline void asd_destringify_sas_addr(u8 *sas_addr, const char *p)
+{
+	int i;
+	/*
+	 * Stop at the NUL terminator, including one that follows a lone
+	 * high digit, so p never advances past the end of the string.
+	 */
+	for (i = 0; i < SAS_ADDR_SIZE && p[0] && p[1]; i++, p += 2) {
+		u8 h = isdigit(p[0]) ? p[0]-'0' : toupper(p[0])-'A'+10;
+		u8 l = isdigit(p[1]) ? p[1]-'0' : toupper(p[1])-'A'+10;
+		sas_addr[i] = (h<<4) | l;
+	}
+}
+
+struct asd_ha_struct;
+struct asd_ascb;
+
+int  asd_read_ocm(struct asd_ha_struct *asd_ha);
+int  asd_read_flash(struct asd_ha_struct *asd_ha);
+
+int  asd_dev_found(struct domain_device *dev);
+void asd_dev_gone(struct domain_device *dev);
+
+void asd_invalidate_edb(struct asd_ascb *ascb, int edb_id);
+
+int  asd_execute_task(struct sas_task *, int num, unsigned long gfp_flags);
+
+/* ---------- TMFs ---------- */
+int  asd_abort_task(struct sas_task *);
+int  asd_abort_task_set(struct domain_device *, u8 *lun);
+int  asd_clear_aca(struct domain_device *, u8 *lun);
+int  asd_clear_task_set(struct domain_device *, u8 *lun);
+int  asd_lu_reset(struct domain_device *, u8 *lun);
+int  asd_query_task(struct sas_task *);
+
+/* ---------- Adapter and Port management ---------- */
+int  asd_clear_nexus_port(struct asd_sas_port *port);
+int  asd_clear_nexus_ha(struct sas_ha_struct *sas_ha);
+
+/* ---------- Phy Management ---------- */
+int  asd_control_phy(struct asd_sas_phy *phy, enum phy_func func, void *arg);
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_dev.c b/drivers/scsi/aic94xx/aic94xx_dev.c
new file mode 100644
index 0000000..6f8901b
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_dev.c
@@ -0,0 +1,353 @@
+/*
+ * Aic94xx SAS/SATA DDB management
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * $Id: //depot/aic94xx/aic94xx_dev.c#21 $
+ */
+
+#include "aic94xx.h"
+#include "aic94xx_hwi.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_sas.h"
+
+#define FIND_FREE_DDB(_ha) find_first_zero_bit((_ha)->hw_prof.ddb_bitmap, \
+					       (_ha)->hw_prof.max_ddbs)
+#define SET_DDB(_ddb, _ha) set_bit(_ddb, (_ha)->hw_prof.ddb_bitmap)
+#define CLEAR_DDB(_ddb, _ha) clear_bit(_ddb, (_ha)->hw_prof.ddb_bitmap)
+
+static inline int asd_get_ddb(struct asd_ha_struct *asd_ha)
+{
+	unsigned long irq_flags;
+	int site, offs;
+
+	/* find and claim the first clear bitmap bit while holding ddb_lock */
+	spin_lock_irqsave(&asd_ha->hw_prof.ddb_lock, irq_flags);
+	site = FIND_FREE_DDB(asd_ha);
+	if (site < asd_ha->hw_prof.max_ddbs)
+		SET_DDB(site, asd_ha);
+	else
+		site = -ENOMEM;		/* no free DDB found */
+	spin_unlock_irqrestore(&asd_ha->hw_prof.ddb_lock, irq_flags);
+	if (site < 0)
+		return site;
+	for (offs = 0; offs < sizeof(struct asd_ddb_ssp_smp_target_port);
+	     offs += 4)		/* zero the new site dword by dword */
+		asd_ddbsite_write_dword(asd_ha, site, offs, 0);
+	return site;
+}
+
+#define INIT_CONN_TAG   offsetof(struct asd_ddb_ssp_smp_target_port, init_conn_tag)
+#define DEST_SAS_ADDR   offsetof(struct asd_ddb_ssp_smp_target_port, dest_sas_addr)
+#define SEND_QUEUE_HEAD offsetof(struct asd_ddb_ssp_smp_target_port, send_queue_head)
+#define DDB_TYPE        offsetof(struct asd_ddb_ssp_smp_target_port, ddb_type)
+#define CONN_MASK       offsetof(struct asd_ddb_ssp_smp_target_port, conn_mask)
+#define DDB_TARG_FLAGS  offsetof(struct asd_ddb_ssp_smp_target_port, flags)
+#define DDB_TARG_FLAGS2 offsetof(struct asd_ddb_stp_sata_target_port, flags2)
+#define EXEC_QUEUE_TAIL offsetof(struct asd_ddb_ssp_smp_target_port, exec_queue_tail)
+#define SEND_QUEUE_TAIL offsetof(struct asd_ddb_ssp_smp_target_port, send_queue_tail)
+#define SISTER_DDB      offsetof(struct asd_ddb_ssp_smp_target_port, sister_ddb)
+#define MAX_CCONN       offsetof(struct asd_ddb_ssp_smp_target_port, max_concurrent_conn)
+#define NUM_CTX         offsetof(struct asd_ddb_ssp_smp_target_port, num_contexts)
+#define ATA_CMD_SCBPTR  offsetof(struct asd_ddb_stp_sata_target_port, ata_cmd_scbptr)
+#define SATA_TAG_ALLOC_MASK offsetof(struct asd_ddb_stp_sata_target_port, sata_tag_alloc_mask)
+#define NUM_SATA_TAGS   offsetof(struct asd_ddb_stp_sata_target_port, num_sata_tags)
+#define SATA_STATUS     offsetof(struct asd_ddb_stp_sata_target_port, sata_status)
+#define NCQ_DATA_SCB_PTR offsetof(struct asd_ddb_stp_sata_target_port, ncq_data_scb_ptr)
+#define ITNL_TIMEOUT    offsetof(struct asd_ddb_ssp_smp_target_port, itnl_timeout)
+
+static inline void asd_free_ddb(struct asd_ha_struct *asd_ha, int ddb)
+{
+	unsigned long irq_flags;
+	/* index 0 and indices of 0xFFFF or above are never released */
+	if (ddb == 0 || ddb >= 0xFFFF)
+		return;
+	asd_ddbsite_write_byte(asd_ha, ddb, DDB_TYPE, DDB_TYPE_UNUSED);
+	spin_lock_irqsave(&asd_ha->hw_prof.ddb_lock, irq_flags);
+	CLEAR_DDB(ddb, asd_ha);		/* bitmap update under ddb_lock */
+	spin_unlock_irqrestore(&asd_ha->hw_prof.ddb_lock, irq_flags);
+}
+
+static inline void asd_set_ddb_type(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int ddb = (int) (unsigned long) dev->lldd_dev;
+	u8 type = DDB_TYPE_INITIATOR;	/* used when no target protocol is set */
+
+	if (dev->dev_type == SATA_PM_PORT)
+		type = DDB_TYPE_PM_PORT;
+	else if (dev->tproto)
+		type = DDB_TYPE_TARGET;
+	asd_ddbsite_write_byte(asd_ha, ddb, DDB_TYPE, type);
+}
+
+static int asd_init_sata_tag_ddb(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int owner_ddb = (int) (unsigned long) dev->lldd_dev;
+	int tag_ddb, offs;
+
+	tag_ddb = asd_get_ddb(asd_ha);
+	if (tag_ddb < 0)
+		return tag_ddb;
+
+	for (offs = 0; offs < sizeof(struct asd_ddb_sata_tag); offs += 2)
+		asd_ddbsite_write_word(asd_ha, tag_ddb, offs, 0xFFFF);
+	/* the owning device's SISTER_DDB field points at the tag DDB */
+	asd_ddbsite_write_word(asd_ha, owner_ddb, SISTER_DDB, tag_ddb);
+	return 0;
+}
+
+static inline int asd_init_sata(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int ddb = (int) (unsigned long) dev->lldd_dev;
+	u32 qdepth = 0;		/* stays 0 unless IDENTIFY word 76 flags NCQ */
+	int res = 0;		/* 0, or the tag DDB allocation error */
+	/* Programs the SATA fields of the DDB that dev->lldd_dev refers to. */
+	asd_ddbsite_write_word(asd_ha, ddb, ATA_CMD_SCBPTR, 0xFFFF);
+	if ((dev->dev_type == SATA_DEV || dev->dev_type == SATA_PM_PORT) &&
+	    dev->sata_dev.identify_device &&
+	    dev->sata_dev.identify_device[10] != 0) {
+		u16 w75 = le16_to_cpu(dev->sata_dev.identify_device[75]);
+		u16 w76 = le16_to_cpu(dev->sata_dev.identify_device[76]);
+		/* qdepth may reach 32: build the mask in 64 bits, then truncate */
+		if (w76 & 0x100) /* NCQ? */
+			qdepth = (w75 & 0x1F) + 1;
+		asd_ddbsite_write_dword(asd_ha, ddb, SATA_TAG_ALLOC_MASK,
+					(u32) ((1ULL<<qdepth)-1));
+		asd_ddbsite_write_byte(asd_ha, ddb, NUM_SATA_TAGS, qdepth);
+	}
+	if (dev->dev_type == SATA_DEV || dev->dev_type == SATA_PM ||
+	    dev->dev_type == SATA_PM_PORT) {
+		struct dev_to_host_fis *fis = (struct dev_to_host_fis *)
+			dev->frame_rcvd;
+		asd_ddbsite_write_byte(asd_ha, ddb, SATA_STATUS, fis->status);
+	}
+	asd_ddbsite_write_word(asd_ha, ddb, NCQ_DATA_SCB_PTR, 0xFFFF);
+	if (qdepth > 0)		/* NCQ devices also get a SATA tag DDB */
+		res = asd_init_sata_tag_ddb(dev);
+	return res;
+}
+
+static int asd_init_target_ddb(struct domain_device *dev)
+{
+	int ddb, i;
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	u8 flags = 0;
+	/* Claim a DDB, record it in dev->lldd_dev, then program its fields. */
+	ddb = asd_get_ddb(asd_ha);
+	if (ddb < 0)
+		return ddb;
+	/* asd_set_ddb_type() and asd_init_sata() find the DDB via lldd_dev */
+	dev->lldd_dev = (void *) (unsigned long) ddb;
+
+	asd_ddbsite_write_byte(asd_ha, ddb, 0, DDB_TP_CONN_TYPE);
+	asd_ddbsite_write_byte(asd_ha, ddb, 1, 0);
+	asd_ddbsite_write_word(asd_ha, ddb, INIT_CONN_TAG, 0xFFFF);
+	for (i = 0; i < SAS_ADDR_SIZE; i++)
+		asd_ddbsite_write_byte(asd_ha, ddb, DEST_SAS_ADDR+i,
+				       dev->sas_addr[i]);
+	asd_ddbsite_write_word(asd_ha, ddb, SEND_QUEUE_HEAD, 0xFFFF);
+	asd_set_ddb_type(dev);
+	asd_ddbsite_write_byte(asd_ha, ddb, CONN_MASK, dev->port->phy_mask);
+	if (dev->port->oob_mode != SATA_OOB_MODE) {
+		flags |= OPEN_REQUIRED;
+		if ((dev->dev_type == SATA_DEV) ||
+		    (dev->tproto & SAS_PROTO_STP)) {
+			struct smp_resp *rps_resp = &dev->sata_dev.rps_resp;
+			if (rps_resp->frame_type == SMP_RESPONSE &&
+			    rps_resp->function == SMP_REPORT_PHY_SATA &&
+			    rps_resp->result == SMP_RESP_FUNC_ACC) {
+				if (rps_resp->rps.affil_valid)
+					flags |= STP_AFFIL_POL;
+				if (rps_resp->rps.affil_supp)
+					flags |= SUPPORTS_AFFIL;
+			}
+		} else {
+			flags |= CONCURRENT_CONN_SUPP;
+			if (!dev->parent &&
+			    (dev->dev_type == EDGE_DEV ||
+			     dev->dev_type == FANOUT_DEV))
+				asd_ddbsite_write_byte(asd_ha, ddb, MAX_CCONN,
+						       4);
+			else
+				asd_ddbsite_write_byte(asd_ha, ddb, MAX_CCONN,
+						       dev->pathways);
+			asd_ddbsite_write_byte(asd_ha, ddb, NUM_CTX, 1);
+		}
+	}
+	if (dev->dev_type == SATA_PM)
+		flags |= SATA_MULTIPORT;
+	asd_ddbsite_write_byte(asd_ha, ddb, DDB_TARG_FLAGS, flags);
+	/* second flag byte, at the flags2 offset of the STP/SATA DDB layout */
+	flags = 0;
+	if (dev->tproto & SAS_PROTO_STP)
+		flags |= STP_CL_POL_NO_TX;
+	asd_ddbsite_write_byte(asd_ha, ddb, DDB_TARG_FLAGS2, flags);
+
+	asd_ddbsite_write_word(asd_ha, ddb, EXEC_QUEUE_TAIL, 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, ddb, SEND_QUEUE_TAIL, 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, ddb, SISTER_DDB, 0xFFFF);
+	/* SATA fields; asd_init_sata() can fail while allocating a tag DDB */
+	if (dev->dev_type == SATA_DEV || (dev->tproto & SAS_PROTO_STP)) {
+		i = asd_init_sata(dev);
+		if (i < 0) {
+			asd_free_ddb(asd_ha, ddb); /* lldd_dev not reset */
+			return i;
+		}
+	}
+	/* ITNL_TIMEOUT_CONST is both the upper bound and the fallback value */
+	if (dev->dev_type == SAS_END_DEV) {
+		struct sas_end_device *rdev = rphy_to_end_device(dev->rphy);
+		if (rdev->I_T_nexus_loss_timeout > 0)
+			asd_ddbsite_write_word(asd_ha, ddb, ITNL_TIMEOUT,
+					       min(rdev->I_T_nexus_loss_timeout,
+						   (u16)ITNL_TIMEOUT_CONST));
+		else
+			asd_ddbsite_write_word(asd_ha, ddb, ITNL_TIMEOUT,
+					       (u16)ITNL_TIMEOUT_CONST);
+	}
+	return 0;
+}
+
+static int asd_init_sata_pm_table_ddb(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int pm_ddb = (int) (unsigned long) dev->lldd_dev;
+	int table_ddb, offs = 0;
+
+	table_ddb = asd_get_ddb(asd_ha);
+	if (table_ddb < 0)
+		return table_ddb;
+
+	while (offs < 32) {	/* first 32 bytes of the table DDB */
+		asd_ddbsite_write_word(asd_ha, table_ddb, offs, 0xFFFF);
+		offs += 2;
+	}
+	asd_ddbsite_write_word(asd_ha, pm_ddb, SISTER_DDB, table_ddb);
+	return 0;
+}
+
+#define PM_PORT_FLAGS offsetof(struct asd_ddb_sata_pm_port, pm_port_flags)
+#define PARENT_DDB    offsetof(struct asd_ddb_sata_pm_port, parent_ddb)
+
+/**
+ * asd_init_sata_pm_port_ddb -- SATA Port Multiplier Port
+ * @dev: pointer to domain device
+ *
+ * Allocates one SATA Port Multiplier Port DDB, records it in dev->lldd_dev
+ * and enters it in the parent's PM table DDB.  asd_init_sata() adds a SATA
+ * Tag DDB when the target supports NCQ.  Returns 0 or a negative error.
+ */
+static int asd_init_sata_pm_port_ddb(struct domain_device *dev)
+{
+	int ddb, i, parent_ddb, pmtable_ddb;
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	u8  flags;
+
+	ddb = asd_get_ddb(asd_ha);
+	if (ddb < 0)
+		return ddb;
+	/* asd_set_ddb_type() and asd_init_sata() find the DDB via lldd_dev */
+	dev->lldd_dev = (void *) (unsigned long) ddb;
+
+	asd_set_ddb_type(dev);
+	flags = (dev->sata_dev.port_no << 4) | PM_PORT_SET;
+	asd_ddbsite_write_byte(asd_ha, ddb, PM_PORT_FLAGS, flags);
+	asd_ddbsite_write_word(asd_ha, ddb, SISTER_DDB, 0xFFFF);
+	asd_ddbsite_write_word(asd_ha, ddb, ATA_CMD_SCBPTR, 0xFFFF);
+	/* allocates and links the SATA Tag DDB itself when NCQ is supported */
+	i = asd_init_sata(dev);
+	if (i < 0) {
+		asd_free_ddb(asd_ha, ddb);
+		dev->lldd_dev = NULL;
+		return i;
+	}
+
+	parent_ddb = (int) (unsigned long) dev->parent->lldd_dev;
+	asd_ddbsite_write_word(asd_ha, ddb, PARENT_DDB, parent_ddb);
+	pmtable_ddb = asd_ddbsite_read_word(asd_ha, parent_ddb, SISTER_DDB);
+	asd_ddbsite_write_word(asd_ha, pmtable_ddb, dev->sata_dev.port_no,ddb);
+	return 0;
+}
+
+static int asd_init_initiator_ddb(struct domain_device *dev)
+{
+	return -ENODEV;	/* always rejected: no initiator DDB setup is done */
+}
+
+/**
+ * asd_init_sata_pm_ddb -- SATA Port Multiplier
+ * dev: pointer to domain device
+ *
+ * A port multiplier takes two DDBs: the target port DDB and a PM table
+ * DDB.  If the table cannot be set up, the target port DDB is freed.
+ */
+static int asd_init_sata_pm_ddb(struct domain_device *dev)
+{
+	int err = asd_init_target_ddb(dev);
+
+	if (err)
+		return err;
+	err = asd_init_sata_pm_table_ddb(dev);
+	if (!err)
+		return 0;
+	/* no PM table DDB: hand the target port DDB back */
+	asd_free_ddb(dev->port->ha->lldd_ha,
+		     (int) (unsigned long) dev->lldd_dev);
+	return err;
+}
+
+/*
+ * asd_dev_found -- set up the DDB(s) for a domain device
+ * dev: pointer to domain device
+ *
+ * Picks the initializer from dev->dev_type, falling back on dev->tproto
+ * for everything that is not a SATA port multiplier or PM port, and
+ * returns that initializer's result.
+ */
+int asd_dev_found(struct domain_device *dev)
+{
+	if (dev->dev_type == SATA_PM)
+		return asd_init_sata_pm_ddb(dev);
+	if (dev->dev_type == SATA_PM_PORT)
+		return asd_init_sata_pm_port_ddb(dev);
+	/* every other type: a target protocol selects the target DDB setup */
+	if (dev->tproto)
+		return asd_init_target_ddb(dev);
+	return asd_init_initiator_ddb(dev);
+}
+
+void asd_dev_gone(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	int own_ddb = (int) (unsigned long) dev->lldd_dev;
+	int linked_ddb = asd_ddbsite_read_word(asd_ha, own_ddb, SISTER_DDB);
+
+	/* release the sister DDB first (0xFFFF: none linked), then our own */
+	if (linked_ddb != 0xFFFF)
+		asd_free_ddb(asd_ha, linked_ddb);
+	asd_free_ddb(asd_ha, own_ddb);
+	/* the device no longer refers to any DDB */
+	dev->lldd_dev = NULL;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_dump.c b/drivers/scsi/aic94xx/aic94xx_dump.c
new file mode 100644
index 0000000..e6ade59
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_dump.c
@@ -0,0 +1,959 @@
+/*
+ * Aic94xx SAS/SATA driver dump interface.
+ *
+ * Copyright (C) 2004 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2004 David Chaw <david_chaw@adaptec.com>
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * 2005/07/14/LT  Complete overhaul of this file.  Update pages, register
+ * locations, names, etc.  Make use of macros.  Print more information.
+ * Print all cseq and lseq mip and mdp.
+ *
+ */
+
+#include "linux/pci.h"
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_reg_def.h"
+#include "aic94xx_sas.h"
+
+#include "aic94xx_dump.h"
+
+#ifdef ASD_DEBUG
+
+#define MD(x)	    (1 << (x))	/* bit for mode x */
+#define MODE_COMMON (1U << 31)	/* unsigned: 1 << 31 overflows a signed int */
+#define MODE_0_7    (0xFF)	/* same value as MD(0)|...|MD(7) */
+
+static const struct lseq_cio_regs {
+	char	*name;	/* register name; NULL in the terminating entry */
+	u32	offs;	/* register offset (base not visible here) */
+	u8	width;	/* 8, 16 or 32 in every entry below */
+	u32	mode;	/* MD(n) bits, MODE_0_7 or MODE_COMMON */
+} LSEQmCIOREGS[] = {
+	{"LmMnSCBPTR",    0x20, 16, MD(0)|MD(1)|MD(2)|MD(3)|MD(4) },
+	{"LmMnDDBPTR",    0x22, 16, MD(0)|MD(1)|MD(2)|MD(3)|MD(4) },
+	{"LmREQMBX",      0x30, 32, MODE_COMMON },
+	{"LmRSPMBX",      0x34, 32, MODE_COMMON },
+	{"LmMnINT",       0x38, 32, MODE_0_7 },
+	{"LmMnINTEN",     0x3C, 32, MODE_0_7 },
+	{"LmXMTPRIMD",    0x40, 32, MODE_COMMON },
+	{"LmXMTPRIMCS",   0x44,  8, MODE_COMMON },
+	{"LmCONSTAT",     0x45,  8, MODE_COMMON },
+	{"LmMnDMAERRS",   0x46,  8, MD(0)|MD(1) },
+	{"LmMnSGDMAERRS", 0x47,  8, MD(0)|MD(1) },
+	{"LmMnEXPHDRP",   0x48,  8, MD(0) },
+	{"LmMnSASAALIGN", 0x48,  8, MD(1) },
+	{"LmMnMSKHDRP",   0x49,  8, MD(0) },
+	{"LmMnSTPALIGN",  0x49,  8, MD(1) },
+	{"LmMnRCVHDRP",   0x4A,  8, MD(0) },
+	{"LmMnXMTHDRP",   0x4A,  8, MD(1) },
+	{"LmALIGNMODE",   0x4B,  8, MD(1) },
+	{"LmMnEXPRCVCNT", 0x4C, 32, MD(0) },
+	{"LmMnXMTCNT",    0x4C, 32, MD(1) },
+	{"LmMnCURRTAG",   0x54, 16, MD(0) },
+	{"LmMnPREVTAG",   0x56, 16, MD(0) },
+	{"LmMnACKOFS",    0x58,  8, MD(1) },
+	{"LmMnXFRLVL",    0x59,  8, MD(0)|MD(1) },
+	{"LmMnSGDMACTL",  0x5A,  8, MD(0)|MD(1) },
+	{"LmMnSGDMASTAT", 0x5B,  8, MD(0)|MD(1) },
+	{"LmMnDDMACTL",   0x5C,  8, MD(0)|MD(1) },
+	{"LmMnDDMASTAT",  0x5D,  8, MD(0)|MD(1) },
+	{"LmMnDDMAMODE",  0x5E, 16, MD(0)|MD(1) },
+	{"LmMnPIPECTL",   0x61,  8, MD(0)|MD(1) },
+	{"LmMnACTSCB",    0x62, 16, MD(0)|MD(1) },
+	{"LmMnSGBHADR",   0x64,  8, MD(0)|MD(1) },
+	{"LmMnSGBADR",    0x65,  8, MD(0)|MD(1) },
+	{"LmMnSGDCNT",    0x66,  8, MD(0)|MD(1) },
+	{"LmMnSGDMADR",   0x68, 32, MD(0)|MD(1) },
+	{"LmMnSGDMADR",   0x6C, 32, MD(0)|MD(1) }, /* NOTE(review): same name as 0x68 - confirm */
+	{"LmMnXFRCNT",    0x70, 32, MD(0)|MD(1) },
+	{"LmMnXMTCRC",    0x74, 32, MD(1) },
+	{"LmCURRTAG",     0x74, 16, MD(0) },
+	{"LmPREVTAG",     0x76, 16, MD(0) },
+	{"LmMnDPSEL",     0x7B,  8, MD(0)|MD(1) },
+	{"LmDPTHSTAT",    0x7C,  8, MODE_COMMON },
+	{"LmMnHOLDLVL",   0x7D,  8, MD(0) },
+	{"LmMnSATAFS",    0x7E,  8, MD(1) },
+	{"LmMnCMPLTSTAT", 0x7F,  8, MD(0)|MD(1) },
+	{"LmPRMSTAT0",    0x80, 32, MODE_COMMON },
+	{"LmPRMSTAT1",    0x84, 32, MODE_COMMON },
+	{"LmGPRMINT",     0x88,  8, MODE_COMMON },
+        {"LmMnCURRSCB",   0x8A, 16, MD(0) },
+	{"LmPRMICODE",    0x8C, 32, MODE_COMMON },
+	{"LmMnRCVCNT",    0x90, 16, MD(0) },
+	{"LmMnBUFSTAT",   0x92, 16, MD(0) },
+	{"LmMnXMTHDRSIZE",0x92,  8, MD(1) },
+	{"LmMnXMTSIZE",   0x93,  8, MD(1) },
+	{"LmMnTGTXFRCNT", 0x94, 32, MD(0) },
+	{"LmMnEXPROFS",   0x98, 32, MD(0) },
+	{"LmMnXMTROFS",   0x98, 32, MD(1) },
+	{"LmMnRCVROFS",   0x9C, 32, MD(0) },
+	{"LmCONCTL",      0xA0, 16, MODE_COMMON },
+	{"LmBITLTIMER",   0xA2, 16, MODE_COMMON },
+	{"LmWWNLOW",      0xA8, 32, MODE_COMMON },
+	{"LmWWNHIGH",     0xAC, 32, MODE_COMMON },
+	{"LmMnFRMERR",    0xB0, 32, MD(0) },
+	{"LmMnFRMERREN",  0xB4, 32, MD(0) },
+	{"LmAWTIMER",     0xB8, 16, MODE_COMMON },
+	{"LmAWTCTL",      0xBA,  8, MODE_COMMON },
+	{"LmMnHDRCMPS",   0xC0, 32, MD(0) },
+	{"LmMnXMTSTAT",   0xC4,  8, MD(1) },
+	{"LmHWTSTATEN",   0xC5,  8, MODE_COMMON },
+	{"LmMnRRDYRC",    0xC6,  8, MD(0) },
+        {"LmMnRRDYTC",    0xC6,  8, MD(1) },
+	{"LmHWTSTAT",     0xC7,  8, MODE_COMMON },
+	{"LmMnDATABUFADR",0xC8, 16, MD(0)|MD(1) },
+	{"LmDWSSTATUS",   0xCB,  8, MODE_COMMON },
+	{"LmMnACTSTAT",   0xCE, 16, MD(0)|MD(1) },
+	{"LmMnREQSCB",    0xD2, 16, MD(0)|MD(1) },
+	{"LmXXXPRIM",     0xD4, 32, MODE_COMMON },
+	{"LmRCVASTAT",    0xD9,  8, MODE_COMMON },
+	{"LmINTDIS1",     0xDA,  8, MODE_COMMON },
+	{"LmPSTORESEL",   0xDB,  8, MODE_COMMON },
+	{"LmPSTORE",      0xDC, 32, MODE_COMMON },
+	{"LmPRIMSTAT0EN", 0xE0, 32, MODE_COMMON },
+	{"LmPRIMSTAT1EN", 0xE4, 32, MODE_COMMON },
+	{"LmDONETCTL",    0xF2, 16, MODE_COMMON },
+	{NULL, 0, 0, 0 }
+};
+/*
+static struct lseq_cio_regs LSEQmOOBREGS[] = {
+   {"OOB_BFLTR"        ,0x100, 8, MD(5)},
+   {"OOB_INIT_MIN"     ,0x102,16, MD(5)},
+   {"OOB_INIT_MAX"     ,0x104,16, MD(5)},
+   {"OOB_INIT_NEG"     ,0x106,16, MD(5)},
+   {"OOB_SAS_MIN"      ,0x108,16, MD(5)},
+   {"OOB_SAS_MAX"      ,0x10A,16, MD(5)},
+   {"OOB_SAS_NEG"      ,0x10C,16, MD(5)},
+   {"OOB_WAKE_MIN"     ,0x10E,16, MD(5)},
+   {"OOB_WAKE_MAX"     ,0x110,16, MD(5)},
+   {"OOB_WAKE_NEG"     ,0x112,16, MD(5)},
+   {"OOB_IDLE_MAX"     ,0x114,16, MD(5)},
+   {"OOB_BURST_MAX"    ,0x116,16, MD(5)},
+   {"OOB_XMIT_BURST"   ,0x118, 8, MD(5)},
+   {"OOB_SEND_PAIRS"   ,0x119, 8, MD(5)},
+   {"OOB_INIT_IDLE"    ,0x11A, 8, MD(5)},
+   {"OOB_INIT_NEGO"    ,0x11C, 8, MD(5)},
+   {"OOB_SAS_IDLE"     ,0x11E, 8, MD(5)},
+   {"OOB_SAS_NEGO"     ,0x120, 8, MD(5)},
+   {"OOB_WAKE_IDLE"    ,0x122, 8, MD(5)},
+   {"OOB_WAKE_NEGO"    ,0x124, 8, MD(5)},
+   {"OOB_DATA_KBITS"   ,0x126, 8, MD(5)},
+   {"OOB_BURST_DATA"   ,0x128,32, MD(5)},
+   {"OOB_ALIGN_0_DATA" ,0x12C,32, MD(5)},
+   {"OOB_ALIGN_1_DATA" ,0x130,32, MD(5)},
+   {"OOB_SYNC_DATA"    ,0x134,32, MD(5)},
+   {"OOB_D10_2_DATA"   ,0x138,32, MD(5)},
+   {"OOB_PHY_RST_CNT"  ,0x13C,32, MD(5)},
+   {"OOB_SIG_GEN"      ,0x140, 8, MD(5)},
+   {"OOB_XMIT"         ,0x141, 8, MD(5)},
+   {"FUNCTION_MAKS"    ,0x142, 8, MD(5)},
+   {"OOB_MODE"         ,0x143, 8, MD(5)},
+   {"CURRENT_STATUS"   ,0x144, 8, MD(5)},
+   {"SPEED_MASK"       ,0x145, 8, MD(5)},
+   {"PRIM_COUNT"       ,0x146, 8, MD(5)},
+   {"OOB_SIGNALS"      ,0x148, 8, MD(5)},
+   {"OOB_DATA_DET"     ,0x149, 8, MD(5)},
+   {"OOB_TIME_OUT"     ,0x14C, 8, MD(5)},
+   {"OOB_TIMER_ENABLE" ,0x14D, 8, MD(5)},
+   {"OOB_STATUS"       ,0x14E, 8, MD(5)},
+   {"HOT_PLUG_DELAY"   ,0x150, 8, MD(5)},
+   {"RCD_DELAY"        ,0x151, 8, MD(5)},
+   {"COMSAS_TIMER"     ,0x152, 8, MD(5)},
+   {"SNTT_DELAY"       ,0x153, 8, MD(5)},
+   {"SPD_CHNG_DELAY"   ,0x154, 8, MD(5)},
+   {"SNLT_DELAY"       ,0x155, 8, MD(5)},
+   {"SNWT_DELAY"       ,0x156, 8, MD(5)},
+   {"ALIGN_DELAY"      ,0x157, 8, MD(5)},
+   {"INT_ENABLE_0"     ,0x158, 8, MD(5)},
+   {"INT_ENABLE_1"     ,0x159, 8, MD(5)},
+   {"INT_ENABLE_2"     ,0x15A, 8, MD(5)},
+   {"INT_ENABLE_3"     ,0x15B, 8, MD(5)},
+   {"OOB_TEST_REG"     ,0x15C, 8, MD(5)},
+   {"PHY_CONTROL_0"    ,0x160, 8, MD(5)},
+   {"PHY_CONTROL_1"    ,0x161, 8, MD(5)},
+   {"PHY_CONTROL_2"    ,0x162, 8, MD(5)},
+   {"PHY_CONTROL_3"    ,0x163, 8, MD(5)},
+   {"PHY_OOB_CAL_TX"   ,0x164, 8, MD(5)},
+   {"PHY_OOB_CAL_RX"   ,0x165, 8, MD(5)},
+   {"OOB_PHY_CAL_TX"   ,0x166, 8, MD(5)},
+   {"OOB_PHY_CAL_RX"   ,0x167, 8, MD(5)},
+   {"PHY_CONTROL_4"    ,0x168, 8, MD(5)},
+   {"PHY_TEST"         ,0x169, 8, MD(5)},
+   {"PHY_PWR_CTL"      ,0x16A, 8, MD(5)},
+   {"PHY_PWR_DELAY"    ,0x16B, 8, MD(5)},
+   {"OOB_SM_CON"       ,0x16C, 8, MD(5)},
+   {"ADDR_TRAP_1"      ,0x16D, 8, MD(5)},
+   {"ADDR_NEXT_1"      ,0x16E, 8, MD(5)},
+   {"NEXT_ST_1"        ,0x16F, 8, MD(5)},
+   {"OOB_SM_STATE"     ,0x170, 8, MD(5)},
+   {"ADDR_TRAP_2"      ,0x171, 8, MD(5)},
+   {"ADDR_NEXT_2"      ,0x172, 8, MD(5)},
+   {"NEXT_ST_2"        ,0x173, 8, MD(5)},
+   {NULL, 0, 0, 0 }
+};
+*/
+#define STR_8BIT   "   %30s[0x%04x]:0x%02x\n"
+#define STR_16BIT  "   %30s[0x%04x]:0x%04x\n"
+#define STR_32BIT  "   %30s[0x%04x]:0x%08x\n"
+#define STR_64BIT  "   %30s[0x%04x]:0x%llx\n"
+
+#define PRINT_REG_8bit(_ha, _n, _r) asd_printk(STR_8BIT, #_n, _n,      \
+					     asd_read_reg_byte(_ha, _r))
+#define PRINT_REG_16bit(_ha, _n, _r) asd_printk(STR_16BIT, #_n, _n,     \
+					      asd_read_reg_word(_ha, _r))
+#define PRINT_REG_32bit(_ha, _n, _r) asd_printk(STR_32BIT, #_n, _n,      \
+					      asd_read_reg_dword(_ha, _r))
+
+#define PRINT_CREG_8bit(_ha, _n) asd_printk(STR_8BIT, #_n, _n,      \
+					     asd_read_reg_byte(_ha, C##_n))
+#define PRINT_CREG_16bit(_ha, _n) asd_printk(STR_16BIT, #_n, _n,     \
+					      asd_read_reg_word(_ha, C##_n))
+#define PRINT_CREG_32bit(_ha, _n) asd_printk(STR_32BIT, #_n, _n,      \
+					      asd_read_reg_dword(_ha, C##_n))
+
+#define MSTR_8BIT   "   Mode:%02d %30s[0x%04x]:0x%02x\n"
+#define MSTR_16BIT  "   Mode:%02d %30s[0x%04x]:0x%04x\n"
+#define MSTR_32BIT  "   Mode:%02d %30s[0x%04x]:0x%08x\n"
+
+#define PRINT_MREG_8bit(_ha, _m, _n, _r) asd_printk(MSTR_8BIT, _m, #_n, _n,   \
+					     asd_read_reg_byte(_ha, _r))
+#define PRINT_MREG_16bit(_ha, _m, _n, _r) asd_printk(MSTR_16BIT, _m, #_n, _n, \
+					      asd_read_reg_word(_ha, _r))
+#define PRINT_MREG_32bit(_ha, _m, _n, _r) asd_printk(MSTR_32BIT, _m, #_n, _n, \
+					      asd_read_reg_dword(_ha, _r))
+
+/* can also be used for MD when the register is mode aware already */
+#define PRINT_MIS_byte(_ha, _n) asd_printk(STR_8BIT, #_n,CSEQ_##_n-CMAPPEDSCR,\
+                                           asd_read_reg_byte(_ha, CSEQ_##_n))
+#define PRINT_MIS_word(_ha, _n) asd_printk(STR_16BIT,#_n,CSEQ_##_n-CMAPPEDSCR,\
+                                           asd_read_reg_word(_ha, CSEQ_##_n))
+#define PRINT_MIS_dword(_ha, _n)                      \
+        asd_printk(STR_32BIT,#_n,CSEQ_##_n-CMAPPEDSCR,\
+                   asd_read_reg_dword(_ha, CSEQ_##_n))
+#define PRINT_MIS_qword(_ha, _n)                                       \
+        asd_printk(STR_64BIT, #_n,CSEQ_##_n-CMAPPEDSCR,                \
+                   (unsigned long long)(((u64)asd_read_reg_dword(_ha, CSEQ_##_n))     \
+                 | (((u64)asd_read_reg_dword(_ha, (CSEQ_##_n)+4))<<32)))
+
+#define CMDP_REG(_n, _m) ((_m)*(CSEQ_PAGE_SIZE*2)+CSEQ_##_n) /* CSEQ_<_n> plus _m * 2 pages; _m parenthesized */
+#define PRINT_CMDP_word(_ha, _n) \
+asd_printk("%20s 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n", \
+	#_n, \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 0)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 1)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 2)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 3)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 4)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 5)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 6)), \
+	asd_read_reg_word(_ha, CMDP_REG(_n, 7)))
+
+#define PRINT_CMDP_byte(_ha, _n) \
+asd_printk("%20s 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n", \
+	#_n, \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 0)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 1)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 2)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 3)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 4)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 5)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 6)), \
+	asd_read_reg_byte(_ha, CMDP_REG(_n, 7)))
+
+static void asd_dump_cseq_state(struct asd_ha_struct *asd_ha)
+{
+	int mode;
+
+	asd_printk("CSEQ STATE\n");
+
+	asd_printk("ARP2 REGISTERS\n");
+
+	PRINT_CREG_32bit(asd_ha, ARP2CTL);
+	PRINT_CREG_32bit(asd_ha, ARP2INT);
+	PRINT_CREG_32bit(asd_ha, ARP2INTEN);
+	PRINT_CREG_8bit(asd_ha, MODEPTR);
+	PRINT_CREG_8bit(asd_ha, ALTMODE);
+	PRINT_CREG_8bit(asd_ha, FLAG);
+	PRINT_CREG_8bit(asd_ha, ARP2INTCTL);
+	PRINT_CREG_16bit(asd_ha, STACK);
+	PRINT_CREG_16bit(asd_ha, PRGMCNT);
+	PRINT_CREG_16bit(asd_ha, ACCUM);
+	PRINT_CREG_16bit(asd_ha, SINDEX);
+	PRINT_CREG_16bit(asd_ha, DINDEX);
+	PRINT_CREG_8bit(asd_ha, SINDIR);
+	PRINT_CREG_8bit(asd_ha, DINDIR);
+	PRINT_CREG_8bit(asd_ha, JUMLDIR);
+	PRINT_CREG_8bit(asd_ha, ARP2HALTCODE);
+	PRINT_CREG_16bit(asd_ha, CURRADDR);
+	PRINT_CREG_16bit(asd_ha, LASTADDR);
+	PRINT_CREG_16bit(asd_ha, NXTLADDR);
+
+	asd_printk("IOP REGISTERS\n");
+
+	PRINT_REG_32bit(asd_ha, BISTCTL1, CBISTCTL);
+	PRINT_CREG_32bit(asd_ha, MAPPEDSCR);
+
+	asd_printk("CIO REGISTERS\n");
+
+	for (mode = 0; mode < 9; mode++)
+		PRINT_MREG_16bit(asd_ha, mode, MnSCBPTR, CMnSCBPTR(mode));
+	PRINT_MREG_16bit(asd_ha, 15, MnSCBPTR, CMnSCBPTR(15));
+
+	for (mode = 0; mode < 9; mode++)
+		PRINT_MREG_16bit(asd_ha, mode, MnDDBPTR, CMnDDBPTR(mode));
+	PRINT_MREG_16bit(asd_ha, 15, MnDDBPTR, CMnDDBPTR(15));
+
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_32bit(asd_ha, mode, MnREQMBX, CMnREQMBX(mode));
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_32bit(asd_ha, mode, MnRSPMBX, CMnRSPMBX(mode));
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_32bit(asd_ha, mode, MnINT, CMnINT(mode));
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_32bit(asd_ha, mode, MnINTEN, CMnINTEN(mode));
+
+	PRINT_CREG_8bit(asd_ha, SCRATCHPAGE);
+	for (mode = 0; mode < 8; mode++)
+		PRINT_MREG_8bit(asd_ha, mode, MnSCRATCHPAGE,
+				CMnSCRATCHPAGE(mode));
+
+	PRINT_REG_32bit(asd_ha, CLINKCON, CLINKCON);
+	PRINT_REG_8bit(asd_ha, CCONMSK, CCONMSK);
+	PRINT_REG_8bit(asd_ha, CCONEXIST, CCONEXIST);
+	PRINT_REG_16bit(asd_ha, CCONMODE, CCONMODE);
+	PRINT_REG_32bit(asd_ha, CTIMERCALC, CTIMERCALC);
+	PRINT_REG_8bit(asd_ha, CINTDIS, CINTDIS);
+
+	asd_printk("SCRATCH MEMORY\n");
+
+	asd_printk("MIP 4 >>>>>\n");
+	PRINT_MIS_word(asd_ha, Q_EXE_HEAD);
+	PRINT_MIS_word(asd_ha, Q_EXE_TAIL);
+	PRINT_MIS_word(asd_ha, Q_DONE_HEAD);
+	PRINT_MIS_word(asd_ha, Q_DONE_TAIL);
+	PRINT_MIS_word(asd_ha, Q_SEND_HEAD);
+	PRINT_MIS_word(asd_ha, Q_SEND_TAIL);
+	PRINT_MIS_word(asd_ha, Q_DMA2CHIM_HEAD);
+	PRINT_MIS_word(asd_ha, Q_DMA2CHIM_TAIL);
+	PRINT_MIS_word(asd_ha, Q_COPY_HEAD);
+	PRINT_MIS_word(asd_ha, Q_COPY_TAIL);
+	PRINT_MIS_word(asd_ha, REG0);
+	PRINT_MIS_word(asd_ha, REG1);
+	PRINT_MIS_dword(asd_ha, REG2);
+	PRINT_MIS_byte(asd_ha, LINK_CTL_Q_MAP);
+	PRINT_MIS_byte(asd_ha, MAX_CSEQ_MODE);
+	PRINT_MIS_byte(asd_ha, FREE_LIST_HACK_COUNT);
+
+	asd_printk("MIP 5 >>>>\n");
+	PRINT_MIS_qword(asd_ha, EST_NEXUS_REQ_QUEUE);
+	PRINT_MIS_qword(asd_ha, EST_NEXUS_REQ_COUNT);
+	PRINT_MIS_word(asd_ha, Q_EST_NEXUS_HEAD);
+	PRINT_MIS_word(asd_ha, Q_EST_NEXUS_TAIL);
+	PRINT_MIS_word(asd_ha, NEED_EST_NEXUS_SCB);
+	PRINT_MIS_byte(asd_ha, EST_NEXUS_REQ_HEAD);
+	PRINT_MIS_byte(asd_ha, EST_NEXUS_REQ_TAIL);
+	PRINT_MIS_byte(asd_ha, EST_NEXUS_SCB_OFFSET);
+
+	asd_printk("MIP 6 >>>>\n");
+	PRINT_MIS_word(asd_ha, INT_ROUT_RET_ADDR0);
+	PRINT_MIS_word(asd_ha, INT_ROUT_RET_ADDR1);
+	PRINT_MIS_word(asd_ha, INT_ROUT_SCBPTR);
+	PRINT_MIS_byte(asd_ha, INT_ROUT_MODE);
+	PRINT_MIS_byte(asd_ha, ISR_SCRATCH_FLAGS);
+	PRINT_MIS_word(asd_ha, ISR_SAVE_SINDEX);
+	PRINT_MIS_word(asd_ha, ISR_SAVE_DINDEX);
+	PRINT_MIS_word(asd_ha, Q_MONIRTT_HEAD);
+	PRINT_MIS_word(asd_ha, Q_MONIRTT_TAIL);
+	PRINT_MIS_byte(asd_ha, FREE_SCB_MASK);
+	PRINT_MIS_word(asd_ha, BUILTIN_FREE_SCB_HEAD);
+	PRINT_MIS_word(asd_ha, BUILTIN_FREE_SCB_TAIL);
+	PRINT_MIS_word(asd_ha, EXTENDED_FREE_SCB_HEAD);
+	PRINT_MIS_word(asd_ha, EXTENDED_FREE_SCB_TAIL);
+
+	asd_printk("MIP 7 >>>>\n");
+	PRINT_MIS_qword(asd_ha, EMPTY_REQ_QUEUE);
+	PRINT_MIS_qword(asd_ha, EMPTY_REQ_COUNT);
+	PRINT_MIS_word(asd_ha, Q_EMPTY_HEAD);
+	PRINT_MIS_word(asd_ha, Q_EMPTY_TAIL);
+	PRINT_MIS_word(asd_ha, NEED_EMPTY_SCB);
+	PRINT_MIS_byte(asd_ha, EMPTY_REQ_HEAD);
+	PRINT_MIS_byte(asd_ha, EMPTY_REQ_TAIL);
+	PRINT_MIS_byte(asd_ha, EMPTY_SCB_OFFSET);
+	PRINT_MIS_word(asd_ha, PRIMITIVE_DATA);
+	PRINT_MIS_dword(asd_ha, TIMEOUT_CONST);
+
+	asd_printk("MDP 0 >>>>\n");
+	asd_printk("%-20s %6s %6s %6s %6s %6s %6s %6s %6s\n",
+		   "Mode: ", "0", "1", "2", "3", "4", "5", "6", "7");
+	PRINT_CMDP_word(asd_ha, LRM_SAVE_SINDEX);
+	PRINT_CMDP_word(asd_ha, LRM_SAVE_SCBPTR);
+	PRINT_CMDP_word(asd_ha, Q_LINK_HEAD);
+	PRINT_CMDP_word(asd_ha, Q_LINK_TAIL);
+	PRINT_CMDP_byte(asd_ha, LRM_SAVE_SCRPAGE);
+
+	asd_printk("MDP 0 Mode 8 >>>>\n");
+	PRINT_MIS_word(asd_ha, RET_ADDR);
+	PRINT_MIS_word(asd_ha, RET_SCBPTR);
+	PRINT_MIS_word(asd_ha, SAVE_SCBPTR);
+	PRINT_MIS_word(asd_ha, EMPTY_TRANS_CTX);
+	PRINT_MIS_word(asd_ha, RESP_LEN);
+	PRINT_MIS_word(asd_ha, TMF_SCBPTR);
+	PRINT_MIS_word(asd_ha, GLOBAL_PREV_SCB);
+	PRINT_MIS_word(asd_ha, GLOBAL_HEAD);
+	PRINT_MIS_word(asd_ha, CLEAR_LU_HEAD);
+	PRINT_MIS_byte(asd_ha, TMF_OPCODE);
+	PRINT_MIS_byte(asd_ha, SCRATCH_FLAGS);
+	PRINT_MIS_word(asd_ha, HSB_SITE);
+	PRINT_MIS_word(asd_ha, FIRST_INV_SCB_SITE);
+	PRINT_MIS_word(asd_ha, FIRST_INV_DDB_SITE);
+
+	asd_printk("MDP 1 Mode 8 >>>>\n");
+	PRINT_MIS_qword(asd_ha, LUN_TO_CLEAR);
+	PRINT_MIS_qword(asd_ha, LUN_TO_CHECK);
+
+	asd_printk("MDP 2 Mode 8 >>>>\n");
+	PRINT_MIS_qword(asd_ha, HQ_NEW_POINTER);
+	PRINT_MIS_qword(asd_ha, HQ_DONE_BASE);
+	PRINT_MIS_dword(asd_ha, HQ_DONE_POINTER);
+	PRINT_MIS_byte(asd_ha, HQ_DONE_PASS);
+}
+
+#define PRINT_LREG_8bit(_h, _lseq, _n) \
+        asd_printk(STR_8BIT, #_n, _n, asd_read_reg_byte(_h, Lm##_n(_lseq)))
+#define PRINT_LREG_16bit(_h, _lseq, _n) \
+        asd_printk(STR_16BIT, #_n, _n, asd_read_reg_word(_h, Lm##_n(_lseq)))
+#define PRINT_LREG_32bit(_h, _lseq, _n) \
+        asd_printk(STR_32BIT, #_n, _n, asd_read_reg_dword(_h, Lm##_n(_lseq)))
+
+#define PRINT_LMIP_byte(_h, _lseq, _n)                              \
+	asd_printk(STR_8BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \
+		   asd_read_reg_byte(_h, LmSEQ_##_n(_lseq)))
+#define PRINT_LMIP_word(_h, _lseq, _n)                              \
+	asd_printk(STR_16BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \
+		   asd_read_reg_word(_h, LmSEQ_##_n(_lseq)))
+#define PRINT_LMIP_dword(_h, _lseq, _n)                             \
+	asd_printk(STR_32BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \
+		   asd_read_reg_dword(_h, LmSEQ_##_n(_lseq)))
+#define PRINT_LMIP_qword(_h, _lseq, _n)                                \
+	asd_printk(STR_64BIT, #_n, LmSEQ_##_n(_lseq)-LmSCRATCH(_lseq), \
+		 (unsigned long long)(((unsigned long long) \
+		 asd_read_reg_dword(_h, LmSEQ_##_n(_lseq))) \
+	          | (((unsigned long long) \
+		 asd_read_reg_dword(_h, LmSEQ_##_n(_lseq)+4))<<32)))
+
+static void asd_print_lseq_cio_reg(struct asd_ha_struct *asd_ha,
+				   u32 lseq_cio_addr, int i)
+{
+	switch (LSEQmCIOREGS[i].width) {
+	case 8:
+		asd_printk("%20s[0x%x]: 0x%02x\n", LSEQmCIOREGS[i].name,
+			   LSEQmCIOREGS[i].offs,
+			   asd_read_reg_byte(asd_ha, lseq_cio_addr +
+					     LSEQmCIOREGS[i].offs));
+
+		break;
+	case 16:
+		asd_printk("%20s[0x%x]: 0x%04x\n", LSEQmCIOREGS[i].name,
+			   LSEQmCIOREGS[i].offs,
+			   asd_read_reg_word(asd_ha, lseq_cio_addr +
+					     LSEQmCIOREGS[i].offs));
+
+		break;
+	case 32:
+		asd_printk("%20s[0x%x]: 0x%08x\n", LSEQmCIOREGS[i].name,
+			   LSEQmCIOREGS[i].offs,
+			   asd_read_reg_dword(asd_ha, lseq_cio_addr +
+					      LSEQmCIOREGS[i].offs));
+		break;
+	}
+}
+
+static void asd_dump_lseq_state(struct asd_ha_struct *asd_ha, int lseq)
+{
+	u32 moffs;
+	int mode;
+
+	asd_printk("LSEQ %d STATE\n", lseq);
+
+	asd_printk("LSEQ%d: ARP2 REGISTERS\n", lseq);
+	PRINT_LREG_32bit(asd_ha, lseq, ARP2CTL);
+	PRINT_LREG_32bit(asd_ha, lseq, ARP2INT);
+	PRINT_LREG_32bit(asd_ha, lseq, ARP2INTEN);
+	PRINT_LREG_8bit(asd_ha, lseq, MODEPTR);
+	PRINT_LREG_8bit(asd_ha, lseq, ALTMODE);
+	PRINT_LREG_8bit(asd_ha, lseq, FLAG);
+	PRINT_LREG_8bit(asd_ha, lseq, ARP2INTCTL);
+	PRINT_LREG_16bit(asd_ha, lseq, STACK);
+	PRINT_LREG_16bit(asd_ha, lseq, PRGMCNT);
+	PRINT_LREG_16bit(asd_ha, lseq, ACCUM);
+	PRINT_LREG_16bit(asd_ha, lseq, SINDEX);
+	PRINT_LREG_16bit(asd_ha, lseq, DINDEX);
+	PRINT_LREG_8bit(asd_ha, lseq, SINDIR);
+	PRINT_LREG_8bit(asd_ha, lseq, DINDIR);
+	PRINT_LREG_8bit(asd_ha, lseq, JUMLDIR);
+	PRINT_LREG_8bit(asd_ha, lseq, ARP2HALTCODE);
+	PRINT_LREG_16bit(asd_ha, lseq, CURRADDR);
+	PRINT_LREG_16bit(asd_ha, lseq, LASTADDR);
+	PRINT_LREG_16bit(asd_ha, lseq, NXTLADDR);
+
+	asd_printk("LSEQ%d: IOP REGISTERS\n", lseq);
+
+	PRINT_LREG_32bit(asd_ha, lseq, MODECTL);
+	PRINT_LREG_32bit(asd_ha, lseq, DBGMODE);
+	PRINT_LREG_32bit(asd_ha, lseq, CONTROL);
+	PRINT_REG_32bit(asd_ha, BISTCTL0, LmBISTCTL0(lseq));
+	PRINT_REG_32bit(asd_ha, BISTCTL1, LmBISTCTL1(lseq));
+
+	asd_printk("LSEQ%d: CIO REGISTERS\n", lseq);
+	asd_printk("Mode common:\n");
+
+	for (mode = 0; mode < 8; mode++) {
+		u32 lseq_cio_addr = LmSEQ_PHY_BASE(mode, lseq);
+		int i;
+
+		for (i = 0; LSEQmCIOREGS[i].name; i++)
+			if (LSEQmCIOREGS[i].mode == MODE_COMMON)
+				asd_print_lseq_cio_reg(asd_ha,lseq_cio_addr,i);
+	}
+
+	asd_printk("Mode unique:\n");
+	for (mode = 0; mode < 8; mode++) {
+		u32 lseq_cio_addr = LmSEQ_PHY_BASE(mode, lseq);
+		int i;
+
+		asd_printk("Mode %d\n", mode);
+		for  (i = 0; LSEQmCIOREGS[i].name; i++) {
+			if (!(LSEQmCIOREGS[i].mode & (1 << mode)))
+				continue;
+			asd_print_lseq_cio_reg(asd_ha, lseq_cio_addr, i);
+		}
+	}
+
+	asd_printk("SCRATCH MEMORY\n");
+
+	asd_printk("LSEQ%d MIP 0 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, Q_TGTXFR_HEAD);
+	PRINT_LMIP_word(asd_ha, lseq, Q_TGTXFR_TAIL);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_NUMBER);
+	PRINT_LMIP_byte(asd_ha, lseq, SCRATCH_FLAGS);
+	PRINT_LMIP_qword(asd_ha, lseq, CONNECTION_STATE);
+	PRINT_LMIP_word(asd_ha, lseq, CONCTL);
+	PRINT_LMIP_byte(asd_ha, lseq, CONSTAT);
+	PRINT_LMIP_byte(asd_ha, lseq, CONNECTION_MODES);
+	PRINT_LMIP_word(asd_ha, lseq, REG1_ISR);
+	PRINT_LMIP_word(asd_ha, lseq, REG2_ISR);
+	PRINT_LMIP_word(asd_ha, lseq, REG3_ISR);
+	PRINT_LMIP_qword(asd_ha, lseq,REG0_ISR);
+
+	asd_printk("LSEQ%d MIP 1 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR0);
+	PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR1);
+	PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR2);
+	PRINT_LMIP_word(asd_ha, lseq, EST_NEXUS_SCBPTR3);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE0);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE1);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE2);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_OPCODE3);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_HEAD);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_SCB_TAIL);
+	PRINT_LMIP_byte(asd_ha, lseq, EST_NEXUS_BUF_AVAIL);
+	PRINT_LMIP_dword(asd_ha, lseq, TIMEOUT_CONST);
+	PRINT_LMIP_word(asd_ha, lseq, ISR_SAVE_SINDEX);
+	PRINT_LMIP_word(asd_ha, lseq, ISR_SAVE_DINDEX);
+
+	asd_printk("LSEQ%d MIP 2 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR0);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR1);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR2);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_SCB_PTR3);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD0);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD1);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD2);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_OPCD3);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_HEAD);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_SCB_TAIL);
+	PRINT_LMIP_byte(asd_ha, lseq, EMPTY_BUFS_AVAIL);
+
+	asd_printk("LSEQ%d MIP 3 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, DEV_PRES_TMR_TOUT_CONST);
+	PRINT_LMIP_dword(asd_ha, lseq, SATA_INTERLOCK_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, SRST_ASSERT_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, RCV_FIS_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, ONE_MILLISEC_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, TEN_MS_COMINIT_TIMEOUT);
+	PRINT_LMIP_dword(asd_ha, lseq, SMP_RCV_TIMEOUT);
+
+	for (mode = 0; mode < 3; mode++) {
+		asd_printk("LSEQ%d MDP 0 MODE %d >>>>\n", lseq, mode);
+		moffs = mode * LSEQ_MODE_SCRATCH_SIZE;
+
+		asd_printk(STR_16BIT, "RET_ADDR", 0,
+			   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "REG0_MODE", 2,
+			   asd_read_reg_word(asd_ha, LmSEQ_REG0_MODE(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "MODE_FLAGS", 4,
+			   asd_read_reg_word(asd_ha, LmSEQ_MODE_FLAGS(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "RET_ADDR2", 0x6,
+			   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR2(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "RET_ADDR1", 0x8,
+			   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR1(lseq)
+					     + moffs));
+		asd_printk(STR_8BIT, "OPCODE_TO_CSEQ", 0xB,
+			   asd_read_reg_byte(asd_ha, LmSEQ_OPCODE_TO_CSEQ(lseq)
+					     + moffs));
+		asd_printk(STR_16BIT, "DATA_TO_CSEQ", 0xC,
+			   asd_read_reg_word(asd_ha, LmSEQ_DATA_TO_CSEQ(lseq)
+					     + moffs));
+	}
+
+	asd_printk("LSEQ%d MDP 0 MODE 5 >>>>\n", lseq);
+	moffs = LSEQ_MODE5_PAGE0_OFFSET;
+	asd_printk(STR_16BIT, "RET_ADDR", 0,
+		   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR(lseq) + moffs));
+	asd_printk(STR_16BIT, "REG0_MODE", 2,
+		   asd_read_reg_word(asd_ha, LmSEQ_REG0_MODE(lseq) + moffs));
+	asd_printk(STR_16BIT, "MODE_FLAGS", 4,
+		   asd_read_reg_word(asd_ha, LmSEQ_MODE_FLAGS(lseq) + moffs));
+	asd_printk(STR_16BIT, "RET_ADDR2", 0x6,
+		   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR2(lseq) + moffs));
+	asd_printk(STR_16BIT, "RET_ADDR1", 0x8,
+		   asd_read_reg_word(asd_ha, LmSEQ_RET_ADDR1(lseq) + moffs));
+	asd_printk(STR_8BIT, "OPCODE_TO_CSEQ", 0xB,
+	   asd_read_reg_byte(asd_ha, LmSEQ_OPCODE_TO_CSEQ(lseq) + moffs));
+	asd_printk(STR_16BIT, "DATA_TO_CSEQ", 0xC,
+	   asd_read_reg_word(asd_ha, LmSEQ_DATA_TO_CSEQ(lseq) + moffs));
+
+	asd_printk("LSEQ%d MDP 0 MODE 0 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, FIRST_INV_DDB_SITE);
+	PRINT_LMIP_word(asd_ha, lseq, EMPTY_TRANS_CTX);
+	PRINT_LMIP_word(asd_ha, lseq, RESP_LEN);
+	PRINT_LMIP_word(asd_ha, lseq, FIRST_INV_SCB_SITE);
+	PRINT_LMIP_dword(asd_ha, lseq, INTEN_SAVE);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_RST_FRM_LEN);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_RST_PROTOCOL);
+	PRINT_LMIP_byte(asd_ha, lseq, RESP_STATUS);
+	PRINT_LMIP_byte(asd_ha, lseq, LAST_LOADED_SGE);
+	PRINT_LMIP_byte(asd_ha, lseq, SAVE_SCBPTR);
+
+	asd_printk("LSEQ%d MDP 0 MODE 1 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, Q_XMIT_HEAD);
+	PRINT_LMIP_word(asd_ha, lseq, M1_EMPTY_TRANS_CTX);
+	PRINT_LMIP_word(asd_ha, lseq, INI_CONN_TAG);
+	PRINT_LMIP_byte(asd_ha, lseq, FAILED_OPEN_STATUS);
+	PRINT_LMIP_byte(asd_ha, lseq, XMIT_REQUEST_TYPE);
+	PRINT_LMIP_byte(asd_ha, lseq, M1_RESP_STATUS);
+	PRINT_LMIP_byte(asd_ha, lseq, M1_LAST_LOADED_SGE);
+	PRINT_LMIP_word(asd_ha, lseq, M1_SAVE_SCBPTR);
+
+	asd_printk("LSEQ%d MDP 0 MODE 2 >>>>\n", lseq);
+	PRINT_LMIP_word(asd_ha, lseq, PORT_COUNTER);
+	PRINT_LMIP_word(asd_ha, lseq, PM_TABLE_PTR);
+	PRINT_LMIP_word(asd_ha, lseq, SATA_INTERLOCK_TMR_SAVE);
+	PRINT_LMIP_word(asd_ha, lseq, IP_BITL);
+	PRINT_LMIP_word(asd_ha, lseq, COPY_SMP_CONN_TAG);
+	PRINT_LMIP_byte(asd_ha, lseq, P0M2_OFFS1AH);
+
+	asd_printk("LSEQ%d MDP 0 MODE 4/5 >>>>\n", lseq);
+	PRINT_LMIP_byte(asd_ha, lseq, SAVED_OOB_STATUS);
+	PRINT_LMIP_byte(asd_ha, lseq, SAVED_OOB_MODE);
+	PRINT_LMIP_word(asd_ha, lseq, Q_LINK_HEAD);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_RST_ERR);
+	PRINT_LMIP_byte(asd_ha, lseq, SAVED_OOB_SIGNALS);
+	PRINT_LMIP_byte(asd_ha, lseq, SAS_RESET_MODE);
+	PRINT_LMIP_byte(asd_ha, lseq, LINK_RESET_RETRY_COUNT);
+	PRINT_LMIP_byte(asd_ha, lseq, NUM_LINK_RESET_RETRIES);
+	PRINT_LMIP_word(asd_ha, lseq, OOB_INT_ENABLES);
+	PRINT_LMIP_word(asd_ha, lseq, NOTIFY_TIMER_TIMEOUT);
+	PRINT_LMIP_word(asd_ha, lseq, NOTIFY_TIMER_DOWN_COUNT);
+
+	asd_printk("LSEQ%d MDP 1 MODE 0 >>>>\n", lseq);
+	PRINT_LMIP_qword(asd_ha, lseq, SG_LIST_PTR_ADDR0);
+	PRINT_LMIP_qword(asd_ha, lseq, SG_LIST_PTR_ADDR1);
+
+	asd_printk("LSEQ%d MDP 1 MODE 1 >>>>\n", lseq);
+	PRINT_LMIP_qword(asd_ha, lseq, M1_SG_LIST_PTR_ADDR0);
+	PRINT_LMIP_qword(asd_ha, lseq, M1_SG_LIST_PTR_ADDR1);
+
+	asd_printk("LSEQ%d MDP 1 MODE 2 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, INVALID_DWORD_COUNT);
+	PRINT_LMIP_dword(asd_ha, lseq, DISPARITY_ERROR_COUNT);
+	PRINT_LMIP_dword(asd_ha, lseq, LOSS_OF_SYNC_COUNT);
+
+	asd_printk("LSEQ%d MDP 1 MODE 4/5 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, FRAME_TYPE_MASK);
+	PRINT_LMIP_dword(asd_ha, lseq, HASHED_SRC_ADDR_MASK_PRINT);
+	PRINT_LMIP_byte(asd_ha, lseq, NUM_FILL_BYTES_MASK);
+	PRINT_LMIP_word(asd_ha, lseq, TAG_MASK);
+	PRINT_LMIP_word(asd_ha, lseq, TARGET_PORT_XFER_TAG);
+	PRINT_LMIP_dword(asd_ha, lseq, DATA_OFFSET);
+
+	asd_printk("LSEQ%d MDP 2 MODE 0 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, SMP_RCV_TIMER_TERM_TS);
+	PRINT_LMIP_byte(asd_ha, lseq, DEVICE_BITS);
+	PRINT_LMIP_word(asd_ha, lseq, SDB_DDB);
+	PRINT_LMIP_word(asd_ha, lseq, SDB_NUM_TAGS);
+	PRINT_LMIP_word(asd_ha, lseq, SDB_CURR_TAG);
+
+	asd_printk("LSEQ%d MDP 2 MODE 1 >>>>\n", lseq);
+	PRINT_LMIP_qword(asd_ha, lseq, TX_ID_ADDR_FRAME);
+	PRINT_LMIP_dword(asd_ha, lseq, OPEN_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, SRST_AS_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, LAST_LOADED_SG_EL);
+
+	asd_printk("LSEQ%d MDP 2 MODE 2 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, CLOSE_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, BREAK_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, DWS_RESET_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, SATA_INTERLOCK_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, MCTL_TIMER_TERM_TS);
+
+	asd_printk("LSEQ%d MDP 2 MODE 4/5 >>>>\n", lseq);
+	PRINT_LMIP_dword(asd_ha, lseq, COMINIT_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, RCV_ID_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, RCV_FIS_TIMER_TERM_TS);
+	PRINT_LMIP_dword(asd_ha, lseq, DEV_PRES_TIMER_TERM_TS);
+}
+
+/**
+ * asd_dump_target_ddb -- dump a CSEQ DDB site
+ * @asd_ha: pointer to host adapter structure
+ * @site_no: site number of interest
+ */
+void asd_dump_target_ddb(struct asd_ha_struct *asd_ha, u16 site_no)
+{
+	if (site_no >= asd_ha->hw_prof.max_ddbs)
+		return;
+
+#define DDB_FIELDB(__name)                                        \
+	asd_ddbsite_read_byte(asd_ha, site_no,                    \
+			      offsetof(struct asd_ddb_ssp_smp_target_port, __name))
+#define DDB2_FIELDB(__name)                                       \
+	asd_ddbsite_read_byte(asd_ha, site_no,                    \
+			      offsetof(struct asd_ddb_stp_sata_target_port, __name))
+#define DDB_FIELDW(__name)                                        \
+	asd_ddbsite_read_word(asd_ha, site_no,                    \
+			      offsetof(struct asd_ddb_ssp_smp_target_port, __name))
+
+#define DDB_FIELDD(__name)                                         \
+	asd_ddbsite_read_dword(asd_ha, site_no,                    \
+			       offsetof(struct asd_ddb_ssp_smp_target_port, __name))
+
+	asd_printk("DDB: 0x%02x\n", site_no);
+	asd_printk("conn_type: 0x%02x\n", DDB_FIELDB(conn_type));
+	asd_printk("conn_rate: 0x%02x\n", DDB_FIELDB(conn_rate));
+	asd_printk("init_conn_tag: 0x%04x\n", be16_to_cpu(DDB_FIELDW(init_conn_tag)));
+	asd_printk("send_queue_head: 0x%04x\n", be16_to_cpu(DDB_FIELDW(send_queue_head)));
+	asd_printk("sq_suspended: 0x%02x\n", DDB_FIELDB(sq_suspended));
+	asd_printk("DDB Type: 0x%02x\n", DDB_FIELDB(ddb_type));
+	asd_printk("AWT Default: 0x%04x\n", DDB_FIELDW(awt_def));
+	asd_printk("compat_features: 0x%02x\n", DDB_FIELDB(compat_features));
+	asd_printk("Pathway Blocked Count: 0x%02x\n",
+		   DDB_FIELDB(pathway_blocked_count));
+	asd_printk("arb_wait_time: 0x%04x\n", DDB_FIELDW(arb_wait_time));
+	asd_printk("more_compat_features: 0x%08x\n",
+		   DDB_FIELDD(more_compat_features));
+	asd_printk("Conn Mask: 0x%02x\n", DDB_FIELDB(conn_mask));
+	asd_printk("flags: 0x%02x\n", DDB_FIELDB(flags));
+	asd_printk("flags2: 0x%02x\n", DDB2_FIELDB(flags2));
+	asd_printk("ExecQ Tail: 0x%04x\n",DDB_FIELDW(exec_queue_tail));
+	asd_printk("SendQ Tail: 0x%04x\n",DDB_FIELDW(send_queue_tail));
+	asd_printk("Active Task Count: 0x%04x\n",
+		   DDB_FIELDW(active_task_count));
+	asd_printk("ITNL Reason: 0x%02x\n", DDB_FIELDB(itnl_reason));
+	asd_printk("ITNL Timeout Const: 0x%04x\n", DDB_FIELDW(itnl_timeout));
+	asd_printk("ITNL timestamp: 0x%08x\n", DDB_FIELDD(itnl_timestamp));
+}
+
+void asd_dump_ddb_0(struct asd_ha_struct *asd_ha)
+{
+#define DDB0_FIELDB(__name)                                  \
+	asd_ddbsite_read_byte(asd_ha, 0,                     \
+			      offsetof(struct asd_ddb_seq_shared, __name))
+#define DDB0_FIELDW(__name)                                  \
+	asd_ddbsite_read_word(asd_ha, 0,                     \
+			      offsetof(struct asd_ddb_seq_shared, __name))
+
+#define DDB0_FIELDD(__name)                                  \
+	asd_ddbsite_read_dword(asd_ha,0 ,                    \
+			       offsetof(struct asd_ddb_seq_shared, __name))
+
+#define DDB0_FIELDA(__name, _o)                              \
+	asd_ddbsite_read_byte(asd_ha, 0,                     \
+			      offsetof(struct asd_ddb_seq_shared, __name)+_o)
+
+
+	asd_printk("DDB: 0\n");
+	asd_printk("q_free_ddb_head:%04x\n", DDB0_FIELDW(q_free_ddb_head));
+	asd_printk("q_free_ddb_tail:%04x\n", DDB0_FIELDW(q_free_ddb_tail));
+	asd_printk("q_free_ddb_cnt:%04x\n",  DDB0_FIELDW(q_free_ddb_cnt));
+	asd_printk("q_used_ddb_head:%04x\n", DDB0_FIELDW(q_used_ddb_head));
+	asd_printk("q_used_ddb_tail:%04x\n", DDB0_FIELDW(q_used_ddb_tail));
+	asd_printk("shared_mem_lock:%04x\n", DDB0_FIELDW(shared_mem_lock));
+	asd_printk("smp_conn_tag:%04x\n",    DDB0_FIELDW(smp_conn_tag));
+	asd_printk("est_nexus_buf_cnt:%04x\n", DDB0_FIELDW(est_nexus_buf_cnt));
+	asd_printk("est_nexus_buf_thresh:%04x\n",
+		   DDB0_FIELDW(est_nexus_buf_thresh));
+	asd_printk("conn_not_active:%02x\n", DDB0_FIELDB(conn_not_active));
+	asd_printk("phy_is_up:%02x\n",       DDB0_FIELDB(phy_is_up));
+	asd_printk("port_map_by_links:%02x %02x %02x %02x "
+		   "%02x %02x %02x %02x\n",
+		   DDB0_FIELDA(port_map_by_links, 0),
+		   DDB0_FIELDA(port_map_by_links, 1),
+		   DDB0_FIELDA(port_map_by_links, 2),
+		   DDB0_FIELDA(port_map_by_links, 3),
+		   DDB0_FIELDA(port_map_by_links, 4),
+		   DDB0_FIELDA(port_map_by_links, 5),
+		   DDB0_FIELDA(port_map_by_links, 6),
+		   DDB0_FIELDA(port_map_by_links, 7));
+}
+
+static void asd_dump_scb_site(struct asd_ha_struct *asd_ha, u16 site_no)
+{
+	/* Accessors for initiate_ssp_task fields placed after the scb_header. */
+#define SCB_FIELDB(__name)                                                 \
+	asd_scbsite_read_byte(asd_ha, site_no, sizeof(struct scb_header)   \
+			      + offsetof(struct initiate_ssp_task, __name))
+#define SCB_FIELDW(__name)                                                 \
+	asd_scbsite_read_word(asd_ha, site_no, sizeof(struct scb_header)   \
+			      + offsetof(struct initiate_ssp_task, __name))
+#define SCB_FIELDD(__name)                                                 \
+	asd_scbsite_read_dword(asd_ha, site_no, sizeof(struct scb_header)  \
+			       + offsetof(struct initiate_ssp_task, __name))
+	/* Each read width matches the width of the format it is printed with. */
+	asd_printk("Total Xfer Len: 0x%08x.\n", SCB_FIELDD(total_xfer_len));
+	asd_printk("Frame Type: 0x%02x.\n", SCB_FIELDB(ssp_frame.frame_type));
+	asd_printk("Tag: 0x%04x.\n", SCB_FIELDW(ssp_frame.tag));
+	asd_printk("Target Port Xfer Tag: 0x%04x.\n",
+		   SCB_FIELDW(ssp_frame.tptt));
+	asd_printk("Data Offset: 0x%08x.\n", SCB_FIELDD(ssp_frame.data_offs));
+	asd_printk("Retry Count: 0x%02x.\n", SCB_FIELDB(retry_count));
+}
+
+/**
+ * asd_dump_scb_sites -- dump currently used CSEQ SCB sites
+ * @asd_ha: pointer to host adapter struct
+ */
+void asd_dump_scb_sites(struct asd_ha_struct *asd_ha)
+{
+	u16	site_no;
+
+	for (site_no = 0; site_no < asd_ha->hw_prof.max_scbs; site_no++) {
+		u8 opcode;
+
+		if (!SCB_SITE_VALID(site_no))
+			continue;
+
+		/* We are only interested in SCB sites currently used.
+		 */
+		opcode = asd_scbsite_read_byte(asd_ha, site_no,
+					       offsetof(struct scb_header,
+							opcode));
+		if (opcode == 0xFF)
+			continue;
+
+		asd_printk("\nSCB: 0x%x\n", site_no);
+		asd_dump_scb_site(asd_ha, site_no);
+	}
+}
+
+/**
+ * asd_dump_seq_state -- dump CSEQ and LSEQ states
+ * @asd_ha: pointer to host adapter structure
+ * @lseq_mask: mask of LSEQs of interest
+ */
+void asd_dump_seq_state(struct asd_ha_struct *asd_ha, u8 lseq_mask)
+{
+	int lseq;
+
+	asd_dump_cseq_state(asd_ha);
+
+	if (lseq_mask != 0)
+		for_each_sequencer(lseq_mask, lseq_mask, lseq)
+			asd_dump_lseq_state(asd_ha, lseq);
+}
+
+void asd_dump_frame_rcvd(struct asd_phy *phy,
+			 struct done_list_struct *dl)
+{
+	unsigned long flags;
+	int i;
+
+	switch ((dl->status_block[1] & 0x70) >> 3) {
+	case SAS_PROTO_STP:
+		ASD_DPRINTK("STP proto device-to-host FIS:\n");
+		break;
+	default:
+	case SAS_PROTO_SSP:
+		ASD_DPRINTK("SAS proto IDENTIFY:\n");
+		break;
+	}
+	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags);
+	for (i = 0; i < phy->sas_phy.frame_rcvd_size; i+=4)
+		ASD_DPRINTK("%02x: %02x %02x %02x %02x\n",
+			    i,
+			    phy->frame_rcvd[i],
+			    phy->frame_rcvd[i+1],
+			    phy->frame_rcvd[i+2],
+			    phy->frame_rcvd[i+3]);
+	spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags);
+}
+
+static inline void asd_dump_scb(struct asd_ascb *ascb, int ind)
+{
+	asd_printk("scb%d: vaddr: 0x%p, dma_handle: 0x%llx, next: 0x%llx, "
+		   "index:%d, opcode:0x%02x\n",
+		   ind, ascb->dma_scb.vaddr,
+		   (unsigned long long)ascb->dma_scb.dma_handle,
+		   (unsigned long long)
+		   le64_to_cpu(ascb->scb->header.next_scb),
+		   le16_to_cpu(ascb->scb->header.index),
+		   ascb->scb->header.opcode);
+}
+
+void asd_dump_scb_list(struct asd_ascb *ascb, int num)
+{
+	int i = 0;
+
+	asd_printk("dumping %d scbs:\n", num);
+
+	asd_dump_scb(ascb, i++);
+	--num;
+
+	if (num > 0 && !list_empty(&ascb->list)) {
+		struct list_head *el;
+
+		list_for_each(el, &ascb->list) {
+			struct asd_ascb *s = list_entry(el, struct asd_ascb,
+							list);
+			asd_dump_scb(s, i++);
+			if (--num <= 0)
+				break;
+		}
+	}
+}
+
+#endif /* ASD_DEBUG */
diff --git a/drivers/scsi/aic94xx/aic94xx_dump.h b/drivers/scsi/aic94xx/aic94xx_dump.h
new file mode 100644
index 0000000..0c388e7
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_dump.h
@@ -0,0 +1,52 @@
+/*
+ * Aic94xx SAS/SATA driver dump header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_DUMP_H_
+#define _AIC94XX_DUMP_H_
+
+#ifdef ASD_DEBUG
+
+void asd_dump_ddb_0(struct asd_ha_struct *asd_ha);
+void asd_dump_target_ddb(struct asd_ha_struct *asd_ha, u16 site_no);
+void asd_dump_scb_sites(struct asd_ha_struct *asd_ha);
+void asd_dump_seq_state(struct asd_ha_struct *asd_ha, u8 lseq_mask);
+void asd_dump_frame_rcvd(struct asd_phy *phy,
+			 struct done_list_struct *dl);
+void asd_dump_scb_list(struct asd_ascb *ascb, int num);
+#else /* ASD_DEBUG */
+
+static inline void asd_dump_ddb_0(struct asd_ha_struct *asd_ha) { }
+static inline void asd_dump_target_ddb(struct asd_ha_struct *asd_ha,
+				     u16 site_no) { }
+static inline void asd_dump_scb_sites(struct asd_ha_struct *asd_ha) { }
+static inline void asd_dump_seq_state(struct asd_ha_struct *asd_ha,
+				      u8 lseq_mask) { }
+static inline void asd_dump_frame_rcvd(struct asd_phy *phy,
+				       struct done_list_struct *dl) { }
+static inline void asd_dump_scb_list(struct asd_ascb *ascb, int num) { }
+#endif /* ASD_DEBUG */
+
+#endif /* _AIC94XX_DUMP_H_ */
diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c
new file mode 100644
index 0000000..a242013
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_hwi.c
@@ -0,0 +1,1376 @@
+/*
+ * Aic94xx SAS/SATA driver hardware interface.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_hwi.h"
+#include "aic94xx_seq.h"
+#include "aic94xx_dump.h"
+
+u32 MBAR0_SWB_SIZE;
+
+/* ---------- Initialization ---------- */
+
+static void asd_get_user_sas_addr(struct asd_ha_struct *asd_ha)
+{
+	extern char sas_addr_str[];
+	/* If the user has specified a WWN it overrides other settings
+	 */
+	if (sas_addr_str[0] != '\0')
+		asd_destringify_sas_addr(asd_ha->hw_prof.sas_addr,
+					 sas_addr_str);
+	else if (asd_ha->hw_prof.sas_addr[0] != 0)
+		asd_stringify_sas_addr(sas_addr_str, asd_ha->hw_prof.sas_addr);
+}
+
+static void asd_propagate_sas_addr(struct asd_ha_struct *asd_ha)
+{
+	int i;
+
+	for (i = 0; i < ASD_MAX_PHYS; i++) {
+		if (asd_ha->hw_prof.phy_desc[i].sas_addr[0] == 0)
+			continue;
+		/* NOTE(review): intent reads "set only if the phy has none",
+		 * but the check above skips phys whose sas_addr[0] is 0. */
+		ASD_DPRINTK("setting phy%d addr to %llx\n", i,
+			    SAS_ADDR(asd_ha->hw_prof.sas_addr));
+		memcpy(asd_ha->hw_prof.phy_desc[i].sas_addr,
+		       asd_ha->hw_prof.sas_addr, SAS_ADDR_SIZE);
+	}
+}
+
+/* ---------- PHY initialization ---------- */
+
+static void asd_init_phy_identify(struct asd_phy *phy)
+{
+	phy->identify_frame = phy->id_frm_tok->vaddr;
+
+	memset(phy->identify_frame, 0, sizeof(*phy->identify_frame));
+
+	phy->identify_frame->dev_type = SAS_END_DEV;
+	if (phy->sas_phy.role & PHY_ROLE_INITIATOR)
+		phy->identify_frame->initiator_bits = phy->sas_phy.iproto;
+	if (phy->sas_phy.role & PHY_ROLE_TARGET)
+		phy->identify_frame->target_bits = phy->sas_phy.tproto;
+	memcpy(phy->identify_frame->sas_addr, phy->phy_desc->sas_addr,
+	       SAS_ADDR_SIZE);
+	phy->identify_frame->phy_id = phy->sas_phy.id;
+}
+
+static int asd_init_phy(struct asd_phy *phy)
+{
+	struct asd_ha_struct *asd_ha = phy->sas_phy.ha->lldd_ha;
+	struct asd_sas_phy *sas_phy = &phy->sas_phy;
+
+	sas_phy->enabled = 1;
+	sas_phy->class = SAS;
+	sas_phy->iproto = SAS_PROTO_ALL;
+	sas_phy->tproto = 0;
+	sas_phy->type = PHY_TYPE_PHYSICAL;
+	sas_phy->role = PHY_ROLE_INITIATOR;
+	sas_phy->oob_mode = OOB_NOT_CONNECTED;
+	sas_phy->linkrate = SAS_LINK_RATE_UNKNOWN;
+
+	phy->id_frm_tok = asd_alloc_coherent(asd_ha,
+					     sizeof(*phy->identify_frame),
+					     GFP_KERNEL);
+	if (!phy->id_frm_tok) {
+		asd_printk("no mem for IDENTIFY for phy%d\n", sas_phy->id);
+		return -ENOMEM;
+	} else
+		asd_init_phy_identify(phy);
+
+	memset(phy->frame_rcvd, 0, sizeof(phy->frame_rcvd));
+
+	return 0;
+}
+
+static int asd_init_phys(struct asd_ha_struct *asd_ha)
+{
+	u8 i;
+	u8 phy_mask = asd_ha->hw_prof.enabled_phys;
+
+	for (i = 0; i < ASD_MAX_PHYS; i++) {
+		struct asd_phy *phy = &asd_ha->phys[i];
+
+		phy->phy_desc = &asd_ha->hw_prof.phy_desc[i];
+
+		phy->sas_phy.enabled = 0;
+		phy->sas_phy.id = i;
+		phy->sas_phy.sas_addr = &phy->phy_desc->sas_addr[0];
+		phy->sas_phy.frame_rcvd = &phy->frame_rcvd[0];
+		phy->sas_phy.ha = &asd_ha->sas_ha;
+		phy->sas_phy.lldd_phy = phy;
+	}
+
+	/* Now enable and initialize only the enabled phys. */
+	for_each_phy(phy_mask, phy_mask, i) {
+		int err = asd_init_phy(&asd_ha->phys[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* ---------- Sliding windows ---------- */
+
+static int asd_init_sw(struct asd_ha_struct *asd_ha)
+{
+	struct pci_dev *pcidev = asd_ha->pcidev;
+	int err;
+	u32 v;
+
+	/* Unlock MBARs */
+	err = pci_read_config_dword(pcidev, PCI_CONF_MBAR_KEY, &v);
+	if (err) {
+		asd_printk("couldn't access conf. space of %s\n",
+			   pci_name(pcidev));
+		goto Err;
+	}
+	if (v)
+		err = pci_write_config_dword(pcidev, PCI_CONF_MBAR_KEY, v);
+	if (err) {
+		asd_printk("couldn't write to MBAR_KEY of %s\n",
+			   pci_name(pcidev));
+		goto Err;
+	}
+
+	/* Set sliding windows A, B and C to point to proper internal
+	 * memory regions.
+	 */
+	pci_write_config_dword(pcidev, PCI_CONF_MBAR0_SWA, REG_BASE_ADDR);
+	pci_write_config_dword(pcidev, PCI_CONF_MBAR0_SWB,
+			       REG_BASE_ADDR_CSEQCIO);
+	pci_write_config_dword(pcidev, PCI_CONF_MBAR0_SWC, REG_BASE_ADDR_EXSI);
+	asd_ha->io_handle[0].swa_base = REG_BASE_ADDR;
+	asd_ha->io_handle[0].swb_base = REG_BASE_ADDR_CSEQCIO;
+	asd_ha->io_handle[0].swc_base = REG_BASE_ADDR_EXSI;
+	MBAR0_SWB_SIZE = asd_ha->io_handle[0].len - 0x80;
+	if (!asd_ha->iospace) {
+		/* MBAR1 will point to OCM (On Chip Memory) */
+		pci_write_config_dword(pcidev, PCI_CONF_MBAR1, OCM_BASE_ADDR);
+		asd_ha->io_handle[1].swa_base = OCM_BASE_ADDR;
+	}
+	spin_lock_init(&asd_ha->iolock);
+Err:
+	return err;
+}
+
+/* ---------- SCB initialization ---------- */
+
+/**
+ * asd_init_scbs - manually allocate the first SCB.
+ * @asd_ha: pointer to host adapter structure
+ *
+ * This allocates the very first SCB which would be sent to the
+ * sequencer for execution.  Its bus address is written to
+ * CSEQ_Q_NEW_POINTER, mode page 2, mode 8.  Since the bus address of
+ * the _next_ scb to be DMA-ed to the host adapter is read from the last
+ * SCB DMA-ed to the host adapter, we have to always stay one step
+ * ahead of the sequencer and keep one SCB already allocated.
+ */
+static int asd_init_scbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int bitmap_bytes;
+
+	/* allocate the index array and bitmap */
+	seq->tc_index_bitmap_bits = asd_ha->hw_prof.max_scbs;
+	seq->tc_index_array = kzalloc(seq->tc_index_bitmap_bits*
+				      sizeof(void *), GFP_KERNEL);
+	if (!seq->tc_index_array)
+		return -ENOMEM;
+
+	bitmap_bytes = (seq->tc_index_bitmap_bits+7)/8;
+	bitmap_bytes = BITS_TO_LONGS(bitmap_bytes*8)*sizeof(unsigned long);
+	seq->tc_index_bitmap = kzalloc(bitmap_bytes, GFP_KERNEL);
+	if (!seq->tc_index_bitmap)
+		goto Err_free_array;	/* do not leak tc_index_array */
+
+	spin_lock_init(&seq->tc_index_lock);
+	seq->next_scb.size = sizeof(struct scb);
+	seq->next_scb.vaddr = dma_pool_alloc(asd_ha->scb_pool, GFP_KERNEL,
+					     &seq->next_scb.dma_handle);
+	if (!seq->next_scb.vaddr)
+		goto Err_free_bitmap;
+
+	seq->pending = 0;
+	spin_lock_init(&seq->pend_q_lock);
+	INIT_LIST_HEAD(&seq->pend_q);
+	return 0;
+Err_free_bitmap:	/* unwind in reverse order of allocation */
+	kfree(seq->tc_index_bitmap);
+	seq->tc_index_bitmap = NULL;	/* leave no stale pointer behind */
+Err_free_array:
+	kfree(seq->tc_index_array);
+	seq->tc_index_array = NULL;
+	return -ENOMEM;
+}
+
+static inline void asd_get_max_scb_ddb(struct asd_ha_struct *asd_ha)
+{
+	/* entry counts: context memory size over the per-entry size */
+	asd_ha->hw_prof.max_scbs = asd_get_cmdctx_size(asd_ha)/ASD_SCB_SIZE;
+	asd_ha->hw_prof.max_ddbs = asd_get_devctx_size(asd_ha)/ASD_DDB_SIZE;
+	ASD_DPRINTK("max_scbs:%d, max_ddbs:%d\n",
+		    asd_ha->hw_prof.max_scbs, asd_ha->hw_prof.max_ddbs);
+}
+
+/* ---------- Done List initialization ---------- */
+
+static void asd_dl_tasklet_handler(unsigned long);
+
+static int asd_init_dl(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+
+	/* room for ASD_DL_SIZE done list entries, from asd_alloc_coherent() */
+	seq->actual_dl = asd_alloc_coherent(asd_ha,
+			ASD_DL_SIZE * sizeof(struct done_list_struct), GFP_KERNEL);
+	if (!seq->actual_dl)
+		return -ENOMEM;
+	seq->dl = seq->actual_dl->vaddr;
+	seq->dl_toggle = ASD_DEF_DL_TOGGLE;
+	seq->dl_next = 0;
+	tasklet_init(&seq->dl_tasklet, asd_dl_tasklet_handler,
+		     (unsigned long) asd_ha);
+	return 0;
+}
+
+/* ---------- EDB and ESCB init ---------- */
+
+static int asd_alloc_edbs(struct asd_ha_struct *asd_ha, unsigned int gfp_flags)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int n;
+
+	/* pointer table first, then one zeroed buffer per table slot */
+	seq->edb_arr = kmalloc(seq->num_edbs*sizeof(*seq->edb_arr), gfp_flags);
+	if (!seq->edb_arr)
+		return -ENOMEM;
+
+	for (n = 0; n < seq->num_edbs; n++) {
+		struct asd_dma_tok *edb;
+
+		edb = asd_alloc_coherent(asd_ha, ASD_EDB_SIZE, gfp_flags);
+		seq->edb_arr[n] = edb;
+		if (!edb)
+			goto Err_unroll;
+		memset(edb->vaddr, 0, ASD_EDB_SIZE);
+	}
+	ASD_DPRINTK("num_edbs:%d\n", seq->num_edbs);
+	return 0;
+
+Err_unroll:
+	while (n-- > 0)		/* undo the earlier buffers, newest first */
+		asd_free_coherent(asd_ha, seq->edb_arr[n]);
+	kfree(seq->edb_arr);
+	seq->edb_arr = NULL;
+	return -ENOMEM;
+}
+
+/* Allocate the escb pointer table and fill it from a fresh aSCB list. */
+static int asd_alloc_escbs(struct asd_ha_struct *asd_ha,
+			   unsigned int gfp_flags)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	struct asd_ascb *cur;
+	int idx, missing;
+
+	seq->escb_arr = kmalloc(seq->num_escbs*sizeof(*seq->escb_arr),
+				gfp_flags);
+	if (!seq->escb_arr)
+		return -ENOMEM;
+
+	/* on return "missing" holds the number of escbs not allocated */
+	missing = seq->num_escbs;
+	cur = asd_ascb_alloc_list(asd_ha, &missing, gfp_flags);
+	if (!cur) {
+		asd_printk("couldn't allocate list of escbs\n");
+		kfree(seq->escb_arr);
+		seq->escb_arr = NULL;
+		return -ENOMEM;
+	}
+	seq->num_escbs -= missing;
+	ASD_DPRINTK("num_escbs:%d\n", seq->num_escbs);
+
+	/* record every list member in the table and mark it empty */
+	idx = 0;
+	while (idx < seq->num_escbs) {
+		seq->escb_arr[idx++] = cur;
+		cur->scb->header.opcode = EMPTY_SCB;
+		cur = list_entry(cur->list.next, struct asd_ascb, list);
+	}
+
+	return 0;
+}
+
+/* Give each escb ASD_EDBS_PER_SCB consecutive edbs out of edb_arr. */
+static void asd_assign_edbs2escbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int scb_no, slot;
+
+	for (scb_no = 0; scb_no < seq->num_escbs; scb_no++) {
+		struct asd_ascb *ascb = seq->escb_arr[scb_no];
+		struct empty_scb *escb = &ascb->scb->escb;
+		int first = scb_no * ASD_EDBS_PER_SCB;	/* its first edb */
+
+		ascb->edb_index = first;
+		escb->num_valid = ASD_EDBS_PER_SCB;
+		for (slot = 0; slot < ASD_EDBS_PER_SCB; slot++) {
+			struct asd_dma_tok *edb = seq->edb_arr[first + slot];
+			struct sg_el *eb = &escb->eb[slot];
+
+			memset(eb, 0, sizeof(*eb));
+			eb->bus_addr = cpu_to_le64(((u64) edb->dma_handle));
+			eb->size = cpu_to_le32(((u32) edb->size));
+		}
+	}
+}
+
+/**
+ * asd_init_escbs -- allocate and initialize empty scbs
+ * @asd_ha: pointer to host adapter structure
+ *
+ * An empty SCB has sg_elements of ASD_EDBS_PER_SCB (7) buffers, which
+ * transport sense data, etc.  Returns 0 or the allocator's error code.
+ */
+static int asd_init_escbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int err = 0;
+
+	/* Allocate two empty data buffers (edb) per sequencer. */
+	int edbs = 2*(1+asd_ha->hw_prof.num_phys);
+
+	seq->num_escbs = (edbs+ASD_EDBS_PER_SCB-1)/ASD_EDBS_PER_SCB;
+	seq->num_edbs = seq->num_escbs * ASD_EDBS_PER_SCB;
+
+	err = asd_alloc_edbs(asd_ha, GFP_KERNEL);
+	if (err) {
+		asd_printk("couldn't allocate edbs\n");
+		return err;
+	}
+	/* NOTE(review): if this fails, the edbs above are not freed here */
+	err = asd_alloc_escbs(asd_ha, GFP_KERNEL);
+	if (err) {
+		asd_printk("couldn't allocate escbs\n");
+		return err;
+	}
+
+	asd_assign_edbs2escbs(asd_ha);
+	/* In order to ensure that normal SCBs do not overfill sequencer
+	 * memory and leave no space for escbs (halting condition),
+	 * we start pending off at the number of escbs.  However,
+	 * escbs are never pending.
+	 */
+	seq->pending   = seq->num_escbs;
+	seq->can_queue = 1 + (asd_ha->hw_prof.max_scbs - seq->pending)/2;
+
+	return 0;
+}
+
+/* ---------- HW initialization ---------- */
+
+/**
+ * asd_chip_hardrst -- hard reset the chip
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Takes 16 cycles synchronous to the 200 MHz CFCLK, so at most 80 ns.
+ * Returns 0 once HARDRSTDET is observed in CHIMINT, -ENODEV otherwise.
+ */
+int asd_chip_hardrst(struct asd_ha_struct *asd_ha)
+{
+	int writes, polls;
+
+	/* the reset request is written four times over */
+	for (writes = 4; writes > 0; writes--)
+		asd_write_reg_dword(asd_ha, COMBIST, HARDRST);
+
+	/* then CHIMINT is polled up to 100 times, 1 us apart */
+	for (polls = 100; polls > 0; polls--) {
+		u32 chimint;
+
+		udelay(1);
+		chimint = asd_read_reg_dword(asd_ha, CHIMINT);
+		if (!(chimint & HARDRSTDET))
+			continue;
+		asd_write_reg_dword(asd_ha, CHIMINT, HARDRSTDET|PORRSTDET);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/**
+ * asd_init_chip -- initialize the chip
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Hard resets the chip, disables HA interrupts, downloads the sequencer
+ * microcode and starts the sequencers.  The caller has to explicitly
+ * enable HA interrupts with asd_enable_ints(asd_ha).
+ */
+static int asd_init_chip(struct asd_ha_struct *asd_ha)
+{
+	int err;
+
+	err = asd_chip_hardrst(asd_ha);
+	if (err) {
+		asd_printk("couldn't hard reset %s\n",
+			    pci_name(asd_ha->pcidev));
+		return err;
+	}
+
+	/* interrupts stay off from here on; enabling is the caller's job */
+	asd_disable_ints(asd_ha);
+
+	err = asd_init_seqs(asd_ha);
+	if (err) {
+		asd_printk("couldn't init seqs for %s\n",
+			   pci_name(asd_ha->pcidev));
+		return err;
+	}
+
+	/* last step: whatever it yields is the result of the whole init */
+	err = asd_start_seqs(asd_ha);
+	if (err)
+		asd_printk("coudln't start seqs for %s\n",
+			   pci_name(asd_ha->pcidev));
+
+	return err;
+}
+
+#define MAX_DEVS ((OCM_MAX_SIZE) / (ASD_DDB_SIZE)) /* DDBs per OCM_MAX_SIZE */
+
+static int max_devs = 0;	/* > 0xFFFF is ignored by asd_extend_devctx() */
+module_param_named(max_devs, max_devs, int, S_IRUGO);
+MODULE_PARM_DESC(max_devs, "\n"
+	"\tMaximum number of SAS devices to support (not LUs).\n"
+	"\tDefault: 2176, Maximum: 65663.\n");
+
+static int max_cmnds = 0;	/* > 0xFFFF is ignored by asd_extend_cmdctx() */
+module_param_named(max_cmnds, max_cmnds, int, S_IRUGO);
+MODULE_PARM_DESC(max_cmnds, "\n"
+	"\tMaximum number of commands queuable.\n"
+	"\tDefault: 512, Maximum: 66047.\n");
+
+static void asd_extend_devctx_ocm(struct asd_ha_struct *asd_ha)
+{
+	unsigned long dma_addr = OCM_BASE_ADDR;	/* On Chip Memory base */
+	u32 d;
+
+	dma_addr -= asd_ha->hw_prof.max_ddbs * ASD_DDB_SIZE;
+	asd_write_reg_addr(asd_ha, DEVCTXBASE, (dma_addr_t) dma_addr);
+	d = asd_read_reg_dword(asd_ha, CTXDOMAIN);
+	d |= 4;		/* bit 2; asd_extend_devctx() clears it for host memory */
+	asd_write_reg_dword(asd_ha, CTXDOMAIN, d);
+	asd_ha->hw_prof.max_ddbs += MAX_DEVS;	/* DDBs gained from the OCM */
+}
+
+/* Extend the DDB context: first with OCM, then (if asked) host memory. */
+static int asd_extend_devctx(struct asd_ha_struct *asd_ha)
+{
+	dma_addr_t dma_handle;
+	unsigned long dma_addr;
+	u32 d;
+	int size;
+
+	asd_extend_devctx_ocm(asd_ha);
+	/* host memory is added only for max_devs in (max_ddbs, 0xFFFF] */
+	asd_ha->hw_prof.ddb_ext = NULL;
+	if (max_devs <= asd_ha->hw_prof.max_ddbs || max_devs > 0xFFFF) {
+		max_devs = asd_ha->hw_prof.max_ddbs;
+		return 0;	/* nothing more to extend */
+	}
+
+	size = (max_devs - asd_ha->hw_prof.max_ddbs + 1) * ASD_DDB_SIZE;
+	/* the extra DDB is presumably slack for the ALIGN() below -- confirm */
+	asd_ha->hw_prof.ddb_ext = asd_alloc_coherent(asd_ha, size, GFP_KERNEL);
+	if (!asd_ha->hw_prof.ddb_ext) {
+		asd_printk("couldn't allocate memory for %d devices\n",
+			   max_devs);
+		max_devs = asd_ha->hw_prof.max_ddbs;
+		return -ENOMEM;
+	}
+	dma_handle = asd_ha->hw_prof.ddb_ext->dma_handle;
+	dma_addr = ALIGN((unsigned long) dma_handle, ASD_DDB_SIZE);
+	dma_addr -= asd_ha->hw_prof.max_ddbs * ASD_DDB_SIZE;
+	dma_handle = (dma_addr_t) dma_addr;
+	asd_write_reg_addr(asd_ha, DEVCTXBASE, dma_handle);
+	d = asd_read_reg_dword(asd_ha, CTXDOMAIN);
+	d &= ~4;	/* clear the bit asd_extend_devctx_ocm() set */
+	asd_write_reg_dword(asd_ha, CTXDOMAIN, d);
+	asd_ha->hw_prof.max_ddbs = max_devs;
+
+	return 0;
+}
+
+/* Extend the SCB (command) context into host memory if max_cmnds asks. */
+static int asd_extend_cmdctx(struct asd_ha_struct *asd_ha)
+{
+	dma_addr_t dma_handle;
+	unsigned long dma_addr;
+	u32 d;
+	int size;
+
+	asd_ha->hw_prof.scb_ext = NULL;
+	if (max_cmnds <= asd_ha->hw_prof.max_scbs || max_cmnds > 0xFFFF) {
+		max_cmnds = asd_ha->hw_prof.max_scbs;
+		return 0;	/* nothing to extend */
+	}
+
+	size = (max_cmnds - asd_ha->hw_prof.max_scbs + 1) * ASD_SCB_SIZE;
+	/* the extra SCB is presumably slack for the ALIGN() below -- confirm */
+	asd_ha->hw_prof.scb_ext = asd_alloc_coherent(asd_ha, size, GFP_KERNEL);
+	if (!asd_ha->hw_prof.scb_ext) {
+		asd_printk("couldn't allocate memory for %d commands\n",
+			   max_cmnds);
+		max_cmnds = asd_ha->hw_prof.max_scbs;
+		return -ENOMEM;
+	}
+	dma_handle = asd_ha->hw_prof.scb_ext->dma_handle;
+	dma_addr = ALIGN((unsigned long) dma_handle, ASD_SCB_SIZE);
+	dma_addr -= asd_ha->hw_prof.max_scbs * ASD_SCB_SIZE;
+	dma_handle = (dma_addr_t) dma_addr;
+	asd_write_reg_addr(asd_ha, CMDCTXBASE, dma_handle);
+	d = asd_read_reg_dword(asd_ha, CTXDOMAIN);
+	d &= ~1;	/* clear bit 0 of CTXDOMAIN */
+	asd_write_reg_dword(asd_ha, CTXDOMAIN, d);
+	asd_ha->hw_prof.max_scbs = max_cmnds;
+
+	return 0;
+}
+
+/**
+ * asd_init_ctxmem -- initialize context memory
+ * @asd_ha: pointer to host adapter structure
+ *
+ * This function sets the maximum number of SCBs and DDBs which can be
+ * used by the sequencer.  This is normally 512 and 128 respectively.
+ * If support for more SCBs or more DDBs is required (max_cmnds and
+ * max_devs), CMDCTXBASE, DEVCTXBASE and CTXDOMAIN are set up to extend
+ * context memory into host memory, up to 0xFFFF entries of each.  The
+ * DDB bitmap is allocated here as well.  Returns 0, or -ENOMEM if the
+ * bitmap cannot be allocated; failures to extend are not reported.
+ */
+static int asd_init_ctxmem(struct asd_ha_struct *asd_ha)
+{
+	int bitmap_bytes;
+
+	asd_get_max_scb_ddb(asd_ha);
+	asd_extend_devctx(asd_ha);	/* return value not checked */
+	asd_extend_cmdctx(asd_ha);	/* return value not checked */
+
+	/* The kernel wants bitmaps to be unsigned long sized. */
+	bitmap_bytes = (asd_ha->hw_prof.max_ddbs+7)/8;
+	bitmap_bytes = BITS_TO_LONGS(bitmap_bytes*8)*sizeof(unsigned long);
+	asd_ha->hw_prof.ddb_bitmap = kzalloc(bitmap_bytes, GFP_KERNEL);
+	if (!asd_ha->hw_prof.ddb_bitmap)
+		return -ENOMEM;
+	spin_lock_init(&asd_ha->hw_prof.ddb_lock);
+
+	return 0;
+}
+
+/* Full adapter bring-up; returns 0 or the first fatal error code. */
+int asd_init_hw(struct asd_ha_struct *asd_ha)
+{
+	int err;
+	u32 v;
+
+	err = asd_init_sw(asd_ha);
+	if (err)
+		return err;
+
+	err = pci_read_config_dword(asd_ha->pcidev, PCIC_HSTPCIX_CNTRL, &v);
+	if (err) {
+		asd_printk("couldn't read PCIC_HSTPCIX_CNTRL of %s\n",
+			   pci_name(asd_ha->pcidev));
+		return err;
+	}
+	err = pci_write_config_dword(asd_ha->pcidev, PCIC_HSTPCIX_CNTRL,
+				     v | SC_TMR_DIS);
+	if (err) {
+		asd_printk("couldn't disable split completion timer of %s\n",
+			   pci_name(asd_ha->pcidev));
+		return err;
+	}
+
+	/* While suspicious, it is not an error that we couldn't read
+	 * the OCM or the FLASH memory: report it and carry on. */
+	err = asd_read_ocm(asd_ha);
+	if (err)
+		asd_printk("couldn't read ocm(%d)\n", err);
+
+	err = asd_read_flash(asd_ha);
+	if (err)
+		asd_printk("couldn't read flash(%d)\n", err);
+
+	/* sets up hw_prof.ddb_bitmap; fails only if that allocation does */
+	err = asd_init_ctxmem(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize context memory\n");
+		goto Out;
+	}
+
+	asd_get_user_sas_addr(asd_ha);
+	if (!asd_ha->hw_prof.sas_addr[0]) {
+		asd_printk("No SAS Address provided for %s\n",
+			   pci_name(asd_ha->pcidev));
+		err = -ENODEV;
+		goto Out;
+	}
+
+	asd_propagate_sas_addr(asd_ha);
+
+	err = asd_init_phys(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize phys for %s\n",
+			    pci_name(asd_ha->pcidev));
+		goto Out;
+	}
+
+	err = asd_init_scbs(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize scbs for %s\n",
+			    pci_name(asd_ha->pcidev));
+		goto Out;
+	}
+
+	err = asd_init_dl(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize the done list:%d\n", err);
+		goto Out;
+	}
+
+	err = asd_init_escbs(asd_ha);
+	if (err) {
+		asd_printk("couldn't initialize escbs\n");
+		goto Out;
+	}
+
+	err = asd_init_chip(asd_ha);
+	if (err) {
+		asd_printk("couldn't init the chip\n");
+		goto Out;
+	}
+Out:
+	return err;
+}
+
+/* ---------- Chip reset ---------- */
+
+/**
+ * asd_chip_reset -- reset the host adapter, etc
+ * @asd_ha: pointer to host adapter structure of interest
+ *
+ * Called from the ISR.  Hard resets the chip and reports HAE_RESET
+ * through sas_ha->notify_ha_event(); everything outstanding is then
+ * left to time out, which should be no different from hot-unplugging
+ * the host adapter.  Once everything has timed out the chip is to be
+ * set up again with asd_init_chip() and asd_enable_ints().  XXX finish.
+ */
+static void asd_chip_reset(struct asd_ha_struct *asd_ha)
+{
+	struct sas_ha_struct *sas_ha = &asd_ha->sas_ha;
+
+	ASD_DPRINTK("chip reset for %s\n", pci_name(asd_ha->pcidev));
+	asd_chip_hardrst(asd_ha);	/* return value not checked */
+	sas_ha->notify_ha_event(sas_ha, HAE_RESET);
+}
+
+/* ---------- Done List Routines ---------- */
+
+/* Tasklet: consume new done list entries and complete their aSCBs. */
+static void asd_dl_tasklet_handler(unsigned long data)
+{
+	struct asd_ha_struct *asd_ha = (struct asd_ha_struct *) data;
+	struct asd_seq_data *seq = &asd_ha->seq;
+	unsigned long flags;
+
+	while (1) {
+		struct done_list_struct *dl = &seq->dl[seq->dl_next];
+		struct asd_ascb *ascb;
+
+		if ((dl->toggle & DL_TOGGLE_MASK) != seq->dl_toggle)
+			break;	/* presumably: no new entry at dl_next */
+		/* find the aSCB */
+		spin_lock_irqsave(&seq->tc_index_lock, flags);
+		ascb = asd_tc_index_find(seq, (int)le16_to_cpu(dl->index));
+		spin_unlock_irqrestore(&seq->tc_index_lock, flags);
+		if (unlikely(!ascb)) {
+			ASD_DPRINTK("BUG:sequencer:dl:no ascb?!\n");
+			goto next_1;
+		} else if (ascb->scb->header.opcode == EMPTY_SCB) {
+			goto out;	/* skips the pend_q dequeue */
+		} else if (!ascb->uldd_timer && !del_timer(&ascb->timer)) {
+			goto next_1;	/* timer not pending: skip it */
+		}
+		spin_lock_irqsave(&seq->pend_q_lock, flags);
+		list_del_init(&ascb->list);
+		seq->pending--;
+		spin_unlock_irqrestore(&seq->pend_q_lock, flags);
+	out:
+		ascb->tasklet_complete(ascb, dl);
+
+	next_1:
+		seq->dl_next = (seq->dl_next + 1) & (ASD_DL_SIZE-1);
+		if (!seq->dl_next)	/* wrapped to slot 0: flip the toggle */
+			seq->dl_toggle ^= DL_TOGGLE_MASK;
+	}
+}
+
+/* ---------- Interrupt Service Routines ---------- */
+
+/**
+ * asd_process_donelist_isr -- defer done list processing to dl_tasklet
+ * @asd_ha: pointer to host adapter structure
+ */
+static inline void asd_process_donelist_isr(struct asd_ha_struct *asd_ha)
+{
+	tasklet_schedule(&asd_ha->seq.dl_tasklet);
+}
+
+/**
+ * asd_com_sas_isr -- process device communication interrupt (COMINT)
+ * @asd_ha: pointer to host adapter structure
+ */
+static inline void asd_com_sas_isr(struct asd_ha_struct *asd_ha)
+{
+	u32 comstat = asd_read_reg_dword(asd_ha, COMSTAT);
+
+	/* clear COMSTAT int */
+	asd_write_reg_dword(asd_ha, COMSTAT, 0xFFFFFFFF);
+	/* only the first matching cause is reported */
+	if (comstat & CSBUFPERR) {
+		asd_printk("%s: command/status buffer dma parity error\n",
+			   pci_name(asd_ha->pcidev));
+	} else if (comstat & CSERR) {
+		int i;
+		u32 dmaerr = asd_read_reg_dword(asd_ha, DMAERR);
+		dmaerr &= 0xFF;
+		asd_printk("%s: command/status dma error, DMAERR: 0x%02x, "
+			   "CSDMAADR: 0x%04x, CSDMAADR+4: 0x%04x\n",
+			   pci_name(asd_ha->pcidev),
+			   dmaerr,
+			   asd_read_reg_dword(asd_ha, CSDMAADR),
+			   asd_read_reg_dword(asd_ha, CSDMAADR+4));
+		asd_printk("CSBUFFER:\n");
+		for (i = 0; i < 8; i++) {	/* NOTE(review): offsets ignore i */
+			asd_printk("%08x %08x %08x %08x\n",
+				   asd_read_reg_dword(asd_ha, CSBUFFER),
+				   asd_read_reg_dword(asd_ha, CSBUFFER+4),
+				   asd_read_reg_dword(asd_ha, CSBUFFER+8),
+				   asd_read_reg_dword(asd_ha, CSBUFFER+12));
+		}
+		asd_dump_seq_state(asd_ha, 0);
+	} else if (comstat & OVLYERR) {
+		u32 dmaerr = asd_read_reg_dword(asd_ha, DMAERR);
+		dmaerr = (dmaerr >> 8) & 0xFF;
+		asd_printk("%s: overlay dma error:0x%x\n",
+			   pci_name(asd_ha->pcidev),
+			   dmaerr);
+	}
+	asd_chip_reset(asd_ha);		/* whichever the cause, reset the chip */
+}
+
+static inline void asd_arp2_err(struct asd_ha_struct *asd_ha, u32 dchstatus)
+{
+	static const char *halt_code[256] = { /* index: ARP2INT bits 23:16 */
+		"UNEXPECTED_INTERRUPT0",
+		"UNEXPECTED_INTERRUPT1",
+		"UNEXPECTED_INTERRUPT2",
+		"UNEXPECTED_INTERRUPT3",
+		"UNEXPECTED_INTERRUPT4",
+		"UNEXPECTED_INTERRUPT5",
+		"UNEXPECTED_INTERRUPT6",
+		"UNEXPECTED_INTERRUPT7",
+		"UNEXPECTED_INTERRUPT8",
+		"UNEXPECTED_INTERRUPT9",
+		"UNEXPECTED_INTERRUPT10",
+		[11 ... 19] = "unknown[11,19]",
+		"NO_FREE_SCB_AVAILABLE",
+		"INVALID_SCB_OPCODE",
+		"INVALID_MBX_OPCODE",
+		"INVALID_ATA_STATE",
+		"ATA_QUEUE_FULL",
+		"ATA_TAG_TABLE_FAULT",
+		"ATA_TAG_MASK_FAULT",
+		"BAD_LINK_QUEUE_STATE",
+		"DMA2CHIM_QUEUE_ERROR",
+		"EMPTY_SCB_LIST_FULL",
+		"unknown[30]",
+		"IN_USE_SCB_ON_FREE_LIST",
+		"BAD_OPEN_WAIT_STATE",
+		"INVALID_STP_AFFILIATION",
+		"unknown[34]",
+		"EXEC_QUEUE_ERROR",
+		"TOO_MANY_EMPTIES_NEEDED",
+		"EMPTY_REQ_QUEUE_ERROR",
+		"Q_MONIRTT_MGMT_ERROR",
+		"TARGET_MODE_FLOW_ERROR",
+		"DEVICE_QUEUE_NOT_FOUND",
+		"START_IRTT_TIMER_ERROR",
+		"ABORT_TASK_ILLEGAL_REQ",
+		[43 ... 255] = "unknown[43,255]"
+	};
+
+	if (dchstatus & CSEQINT) {	/* report for the CSEQ */
+		u32 arp2int = asd_read_reg_dword(asd_ha, CARP2INT);
+
+		if (arp2int & (ARP2WAITTO|ARP2ILLOPC|ARP2PERR|ARP2CIOPERR)) {
+			asd_printk("%s: CSEQ arp2int:0x%x\n",
+				   pci_name(asd_ha->pcidev),
+				   arp2int);
+		} else if (arp2int & ARP2HALTC)
+			asd_printk("%s: CSEQ halted: %s\n",
+				   pci_name(asd_ha->pcidev),
+				   halt_code[(arp2int>>16)&0xFF]);
+		else
+			asd_printk("%s: CARP2INT:0x%x\n",
+				   pci_name(asd_ha->pcidev),
+				   arp2int);
+	}
+	if (dchstatus & LSEQINT_MASK) {	/* report for each flagged LSEQ */
+		int lseq;
+		u8  lseq_mask = dchstatus & LSEQINT_MASK;
+
+		for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+			u32 arp2int = asd_read_reg_dword(asd_ha,
+							 LmARP2INT(lseq));
+			if (arp2int & (ARP2WAITTO | ARP2ILLOPC | ARP2PERR
+				       | ARP2CIOPERR)) {
+				asd_printk("%s: LSEQ%d arp2int:0x%x\n",
+					   pci_name(asd_ha->pcidev),
+					   lseq, arp2int);
+				/* XXX we should only do lseq reset */
+			} else if (arp2int & ARP2HALTC)
+				asd_printk("%s: LSEQ%d halted: %s\n",
+					   pci_name(asd_ha->pcidev),
+					   lseq,halt_code[(arp2int>>16)&0xFF]);
+			else
+				asd_printk("%s: LSEQ%d ARP2INT:0x%x\n",
+					   pci_name(asd_ha->pcidev), lseq,
+					   arp2int);
+		}
+	}
+	asd_chip_reset(asd_ha);		/* always, after reporting */
+}
+
+/**
+ * asd_dch_sas_isr -- process device channel interrupt (DEVINT)
+ * @asd_ha: pointer to host adapter structure
+ */
+static inline void asd_dch_sas_isr(struct asd_ha_struct *asd_ha)
+{
+	u32 dchstatus = asd_read_reg_dword(asd_ha, DCHSTATUS);
+
+	if (dchstatus & CFIFTOERR) {
+		asd_printk("%s: CFIFTOERR\n", pci_name(asd_ha->pcidev));
+		asd_chip_reset(asd_ha);
+	} else
+		asd_arp2_err(asd_ha, dchstatus); /* reports, then resets too */
+}
+
+/**
+ * asd_rbi_exsi_isr -- process external system interface interrupt (INITERR)
+ * @asd_ha: pointer to host adapter structure
+ */
+static inline void asd_rbi_exsi_isr(struct asd_ha_struct *asd_ha)
+{
+	u32 stat0r = asd_read_reg_dword(asd_ha, ASISTAT0R);
+
+	if (!(stat0r & ASIERR)) {
+		asd_printk("hmm, EXSI interrupted but no error?\n");
+		return;		/* no chip reset in this case */
+	}
+
+	if (stat0r & ASIFMTERR) {
+		asd_printk("ASI SEEPROM format error for %s\n",
+			   pci_name(asd_ha->pcidev));
+	} else if (stat0r & ASISEECHKERR) {
+		u32 stat1r = asd_read_reg_dword(asd_ha, ASISTAT1R);
+		asd_printk("ASI SEEPROM checksum 0x%x error for %s\n",
+			   stat1r & CHECKSUM_MASK,
+			   pci_name(asd_ha->pcidev));
+	} else {
+		u32 statr = asd_read_reg_dword(asd_ha, ASIERRSTATR);
+
+		if (!(statr & CPI2ASIMSTERR_MASK)) {
+			ASD_DPRINTK("hmm, ASIERR?\n");
+			return;		/* no chip reset in this case */
+		} else {
+			u32 addr = asd_read_reg_dword(asd_ha, ASIERRADDR);
+			u32 data = asd_read_reg_dword(asd_ha, ASIERRDATAR);
+
+			asd_printk("%s: CPI2 xfer err: addr: 0x%x, wdata: 0x%x, "
+				   "count: 0x%x, byteen: 0x%x, targerr: 0x%x "
+				   "master id: 0x%x, master err: 0x%x\n",
+				   pci_name(asd_ha->pcidev),
+				   addr, data,
+				   (statr & CPI2ASIBYTECNT_MASK) >> 16,
+				   (statr & CPI2ASIBYTEEN_MASK) >> 12,
+				   (statr & CPI2ASITARGERR_MASK) >> 8,
+				   (statr & CPI2ASITARGMID_MASK) >> 4,
+				   (statr & CPI2ASIMSTERR_MASK));
+		}
+	}
+	asd_chip_reset(asd_ha);
+}
+
+/**
+ * asd_hst_pcix_isr -- process host interface interrupts
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Asserted on PCIX errors: target abort, etc.
+ */
+static inline void asd_hst_pcix_isr(struct asd_ha_struct *asd_ha)
+{
+	u16 status;
+	u32 pcix_status;
+	u32 ecc_status;
+
+	pci_read_config_word(asd_ha->pcidev, PCI_STATUS, &status);
+	pci_read_config_dword(asd_ha->pcidev, PCIX_STATUS, &pcix_status);
+	pci_read_config_dword(asd_ha->pcidev, ECC_CTRL_STAT, &ecc_status);
+	/* the first matching cause wins; two of them skip the chip reset */
+	if (status & PCI_STATUS_DETECTED_PARITY)
+		asd_printk("parity error for %s\n", pci_name(asd_ha->pcidev));
+	else if (status & PCI_STATUS_REC_MASTER_ABORT)
+		asd_printk("master abort for %s\n", pci_name(asd_ha->pcidev));
+	else if (status & PCI_STATUS_REC_TARGET_ABORT)
+		asd_printk("target abort for %s\n", pci_name(asd_ha->pcidev));
+	else if (status & PCI_STATUS_PARITY)
+		asd_printk("data parity for %s\n", pci_name(asd_ha->pcidev));
+	else if (pcix_status & RCV_SCE) {
+		asd_printk("received split completion error for %s\n",
+			   pci_name(asd_ha->pcidev));
+		pci_write_config_dword(asd_ha->pcidev,PCIX_STATUS,pcix_status);
+		/* XXX: Abort task? */
+		return;		/* status written back, no chip reset */
+	} else if (pcix_status & UNEXP_SC) {
+		asd_printk("unexpected split completion for %s\n",
+			   pci_name(asd_ha->pcidev));
+		pci_write_config_dword(asd_ha->pcidev,PCIX_STATUS,pcix_status);
+		/* ignore */
+		return;		/* status written back, no chip reset */
+	} else if (pcix_status & SC_DISCARD)
+		asd_printk("split completion discarded for %s\n",
+			   pci_name(asd_ha->pcidev));
+	else if (ecc_status & UNCOR_ECCERR)
+		asd_printk("uncorrectable ECC error for %s\n",
+			   pci_name(asd_ha->pcidev));
+	asd_chip_reset(asd_ha);
+}
+
+/**
+ * asd_hw_isr -- host adapter interrupt service routine
+ * @irq: ignored
+ * @dev_id: pointer to host adapter structure
+ * @regs: ignored
+ *
+ * The ISR processes done list entries and level 3 error handling.
+ */
+irqreturn_t asd_hw_isr(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct asd_ha_struct *asd_ha = dev_id;
+	u32 chimint = asd_read_reg_dword(asd_ha, CHIMINT);
+
+	if (!chimint)
+		return IRQ_NONE;	/* no CHIMINT bit set */
+
+	asd_write_reg_dword(asd_ha, CHIMINT, chimint);	/* write back */
+	(void) asd_read_reg_dword(asd_ha, CHIMINT); /* read back (flush?) */
+
+	if (chimint & DLAVAIL)
+		asd_process_donelist_isr(asd_ha);
+	if (chimint & COMINT)
+		asd_com_sas_isr(asd_ha);
+	if (chimint & DEVINT)
+		asd_dch_sas_isr(asd_ha);
+	if (chimint & INITERR)
+		asd_rbi_exsi_isr(asd_ha);
+	if (chimint & HOSTERR)
+		asd_hst_pcix_isr(asd_ha);
+
+	return IRQ_HANDLED;
+}
+
+/* ---------- SCB handling ---------- */
+
+/* Allocate one zeroed aSCB with its DMA-able SCB and a tc_index. */
+static inline struct asd_ascb *asd_ascb_alloc(struct asd_ha_struct *asd_ha,
+					      unsigned int gfp_flags)
+{
+	extern kmem_cache_t *asd_ascb_cache;
+	struct asd_seq_data *seq = &asd_ha->seq;
+	struct asd_ascb *ascb;
+	unsigned long flags;
+
+	ascb = kmem_cache_alloc(asd_ascb_cache, gfp_flags);
+	if (!ascb)
+		return NULL;
+	memset(ascb, 0, sizeof(*ascb));
+
+	ascb->dma_scb.size = sizeof(struct scb);
+	ascb->dma_scb.vaddr = dma_pool_alloc(asd_ha->scb_pool, gfp_flags,
+					     &ascb->dma_scb.dma_handle);
+	if (!ascb->dma_scb.vaddr)
+		goto free_ascb;
+	memset(ascb->dma_scb.vaddr, 0, sizeof(struct scb));
+	asd_init_ascb(asd_ha, ascb);
+
+	/* the index is handed out under the tc_index lock */
+	spin_lock_irqsave(&seq->tc_index_lock, flags);
+	ascb->tc_index = asd_tc_index_get(seq, ascb);
+	spin_unlock_irqrestore(&seq->tc_index_lock, flags);
+	if (ascb->tc_index == -1) {
+		dma_pool_free(asd_ha->scb_pool, ascb->dma_scb.vaddr,
+			      ascb->dma_scb.dma_handle);
+		kmem_cache_free(asd_ascb_cache, ascb);
+		ASD_DPRINTK("no index for ascb\n");
+		return NULL;
+	}
+	ascb->scb->header.index = cpu_to_le16((u16)ascb->tc_index);
+
+	return ascb;
+free_ascb:
+	kmem_cache_free(asd_ascb_cache, ascb);
+	return NULL;
+}
+
+/**
+ * asd_ascb_alloc_list -- allocate a list of aSCBs
+ * @asd_ha: pointer to host adapter structure
+ * @num: in: how many aSCBs are wanted; out: how many could not be had
+ * @gfp_flags: GFP_ flags.
+ *
+ * This is the only function which is used to allocate aSCBs.  It can
+ * allocate one or many.  Several aSCBs are linked in two ways: through
+ * the list field of the ascb struct, and through the next_scb field of
+ * the scb header, which holds the bus address of the following SCB.
+ *
+ * Returns NULL if not even one aSCB could be allocated, else a pointer
+ * to the first aSCB of the list.  Allocation stops at the first
+ * failure, so a shorter list than requested may be returned; *@num is
+ * then the shortfall, and 0 when every requested aSCB was allocated.
+ */
+struct asd_ascb *asd_ascb_alloc_list(struct asd_ha_struct
+				     *asd_ha, int *num,
+				     unsigned int gfp_flags)
+{
+	struct asd_ascb *head = NULL;
+	struct asd_ascb *tail, *fresh;
+
+	while (*num > 0) {
+		fresh = asd_ascb_alloc(asd_ha, gfp_flags);
+		if (!fresh)
+			break;
+		if (head) {
+			tail = list_entry(head->list.prev,
+					  struct asd_ascb, list);
+			list_add_tail(&fresh->list, &head->list);
+			tail->scb->header.next_scb =
+				cpu_to_le64(((u64)fresh->dma_scb.dma_handle));
+		} else {
+			head = fresh;
+		}
+		--*num;
+	}
+
+	return head;
+}
+
+/**
+ * asd_swap_head_scb -- swap the head scb
+ * @asd_ha: pointer to host adapter structure
+ * @ascb: pointer to the head of an ascb list
+ *
+ * The sequencer knows the DMA address of the next SCB to be DMAed to
+ * the host adapter, from initialization or from the last list DMAed.
+ * seq->next_scb keeps the address of this SCB.  The sequencer will
+ * DMA to the host adapter this list of SCBs.  But the head (first
+ * element) of this list is not known to the sequencer.  Here we swap
+ * the head of the list with the known SCB (memcpy()).
+ * Only one memcpy() is required per list so it is in our interest
+ * to keep the list of SCBs as long as possible so that the ratio
+ * of number of memcpy calls to the number of SCBs DMA-ed is as small
+ * as possible.
+ *
+ * LOCKING: called with the pending list lock held.
+ */
+static inline void asd_swap_head_scb(struct asd_ha_struct *asd_ha,
+				     struct asd_ascb *ascb)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	struct asd_ascb *last = list_entry(ascb->list.prev,
+					   struct asd_ascb,
+					   list);
+	struct asd_dma_tok t = ascb->dma_scb;	/* the head's own buffer */
+
+	memcpy(seq->next_scb.vaddr, ascb->scb, sizeof(*ascb->scb));
+	ascb->dma_scb = seq->next_scb;	/* head now lives in the known SCB */
+	ascb->scb = ascb->dma_scb.vaddr;
+	seq->next_scb = t;		/* old head buffer is the new "next" */
+	last->scb->header.next_scb =
+		cpu_to_le64(((u64)seq->next_scb.dma_handle));
+}
+
+/**
+ * asd_start_scb_timers -- (add and) start timers of SCBs
+ * @list: pointer to struct list_head of the scbs
+ *
+ * Every SCB in @list whose uldd_timer flag is clear gets the default
+ * timeout handler, asd_ascb_timedout(), armed AIC94XX_SCB_TIMEOUT from
+ * now.  SCBs with uldd_timer set are left untouched.  Intended to be
+ * called from asd_post_ascb_list(), just prior to posting the SCBs to
+ * the sequencer.
+ */
+static inline void asd_start_scb_timers(struct list_head *list)
+{
+	struct asd_ascb *ascb;
+	list_for_each_entry(ascb, list, list) {
+		if (!ascb->uldd_timer) {
+			ascb->timer.data = (unsigned long) ascb;
+			ascb->timer.function = asd_ascb_timedout;
+			ascb->timer.expires = jiffies + AIC94XX_SCB_TIMEOUT;
+			add_timer(&ascb->timer);
+		}
+	}
+}
+
+/**
+ * asd_post_ascb_list -- post a list of 1 or more aSCBs to the host adapter
+ * @asd_ha: pointer to a host adapter structure
+ * @ascb: pointer to the first aSCB in the list
+ * @num: number of aSCBs in the list (to be posted)
+ *
+ * See queueing comment in asd_post_escb_list().
+ *
+ * Additional note on queuing: In order to minimize the ratio of memcpy()
+ * to the number of ascbs sent, we try to batch-send as many ascbs as possible
+ * in one go.  With can_queue = max_scbs - pending, two cases are possible:
+ *    A) can_queue >= num: the whole batch is sent at once.  "pending" is
+ *       incremented right where it is checked, under pend_q_lock, in order
+ *       to eliminate races when this function is called by multiple
+ *       processes.
+ *    B) can_queue < num: nothing is sent and -SAS_QUEUE_FULL is returned.
+ *       Should never happen if the managing layer considers
+ *       lldd_queue_size.
+ * Returns 0 on success.
+ */
+int asd_post_ascb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb,
+		       int num)
+{
+	unsigned long flags;
+	LIST_HEAD(list);
+	int can_queue;
+
+	spin_lock_irqsave(&asd_ha->seq.pend_q_lock, flags);
+	can_queue = asd_ha->hw_prof.max_scbs - asd_ha->seq.pending;
+	if (can_queue >= num)
+		asd_ha->seq.pending += num;
+	else
+		can_queue = 0;
+
+	if (!can_queue) {
+		spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags);
+		asd_printk("%s: scb queue full\n", pci_name(asd_ha->pcidev));
+		return -SAS_QUEUE_FULL;
+	}
+
+	asd_swap_head_scb(asd_ha, ascb);
+	/* hook the local head in front of @ascb so "list" spans the batch */
+	__list_add(&list, ascb->list.prev, &ascb->list);
+
+	asd_start_scb_timers(&list);
+	/* append the batch to pend_q, then write the new count to SCBPRO */
+	asd_ha->seq.scbpro += num;
+	list_splice_init(&list, asd_ha->seq.pend_q.prev);
+	asd_write_reg_dword(asd_ha, SCBPRO, (u32)asd_ha->seq.scbpro);
+	spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags);
+
+	return 0;
+}
+
+/**
+ * asd_post_escb_list -- post a list of 1 or more empty scb
+ * @asd_ha: pointer to a host adapter structure
+ * @ascb: pointer to the first empty SCB in the list
+ * @num: number of aSCBs in the list (to be posted)
+ *
+ * This is essentially the same as asd_post_ascb_list(), but we do not
+ * increment pending, add those to the pending list, start timers or
+ * get indexes.  See asd_init_escbs() and asd_init_post_escbs().
+ *
+ * Since sending a list of ascbs is a superset of sending a single
+ * ascb, this function exists to generalize this.  More specifically,
+ * when sending a list of those, we want to do only a _single_
+ * memcpy() at swap head, as opposed to for each ascb sent (in the
+ * case of sending them one by one).  That is, we want to minimize the
+ * ratio of memcpy() operations to the number of ascbs sent.  The same
+ * logic applies to asd_post_ascb_list().  Always returns 0.
+ */
+int asd_post_escb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb,
+		       int num)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&asd_ha->seq.pend_q_lock, flags);
+	asd_swap_head_scb(asd_ha, ascb);	/* needs pend_q_lock held */
+	asd_ha->seq.scbpro += num;
+	asd_write_reg_dword(asd_ha, SCBPRO, (u32)asd_ha->seq.scbpro);
+	spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags);
+
+	return 0;
+}
+
+/* ---------- LED ---------- */
+
+/**
+ * asd_turn_led -- turn on/off an LED
+ * @asd_ha: pointer to host adapter structure
+ * @phy_id: the PHY id whose LED we want to manipulate
+ * @op: nonzero to turn on, 0 to turn off
+ */
+void asd_turn_led(struct asd_ha_struct *asd_ha, int phy_id, int op)
+{
+	u32 ctl;
+
+	if (phy_id >= ASD_MAX_PHYS)
+		return;
+
+	ctl = asd_read_reg_dword(asd_ha, LmCONTROL(phy_id));
+	ctl = op ? (ctl | LEDPOL) : (ctl & ~LEDPOL);
+	asd_write_reg_dword(asd_ha, LmCONTROL(phy_id), ctl);
+}
+
+/**
+ * asd_control_led -- enable/disable an LED on the board
+ * @asd_ha: pointer to host adapter structure
+ * @phy_id: integer, the phy id; ASD_MAX_PHYS and above are ignored
+ * @op: integer, nonzero to enable, 0 to disable the LED
+ *
+ * First we output enable the LED (GPIOOER), then we set the source
+ * to be an external module (GPIOCNFGR).
+ */
+void asd_control_led(struct asd_ha_struct *asd_ha, int phy_id, int op)
+{
+	u32 bit;
+	u32 v;
+
+	if (phy_id >= ASD_MAX_PHYS)
+		return;
+
+	bit = 1 << phy_id;
+
+	/* output enable */
+	v = asd_read_reg_dword(asd_ha, GPIOOER);
+	v = op ? (v | bit) : (v & ~bit);
+	asd_write_reg_dword(asd_ha, GPIOOER, v);
+
+	/* source select */
+	v = asd_read_reg_dword(asd_ha, GPIOCNFGR);
+	v = op ? (v | bit) : (v & ~bit);
+	asd_write_reg_dword(asd_ha, GPIOCNFGR, v);
+}
+
+/* ---------- PHY enable ---------- */
+
+/* Program one phy's OOB/PHY control registers and enable its LED. */
+static int asd_enable_phy(struct asd_ha_struct *asd_ha, int phy_id)
+{
+	struct asd_phy *phy = &asd_ha->phys[phy_id];
+
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, INT_ENABLE_2), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, HOT_PLUG_DELAY),
+			   HOTPLUG_DELAY_TIMEOUT);
+
+	/* Get defaults from manuf. sector */
+	/* XXX we need defaults for those in case MS is broken. */
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_0),
+			   phy->phy_desc->phy_control_0);
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_1),
+			   phy->phy_desc->phy_control_1);
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_2),
+			   phy->phy_desc->phy_control_2);
+	asd_write_reg_byte(asd_ha, LmSEQ_OOB_REG(phy_id, PHY_CONTROL_3),
+			   phy->phy_desc->phy_control_3);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_TEN_MS_COMINIT_TIMEOUT(phy_id),
+			    ASD_COMINIT_TIMEOUT);
+
+	asd_write_reg_addr(asd_ha, LmSEQ_TX_ID_ADDR_FRAME(phy_id),
+			   phy->id_frm_tok->dma_handle);
+
+	asd_control_led(asd_ha, phy_id, 1);
+	return 0;	/* there is no failure path */
+}
+
+/* Enable each phy in @phy_mask with its own ENABLE_PHY control SCB. */
+int asd_enable_phys(struct asd_ha_struct *asd_ha, const u8 phy_mask)
+{
+	u8  phy_m, i;
+	int num = 0, k;
+	struct asd_ascb *ascb, *ascb_list;
+
+	if (!phy_mask) {
+		asd_printk("%s called with phy_mask of 0!?\n", __FUNCTION__);
+		return 0;
+	}
+
+	for_each_phy(phy_mask, phy_m, i) {
+		num++;
+		asd_enable_phy(asd_ha, i);
+	}
+
+	/* a short list would make the build loop below wrap around */
+	k = num;
+	ascb_list = asd_ascb_alloc_list(asd_ha, &k, GFP_KERNEL);
+	if (ascb_list && k)
+		asd_ascb_free_list(ascb_list);
+	if (!ascb_list || k) {
+		asd_printk("no memory for control phy ascb list\n");
+		return -ENOMEM;
+	}
+
+	ascb = ascb_list;
+	for_each_phy(phy_mask, phy_m, i) {
+		asd_build_control_phy(ascb, i, ENABLE_PHY);
+		ascb = list_entry(ascb->list.next, struct asd_ascb, list);
+	}
+	ASD_DPRINTK("posting %d control phy scbs\n", num);
+	k = asd_post_ascb_list(asd_ha, ascb_list, num);
+	if (k)
+		asd_ascb_free_list(ascb_list);
+	return k;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.h b/drivers/scsi/aic94xx/aic94xx_hwi.h
new file mode 100644
index 0000000..c7d5053
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_hwi.h
@@ -0,0 +1,397 @@
+/*
+ * Aic94xx SAS/SATA driver hardware interface header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_HWI_H_
+#define _AIC94XX_HWI_H_
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+
+#include <scsi/libsas.h>
+
+#include "aic94xx.h"
+#include "aic94xx_sas.h"
+
+/* Define ASD_MAX_PHYS to the maximum phys ever. Currently 8. */
+#define ASD_MAX_PHYS       8
+#define ASD_PCBA_SN_SIZE   12
+
+/* These IDs still need proper names in place of the "RAZORx"
+ * placeholders, after which they belong in include/linux/pci_ids.h.
+ */
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR10 0x410
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR12 0x412
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR1E 0x41E
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR30 0x430
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR32 0x432
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR3E 0x43E
+#define PCI_DEVICE_ID_ADAPTEC2_RAZOR3F 0x43F
+
+struct asd_ha_addrspace {
+	void __iomem  *addr;        /* ioremap()ed BAR, or port base cast */
+	unsigned long  start;       /* pci resource start */
+	unsigned long  len;         /* pci resource len */
+	unsigned long  flags;       /* pci resource flags */
+
+	/* addresses internal to the host adapter */
+	u32 swa_base; /* mmspace 1 (MBAR1) uses this only */
+	u32 swb_base;
+	u32 swc_base;
+};
+
+struct bios_struct {
+	int    present;	/* non-zero: probe logs the BIOS build number */
+	u8     maj;	/* presumably major version -- TODO confirm */
+	u8     min;	/* presumably minor version -- TODO confirm */
+	u32    bld;	/* build number, exported as the bios_build attribute */
+};
+
+struct unit_element_struct {
+	u16    num;	/* presumably number of elements -- TODO confirm */
+	u16    size;	/* presumably size of one element -- TODO confirm */
+	void   *area;	/* kfree()d by asd_destroy_ha_caches() */
+};
+
+struct flash_struct {
+	u32    bar;
+	int    present;	/* probe logs flash "present"/"not present" from it */
+	int    wide;
+	u8     manuf;	/* presumably manufacturer id -- TODO confirm */
+	u8     dev_id;	/* presumably flash device id -- TODO confirm */
+	u8     sec_prot;
+
+	u32    dir_offs;
+};
+
+struct asd_phy_desc {
+	/* From CTRL-A settings, then set to what is appropriate */
+	u8     sas_addr[SAS_ADDR_SIZE];
+	u8     max_sas_lrate;	/* SAS_LINK_RATE_* value */
+	u8     min_sas_lrate;	/* SAS_LINK_RATE_* value */
+	u8     max_sata_lrate;	/* SAS_LINK_RATE_* value */
+	u8     min_sata_lrate;	/* SAS_LINK_RATE_* value */
+	u8     flags;	/* presumably a mask of the bits below -- TODO confirm */
+#define ASD_CRC_DIS  1
+#define ASD_SATA_SPINUP_HOLD 2
+
+	u8     phy_control_0; /* mode 5 reg 0x160 */
+	u8     phy_control_1; /* mode 5 reg 0x161 */
+	u8     phy_control_2; /* mode 5 reg 0x162 */
+	u8     phy_control_3; /* mode 5 reg 0x163 */
+};
+
+struct asd_dma_tok {
+	void *vaddr;		/* CPU address of the buffer */
+	dma_addr_t dma_handle;	/* matching DMA (bus) address */
+	size_t size;		/* buffer size, needed again to free it */
+};
+
+struct hw_profile {
+	struct bios_struct bios;
+	struct unit_element_struct ue;
+	struct flash_struct flash;
+
+	u8     sas_addr[SAS_ADDR_SIZE];	  /* adapter SAS address */
+	char   pcba_sn[ASD_PCBA_SN_SIZE+1]; /* PCBA serial number string */
+
+	u8     enabled_phys;	  /* mask of enabled phys */
+	struct asd_phy_desc phy_desc[ASD_MAX_PHYS];
+	u32    max_scbs;	  /* absolute sequencer scb queue size */
+	struct asd_dma_tok *scb_ext; /* freed by asd_destroy_ha_caches() */
+	u32    max_ddbs;
+	struct asd_dma_tok *ddb_ext; /* freed by asd_destroy_ha_caches() */
+
+	spinlock_t ddb_lock;
+	void  *ddb_bitmap;	  /* kfree()d by asd_destroy_ha_caches() */
+
+	int    num_phys;	  /* ENABLEABLE */
+	int    max_phys;	  /* REPORTED + ENABLEABLE */
+
+	unsigned addr_range;	  /* max # of addrs; max # of possible ports */
+	unsigned port_name_base;
+	unsigned dev_name_base;
+	unsigned sata_name_base;
+};
+
+struct asd_ascb {
+	struct list_head list;	  /* must be empty when the ascb is freed */
+	struct asd_ha_struct *ha; /* owning adapter */
+
+	struct scb *scb;	  /* equals dma_scb.vaddr */
+	struct asd_dma_tok dma_scb; /* SCB memory from the ha's scb_pool */
+	struct asd_dma_tok *sg_arr;
+
+	void (*tasklet_complete)(struct asd_ascb *, struct done_list_struct *);
+	u8     uldd_timer:1;
+
+	/* internally generated command */
+	struct timer_list timer;
+	struct completion completion;
+	u8        tag_valid:1;
+	__be16    tag;		  /* error recovery only */
+
+	/* If this is an Empty SCB, index of first edb in seq->edb_arr. */
+	int    edb_index;
+
+	/* Used by the timer timeout function. */
+	int    tc_index;	  /* asd_init_ascb() starts it at -1 */
+
+	void   *uldd_task;
+};
+
+#define ASD_DL_SIZE_BITS   0x8
+#define ASD_DL_SIZE        (1<<(2+ASD_DL_SIZE_BITS))
+#define ASD_DEF_DL_TOGGLE  0x01
+
+struct asd_seq_data {
+	spinlock_t pend_q_lock;	  /* held while pending/pend_q are changed */
+	u16    scbpro;
+	int    pending;
+	struct list_head pend_q;
+	int    can_queue;	  /* per adapter */
+	struct asd_dma_tok next_scb; /* next scb to be delivered to CSEQ */
+
+	spinlock_t tc_index_lock; /* callers of asd_tc_index_*() hold this */
+	void **tc_index_array;	  /* pointer stored for each tc index */
+	void *tc_index_bitmap;	  /* set bit == tc index in use */
+	int   tc_index_bitmap_bits; /* number of usable bits in the bitmap */
+
+	struct tasklet_struct dl_tasklet;
+	struct done_list_struct *dl; /* array of done list entries, equals */
+	struct asd_dma_tok *actual_dl; /* actual_dl->vaddr */
+	int    dl_toggle;
+	int    dl_next;
+
+	int    num_edbs;	  /* number of entries in edb_arr */
+	struct asd_dma_tok **edb_arr;
+	int    num_escbs;	  /* number of entries in escb_arr */
+	struct asd_ascb **escb_arr; /* array of pointers to escbs */
+};
+
+/* This is the Host Adapter structure.  It describes the hardware
+ * SAS adapter.
+ */
+struct asd_ha_struct {
+	struct pci_dev   *pcidev;
+	const char       *name;	  /* adapter name string set by probe */
+
+	struct sas_ha_struct sas_ha;
+
+	u8                revision_id; /* PCI_REVISION_ID config byte */
+
+	int               iospace; /* 1: port I/O, 0: memory-mapped I/O */
+	spinlock_t        iolock;
+	struct asd_ha_addrspace io_handle[2]; /* [0]: BAR 0 or ports, [1]: BAR 2 */
+
+	struct hw_profile hw_prof;
+
+	struct asd_phy    phys[ASD_MAX_PHYS];
+	struct asd_sas_port   ports[ASD_MAX_PHYS];
+
+	struct dma_pool  *scb_pool; /* pool the SCBs are allocated from */
+
+	struct asd_seq_data  seq; /* sequencer related */
+};
+
+/* ---------- Common macros ---------- */
+
+#define ASD_BUSADDR_LO(__dma_handle) ((u32)(__dma_handle))
+#define ASD_BUSADDR_HI(__dma_handle) (((sizeof(dma_addr_t))==8)     \
+                                    ? ((u32)((__dma_handle) >> 32)) \
+                                    : ((u32)0))
+
+#define dev_to_asd_ha(__dev)  pci_get_drvdata(to_pci_dev(__dev))
+#define SCB_SITE_VALID(__site_no) (((__site_no) & 0xF0FF) != 0x00FF   \
+				 && ((__site_no) & 0xF0FF) > 0x001F)
+/* For each bit set in __lseq_mask, set __lseq to equal the bit
+ * position of the set bit and execute the statement following.
+ * __mc is the temporary mask, used as a mask "counter".
+ */
+#define for_each_sequencer(__lseq_mask, __mc, __lseq)                        \
+	for ((__mc)=(__lseq_mask),(__lseq)=0;(__mc)!=0;(__lseq++),(__mc)>>=1)\
+		if (((__mc) & 1))
+#define for_each_phy(__lseq_mask, __mc, __lseq)                              \
+	for ((__mc)=(__lseq_mask),(__lseq)=0;(__mc)!=0;(__lseq++),(__mc)>>=1)\
+		if (((__mc) & 1))
+
+#define PHY_ENABLED(_HA, _I) ((_HA)->hw_prof.enabled_phys & (1<<(_I)))
+
+/* ---------- DMA allocs ---------- */
+
+static inline struct asd_dma_tok *asd_dmatok_alloc(unsigned int flags)
+{
+	return kmem_cache_alloc(asd_dma_token_cache, flags);
+}
+
+static inline void asd_dmatok_free(struct asd_dma_tok *token)
+{
+	kmem_cache_free(asd_dma_token_cache, token);
+}
+
+static inline struct asd_dma_tok *asd_alloc_coherent(struct asd_ha_struct *
+						     asd_ha, size_t size,
+						     unsigned int flags)
+{
+	/* The token records the buffer so asd_free_coherent() can undo it. */
+	struct asd_dma_tok *tok = asd_dmatok_alloc(flags);
+
+	if (!tok)
+		return NULL;
+	tok->size = size;
+	tok->vaddr = dma_alloc_coherent(&asd_ha->pcidev->dev, tok->size,
+					&tok->dma_handle, flags);
+	if (tok->vaddr)
+		return tok;
+	/* No DMA memory: give the token back and report failure. */
+	asd_dmatok_free(tok);
+	return NULL;
+}
+
+static inline void asd_free_coherent(struct asd_ha_struct *asd_ha,
+				     struct asd_dma_tok *token)
+{
+	if (!token)	/* a NULL token is a no-op */
+		return;
+	dma_free_coherent(&asd_ha->pcidev->dev, token->size,
+			  token->vaddr, token->dma_handle);
+	asd_dmatok_free(token);
+}
+
+static inline void asd_init_ascb(struct asd_ha_struct *asd_ha,
+				 struct asd_ascb *ascb)
+{
+	INIT_LIST_HEAD(&ascb->list);
+	ascb->scb = ascb->dma_scb.vaddr;  /* scb aliases dma_scb.vaddr */
+	ascb->ha = asd_ha;
+	ascb->timer.function = NULL;	  /* no timeout handler yet */
+	init_timer(&ascb->timer);
+	ascb->tc_index = -1;		  /* no tc index assigned here */
+	init_completion(&ascb->completion);
+}
+
+/* Clear the pointer stored at index and mark the index free again.
+ * Must be called with the tc_index_lock held! */
+static inline void asd_tc_index_release(struct asd_seq_data *seq, int index)
+{
+	seq->tc_index_array[index] = NULL;
+	clear_bit(index, seq->tc_index_bitmap);
+}
+
+/* Claim the lowest free tc index and store ptr there; returns the index,
+ * or -1 if none is free.  Must be called with the tc_index_lock held! */
+static inline int asd_tc_index_get(struct asd_seq_data *seq, void *ptr)
+{
+	int index;
+
+	index = find_first_zero_bit(seq->tc_index_bitmap,
+				    seq->tc_index_bitmap_bits);
+	if (index == seq->tc_index_bitmap_bits)	/* every bit is taken */
+		return -1;
+
+	seq->tc_index_array[index] = ptr;
+	set_bit(index, seq->tc_index_bitmap);
+
+	return index;
+}
+
+/* Return the pointer stored at index; index is not range-checked.
+ * Must be called with the tc_index_lock held! */
+static inline void *asd_tc_index_find(struct asd_seq_data *seq, int index)
+{
+	return seq->tc_index_array[index];
+}
+
+/**
+ * asd_ascb_free -- release one aSCB that is no longer in use
+ * @ascb: the aSCB to release; NULL is accepted and ignored
+ *
+ * Gives back the tc index, the DMA-pool SCB and the aSCB itself.
+ */
+static inline void asd_ascb_free(struct asd_ascb *ascb)
+{
+	struct asd_seq_data *seq;
+	unsigned long irq_flags;
+
+	if (!ascb)
+		return;
+	seq = &ascb->ha->seq;
+	BUG_ON(!list_empty(&ascb->list));
+	spin_lock_irqsave(&seq->tc_index_lock, irq_flags);
+	asd_tc_index_release(seq, ascb->tc_index);
+	spin_unlock_irqrestore(&seq->tc_index_lock, irq_flags);
+	dma_pool_free(ascb->ha->scb_pool, ascb->dma_scb.vaddr,
+		      ascb->dma_scb.dma_handle);
+	kmem_cache_free(asd_ascb_cache, ascb);
+}
+
+/**
+ * asd_ascb_free_list -- free a list of ascbs
+ * @ascb_list: first ascb of the list; dereferenced, so must not be NULL
+ *
+ * This function will free a list of ascbs allocated by asd_ascb_alloc_list.
+ * It is used when say the scb queueing function returned QUEUE_FULL,
+ * and we do not need the ascbs any more.
+ */
+static inline void asd_ascb_free_list(struct asd_ascb *ascb_list)
+{
+	LIST_HEAD(list);	/* temporary head to walk the headless ring */
+	struct list_head *n, *pos;
+
+	__list_add(&list, ascb_list->list.prev, &ascb_list->list);
+	list_for_each_safe(pos, n, &list) {
+		list_del_init(pos);	/* asd_ascb_free() BUGs if still linked */
+		asd_ascb_free(list_entry(pos, struct asd_ascb, list));
+	}
+}
+
+/* ---------- Function declarations ---------- */
+
+int  asd_init_hw(struct asd_ha_struct *asd_ha);
+irqreturn_t asd_hw_isr(int irq, void *dev_id, struct pt_regs *regs);
+
+
+struct asd_ascb *asd_ascb_alloc_list(struct asd_ha_struct
+				     *asd_ha, int *num,
+				     unsigned int gfp_mask);
+
+int  asd_post_ascb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb,
+			int num);
+int  asd_post_escb_list(struct asd_ha_struct *asd_ha, struct asd_ascb *ascb,
+			int num);
+
+int  asd_init_post_escbs(struct asd_ha_struct *asd_ha);
+void asd_build_control_phy(struct asd_ascb *ascb, int phy_id, u8 subfunc);
+void asd_control_led(struct asd_ha_struct *asd_ha, int phy_id, int op);
+void asd_turn_led(struct asd_ha_struct *asd_ha, int phy_id, int op);
+int  asd_enable_phys(struct asd_ha_struct *asd_ha, const u8 phy_mask);
+void asd_build_initiate_link_adm_task(struct asd_ascb *ascb, int phy_id,
+				      u8 subfunc);
+
+void asd_ascb_timedout(unsigned long data);
+int  asd_chip_hardrst(struct asd_ha_struct *asd_ha);
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
new file mode 100644
index 0000000..ee2ccad
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -0,0 +1,866 @@
+/*
+ * Aic94xx SAS/SATA driver initialization.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include <scsi/scsi_host.h>
+
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_hwi.h"
+#include "aic94xx_seq.h"
+
+/* The format is "version.release.patchlevel" */
+#define ASD_DRIVER_VERSION "1.0.2"
+
+static int use_msi = 0;
+module_param_named(use_msi, use_msi, int, S_IRUGO);
+MODULE_PARM_DESC(use_msi, "\n"
+	"\tEnable(1) or disable(0) using PCI MSI.\n"
+	"\tDefault: 0");
+
+static int lldd_max_execute_num = 0;
+module_param_named(collector, lldd_max_execute_num, int, S_IRUGO);
+MODULE_PARM_DESC(collector, "\n"
+	"\tIf greater than one, tells the SAS Layer to run in Task Collector\n"
+	"\tMode.  If 1 or 0, tells the SAS Layer to run in Direct Mode.\n"
+	"\tThe aic94xx SAS LLDD supports both modes.\n"
+	"\tDefault: 0 (Direct Mode).\n");
+
+char sas_addr_str[2*SAS_ADDR_SIZE + 1] = "";
+
+static struct scsi_transport_template *aic94xx_transport_template;
+
+static struct scsi_host_template aic94xx_sht = {
+	.module			= THIS_MODULE,
+	/* .name is initialized */
+	.name			= "aic94xx",
+	.queuecommand		= sas_queuecommand,
+	.target_alloc		= sas_target_alloc,
+	.slave_configure	= sas_slave_configure,
+	.slave_destroy		= sas_slave_destroy,
+	.change_queue_depth	= sas_change_queue_depth,
+	.change_queue_type	= sas_change_queue_type,
+	.bios_param		= sas_bios_param,
+	.can_queue		= 1,
+	.cmd_per_lun		= 1,
+	.this_id		= -1,
+	.sg_tablesize		= SG_ALL,
+	.max_sectors		= SCSI_DEFAULT_MAX_SECTORS,
+	.use_clustering		= ENABLE_CLUSTERING,
+};
+
+/* Request and ioremap BAR 0 and BAR 2; returns 0 or a negative errno. */
+static int __devinit asd_map_memio(struct asd_ha_struct *asd_ha)
+{
+	int err, i;
+	struct asd_ha_addrspace *io_handle;
+
+	asd_ha->iospace = 0;
+	for (i = 0; i < 3; i += 2) {	/* BAR 0 -> [0], BAR 2 -> [1] */
+		io_handle = &asd_ha->io_handle[i==0?0:1];
+		io_handle->start = pci_resource_start(asd_ha->pcidev, i);
+		io_handle->len   = pci_resource_len(asd_ha->pcidev, i);
+		io_handle->flags = pci_resource_flags(asd_ha->pcidev, i);
+		err = -ENODEV;
+		if (!io_handle->start || !io_handle->len) {
+			asd_printk("MBAR%d start or length for %s is 0.\n",
+				   i==0?0:1, pci_name(asd_ha->pcidev));
+			goto Err;
+		}
+		err = pci_request_region(asd_ha->pcidev, i, ASD_DRIVER_NAME);
+		if (err) {
+			asd_printk("couldn't reserve memory region for %s\n",
+				   pci_name(asd_ha->pcidev));
+			goto Err;
+		}
+		if (io_handle->flags & IORESOURCE_CACHEABLE)
+			io_handle->addr = ioremap(io_handle->start,
+						  io_handle->len);
+		else
+			io_handle->addr = ioremap_nocache(io_handle->start,
+							  io_handle->len);
+		if (!io_handle->addr) {
+			asd_printk("couldn't map MBAR%d of %s\n", i==0?0:1,
+				   pci_name(asd_ha->pcidev));
+			err = -ENOMEM;	/* err was 0 from pci_request_region */
+			goto Err_unreq;
+		}
+	}
+	return 0;
+Err_unreq:
+	pci_release_region(asd_ha->pcidev, i);
+Err:
+	if (i > 0) {	/* second BAR failed: undo the first one as well */
+		iounmap(asd_ha->io_handle[0].addr);
+		pci_release_region(asd_ha->pcidev, 0);
+	}
+	return err;
+}
+
+static void __devexit asd_unmap_memio(struct asd_ha_struct *asd_ha)
+{
+	struct pci_dev *pdev = asd_ha->pcidev;
+
+	/* Second window (PCI BAR 2) first ... */
+	iounmap(asd_ha->io_handle[1].addr);
+	pci_release_region(pdev, 2);
+
+	/* ... then the first window (PCI BAR 0). */
+	iounmap(asd_ha->io_handle[0].addr);
+	pci_release_region(pdev, 0);
+}
+
+static int __devinit asd_map_ioport(struct asd_ha_struct *asd_ha)
+{
+	struct pci_dev *pdev = asd_ha->pcidev;
+	struct asd_ha_addrspace *ports = &asd_ha->io_handle[0];
+	int rc;
+
+	asd_ha->iospace = 1;
+	ports->start = pci_resource_start(pdev, PCI_IOBAR_OFFSET);
+	ports->len   = pci_resource_len(pdev, PCI_IOBAR_OFFSET);
+	ports->flags = pci_resource_flags(pdev, PCI_IOBAR_OFFSET);
+	/* Port I/O: the "address" is just the port base, nothing is mapped. */
+	ports->addr  = (void __iomem *) ports->start;
+	if (!ports->start || !ports->len) {
+		asd_printk("couldn't get IO ports for %s\n", pci_name(pdev));
+		return -ENODEV;
+	}
+	rc = pci_request_region(pdev, PCI_IOBAR_OFFSET, ASD_DRIVER_NAME);
+	if (rc)
+		asd_printk("couldn't reserve io space for %s\n",
+			   pci_name(pdev));
+
+	return rc;
+}
+
+static void __devexit asd_unmap_ioport(struct asd_ha_struct *asd_ha)
+{
+	pci_release_region(asd_ha->pcidev, PCI_IOBAR_OFFSET);
+}
+
+static int __devinit asd_map_ha(struct asd_ha_struct *asd_ha)
+{
+	struct pci_dev *pdev = asd_ha->pcidev;
+	u16 cmd_reg;
+	int err;
+
+	err = pci_read_config_word(pdev, PCI_COMMAND, &cmd_reg);
+	if (err) {
+		asd_printk("couldn't read command register of %s\n",
+			   pci_name(pdev));
+		return err;
+	}
+
+	/* Memory-mapped access is preferred; I/O ports are the fallback. */
+	if (cmd_reg & PCI_COMMAND_MEMORY)
+		return asd_map_memio(asd_ha);
+
+	if (!(cmd_reg & PCI_COMMAND_IO)) {
+		asd_printk("no proper device access to %s\n",
+			   pci_name(pdev));
+		return -ENODEV;
+	}
+
+	err = asd_map_ioport(asd_ha);
+	if (err)
+		return err;
+	asd_printk("%s ioport mapped -- upgrade your hardware\n",
+		   pci_name(pdev));
+
+	return 0;
+}
+
+static void __devexit asd_unmap_ha(struct asd_ha_struct *asd_ha)
+{
+	if (asd_ha->iospace)
+		asd_unmap_ioport(asd_ha);
+	else
+		asd_unmap_memio(asd_ha);
+}
+
+static const char *asd_dev_rev[256] = {	/* one slot per u8 revision_id */
+	[0] = "A0",
+	[1] = "A1",
+	[8] = "B0",
+};
+
+static int __devinit asd_common_setup(struct asd_ha_struct *asd_ha)
+{
+	struct hw_profile *prof = &asd_ha->hw_prof;
+	struct asd_phy_desc *pd;
+	int rc;
+
+	rc = pci_read_config_byte(asd_ha->pcidev, PCI_REVISION_ID,
+				  &asd_ha->revision_id);
+	if (rc) {
+		asd_printk("couldn't read REVISION ID register of %s\n",
+			   pci_name(asd_ha->pcidev));
+		return rc;
+	}
+
+	/* Revisions below B0 are refused. */
+	if (asd_ha->revision_id < AIC9410_DEV_REV_B0) {
+		asd_printk("%s is revision %s (%X), which is not supported\n",
+			   pci_name(asd_ha->pcidev),
+			   asd_dev_rev[asd_ha->revision_id],
+			   asd_ha->revision_id);
+		return -ENODEV;
+	}
+
+	/* Provide some sane default values. */
+	prof->max_scbs = 512;
+	prof->max_ddbs = 128;
+	prof->num_phys = ASD_MAX_PHYS;
+	/* All phys are enabled, by default. */
+	prof->enabled_phys = 0xFF;
+
+	/* Default link rates: SAS 1.5 to 3.0 Gbps, SATA fixed at 1.5 Gbps. */
+	for (pd = prof->phy_desc; pd < prof->phy_desc + ASD_MAX_PHYS; pd++) {
+		pd->max_sas_lrate  = SAS_LINK_RATE_3_0_GBPS;
+		pd->min_sas_lrate  = SAS_LINK_RATE_1_5_GBPS;
+		pd->max_sata_lrate = SAS_LINK_RATE_1_5_GBPS;
+		pd->min_sata_lrate = SAS_LINK_RATE_1_5_GBPS;
+	}
+
+	return 0;
+}
+
+static int __devinit asd_aic9410_setup(struct asd_ha_struct *asd_ha)
+{
+	struct hw_profile *prof = &asd_ha->hw_prof;
+	int err = asd_common_setup(asd_ha);
+
+	if (err)
+		return err;
+	/* 8 addresses; name bases are spaced 8 apart: port, dev, sata. */
+	prof->addr_range = 8;
+	prof->port_name_base = 0;
+	prof->dev_name_base = 8;
+	prof->sata_name_base = 16;
+	return 0;
+}
+
+static int __devinit asd_aic9405_setup(struct asd_ha_struct *asd_ha)
+{
+	struct hw_profile *prof = &asd_ha->hw_prof;
+	int err = asd_common_setup(asd_ha);
+
+	if (err)
+		return err;
+	/* 4 addresses; name bases are spaced 4 apart: port, dev, sata. */
+	prof->addr_range = 4;
+	prof->port_name_base = 0;
+	prof->dev_name_base = 4;
+	prof->sata_name_base = 8;
+	return 0;
+}
+
+static ssize_t asd_show_dev_rev(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct asd_ha_struct *asd_ha = dev_to_asd_ha(dev);
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			asd_dev_rev[asd_ha->revision_id]);
+}
+static DEVICE_ATTR(revision, S_IRUGO, asd_show_dev_rev, NULL);
+
+static ssize_t asd_show_dev_bios_build(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{	/* sysfs "bios_build": bios.bld is a u32, so print it unsigned */
+	struct asd_ha_struct *asd_ha = dev_to_asd_ha(dev);
+	return snprintf(buf, PAGE_SIZE, "%u\n", asd_ha->hw_prof.bios.bld);
+}
+static DEVICE_ATTR(bios_build, S_IRUGO, asd_show_dev_bios_build, NULL);
+
+static ssize_t asd_show_dev_pcba_sn(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct asd_ha_struct *asd_ha = dev_to_asd_ha(dev);
+	return snprintf(buf, PAGE_SIZE, "%s\n", asd_ha->hw_prof.pcba_sn);
+}
+static DEVICE_ATTR(pcba_sn, S_IRUGO, asd_show_dev_pcba_sn, NULL);
+
+static void asd_create_dev_attrs(struct asd_ha_struct *asd_ha)
+{
+	device_create_file(&asd_ha->pcidev->dev, &dev_attr_revision);
+	device_create_file(&asd_ha->pcidev->dev, &dev_attr_bios_build);
+	device_create_file(&asd_ha->pcidev->dev, &dev_attr_pcba_sn);
+}
+
+static void asd_remove_dev_attrs(struct asd_ha_struct *asd_ha)
+{
+	device_remove_file(&asd_ha->pcidev->dev, &dev_attr_revision);
+	device_remove_file(&asd_ha->pcidev->dev, &dev_attr_bios_build);
+	device_remove_file(&asd_ha->pcidev->dev, &dev_attr_pcba_sn);
+}
+
+/* The first entry, 0, is used for dynamic ids, the rest for devices
+ * we know about.
+ */
+static struct asd_pcidev_struct {
+	const char * name;
+	int (*setup)(struct asd_ha_struct *asd_ha);
+} asd_pcidev_data[] = {
+	/* Id 0 is used for dynamic ids. */
+	{ .name  = "Adaptec AIC-94xx SAS/SATA Host Adapter",
+	  .setup = asd_aic9410_setup
+	},
+	{ .name  = "Adaptec AIC-9410W SAS/SATA Host Adapter",
+	  .setup = asd_aic9410_setup
+	},
+	{ .name  = "Adaptec AIC-9405W SAS/SATA Host Adapter",
+	  .setup = asd_aic9405_setup
+	},
+};
+
+static inline int asd_create_ha_caches(struct asd_ha_struct *asd_ha)
+{
+	asd_ha->scb_pool = dma_pool_create(ASD_DRIVER_NAME "_scb_pool",
+					   &asd_ha->pcidev->dev,
+					   sizeof(struct scb),
+					   8, 0);
+	if (!asd_ha->scb_pool) {
+		asd_printk("couldn't create scb pool\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * asd_free_edbs -- free empty data buffers
+ * @asd_ha: pointer to host adapter structure
+ */
+static inline void asd_free_edbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int i;
+
+	for (i = 0; i < seq->num_edbs; i++)	/* NULL slots are skipped */
+		asd_free_coherent(asd_ha, seq->edb_arr[i]);
+	kfree(seq->edb_arr);
+	seq->edb_arr = NULL;	/* callers test edb_arr before calling */
+}
+
+static inline void asd_free_escbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int n;
+
+	for (n = 0; n < seq->num_escbs; n++) {
+		struct asd_ascb *escb = seq->escb_arr[n];
+		if (!list_empty(&escb->list))	/* unlink before freeing */
+			list_del_init(&escb->list);
+		asd_ascb_free(escb);
+	}
+	kfree(seq->escb_arr);
+	seq->escb_arr = NULL;
+}
+
+static inline void asd_destroy_ha_caches(struct asd_ha_struct *asd_ha)
+{
+	struct hw_profile *prof = &asd_ha->hw_prof;
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int phy_no;
+
+	/* asd_free_coherent() and kfree() both accept NULL. */
+	asd_free_coherent(asd_ha, prof->ddb_ext);
+	asd_free_coherent(asd_ha, prof->scb_ext);
+
+	kfree(prof->ddb_bitmap);
+	prof->ddb_bitmap = NULL;
+
+	for (phy_no = 0; phy_no < ASD_MAX_PHYS; phy_no++)
+		asd_free_coherent(asd_ha, asd_ha->phys[phy_no].id_frm_tok);
+
+	if (seq->escb_arr)
+		asd_free_escbs(asd_ha);
+	if (seq->edb_arr)
+		asd_free_edbs(asd_ha);
+
+	if (prof->ue.area) {
+		kfree(prof->ue.area);
+		prof->ue.area = NULL;
+	}
+	/* The bitmap is only freed together with the index array. */
+	if (seq->tc_index_array) {
+		kfree(seq->tc_index_array);
+		kfree(seq->tc_index_bitmap);
+		seq->tc_index_array = NULL;
+		seq->tc_index_bitmap = NULL;
+	}
+	if (seq->actual_dl) {
+		asd_free_coherent(asd_ha, seq->actual_dl);
+		seq->actual_dl = NULL;
+		seq->dl = NULL;
+	}
+	if (seq->next_scb.vaddr) {
+		dma_pool_free(asd_ha->scb_pool, seq->next_scb.vaddr,
+			      seq->next_scb.dma_handle);
+		seq->next_scb.vaddr = NULL;
+	}
+	dma_pool_destroy(asd_ha->scb_pool);
+	asd_ha->scb_pool = NULL;
+}
+
+kmem_cache_t *asd_dma_token_cache;
+kmem_cache_t *asd_ascb_cache;
+
+static int asd_create_global_caches(void)
+{
+	if (!asd_dma_token_cache) {	/* create only if not there yet */
+		asd_dma_token_cache
+			= kmem_cache_create(ASD_DRIVER_NAME "_dma_token",
+					    sizeof(struct asd_dma_tok),
+					    0,
+					    SLAB_HWCACHE_ALIGN,
+					    NULL, NULL);
+		if (!asd_dma_token_cache) {
+			asd_printk("couldn't create dma token cache\n");
+			return -ENOMEM;
+		}
+	}
+
+	if (!asd_ascb_cache) {		/* create only if not there yet */
+		asd_ascb_cache = kmem_cache_create(ASD_DRIVER_NAME "_ascb",
+						   sizeof(struct asd_ascb),
+						   0,
+						   SLAB_HWCACHE_ALIGN,
+						   NULL, NULL);
+		if (!asd_ascb_cache) {
+			asd_printk("couldn't create ascb cache\n");
+			goto Err;
+		}
+	}
+
+	return 0;
+Err:	/* ascb cache failed: the dma token cache is destroyed as well */
+	kmem_cache_destroy(asd_dma_token_cache);
+	asd_dma_token_cache = NULL;
+	return -ENOMEM;
+}
+
+static void asd_destroy_global_caches(void)
+{
+	if (asd_dma_token_cache)
+		kmem_cache_destroy(asd_dma_token_cache);
+	asd_dma_token_cache = NULL;
+
+	if (asd_ascb_cache)
+		kmem_cache_destroy(asd_ascb_cache);
+	asd_ascb_cache = NULL;
+}
+
+static int asd_register_sas_ha(struct asd_ha_struct *asd_ha)
+{
+	int i;
+	/* Both arrays hold pointers, so size them by the pointer type. */
+	struct asd_sas_phy   **sas_phys =
+		kmalloc(ASD_MAX_PHYS * sizeof(*sas_phys), GFP_KERNEL);
+	struct asd_sas_port  **sas_ports =
+		kmalloc(ASD_MAX_PHYS * sizeof(*sas_ports), GFP_KERNEL);
+
+	if (!sas_phys || !sas_ports) {
+		kfree(sas_phys);
+		kfree(sas_ports);
+		return -ENOMEM;
+	}
+
+	asd_ha->sas_ha.sas_ha_name = (char *) asd_ha->name;
+	asd_ha->sas_ha.lldd_module = THIS_MODULE;
+	asd_ha->sas_ha.sas_addr = &asd_ha->hw_prof.sas_addr[0];
+
+	for (i = 0; i < ASD_MAX_PHYS; i++) {
+		sas_phys[i] = &asd_ha->phys[i].sas_phy;
+		sas_ports[i] = &asd_ha->ports[i];
+	}
+
+	asd_ha->sas_ha.sas_phy = sas_phys;
+	asd_ha->sas_ha.sas_port= sas_ports;
+	asd_ha->sas_ha.num_phys= ASD_MAX_PHYS;
+	asd_ha->sas_ha.lldd_queue_size = asd_ha->seq.can_queue;
+	/* NOTE(review): the arrays are not freed here if this call fails. */
+	return sas_register_ha(&asd_ha->sas_ha);
+}
+
+static int asd_unregister_sas_ha(struct asd_ha_struct *asd_ha)
+{
+	int err;	/* only reported; teardown continues regardless */
+
+	err = sas_unregister_ha(&asd_ha->sas_ha);
+
+	sas_remove_host(asd_ha->sas_ha.core.shost);
+	scsi_remove_host(asd_ha->sas_ha.core.shost);
+	scsi_host_put(asd_ha->sas_ha.core.shost);
+
+	kfree(asd_ha->sas_ha.sas_phy);	/* set in asd_register_sas_ha() */
+	kfree(asd_ha->sas_ha.sas_port);	/* set in asd_register_sas_ha() */
+
+	return err;
+}
+
+/* PCI probe: create the SCSI host and adapter state and start the chip. */
+static int __devinit asd_pci_probe(struct pci_dev *dev,
+				   const struct pci_device_id *id)
+{
+	struct asd_pcidev_struct *asd_dev;
+	unsigned asd_id = (unsigned) id->driver_data;
+	struct asd_ha_struct *asd_ha;
+	struct Scsi_Host *shost;
+	int err;
+
+	if (asd_id >= ARRAY_SIZE(asd_pcidev_data)) {
+		asd_printk("wrong driver_data in PCI table\n");
+		return -ENODEV;
+	}
+
+	if ((err = pci_enable_device(dev))) {
+		asd_printk("couldn't enable device %s\n", pci_name(dev));
+		return err;
+	}
+
+	pci_set_master(dev);
+
+	err = -ENOMEM;
+	shost = scsi_host_alloc(&aic94xx_sht, sizeof(void *));
+	if (!shost)
+		goto Err;
+
+	asd_dev = &asd_pcidev_data[asd_id];
+
+	asd_ha = kzalloc(sizeof(*asd_ha), GFP_KERNEL);
+	if (!asd_ha) {
+		asd_printk("out of memory\n");
+		goto Err_put;	/* shost must not leak; kfree(NULL) is fine */
+	}
+	asd_ha->pcidev = dev;
+	asd_ha->sas_ha.pcidev = asd_ha->pcidev;
+	asd_ha->sas_ha.lldd_ha = asd_ha;
+
+	asd_ha->name = asd_dev->name;
+	asd_printk("found %s, device %s\n", asd_ha->name, pci_name(dev));
+
+	SHOST_TO_SAS_HA(shost) = &asd_ha->sas_ha;
+	asd_ha->sas_ha.core.shost = shost;
+	shost->transportt = aic94xx_transport_template;
+	shost->max_id = ~0;
+	shost->max_lun = ~0;
+	shost->max_cmd_len = 16;
+
+	err = scsi_add_host(shost, &dev->dev);
+	if (err)
+		goto Err_put;
+
+	/* The host is registered from here on: later failures unwind
+	 * through Err_remove so it is removed before the final put. */
+	err = asd_dev->setup(asd_ha);
+	if (err)
+		goto Err_remove;
+
+	err = -ENODEV;
+	if (!pci_set_dma_mask(dev, DMA_64BIT_MASK)
+	    && !pci_set_consistent_dma_mask(dev, DMA_64BIT_MASK))
+		;
+	else if (!pci_set_dma_mask(dev, DMA_32BIT_MASK)
+		 && !pci_set_consistent_dma_mask(dev, DMA_32BIT_MASK))
+		;
+	else {
+		asd_printk("no suitable DMA mask for %s\n", pci_name(dev));
+		goto Err_remove;
+	}
+
+	pci_set_drvdata(dev, asd_ha);
+
+	err = asd_map_ha(asd_ha);
+	if (err)
+		goto Err_remove;
+
+	err = asd_create_ha_caches(asd_ha);
+	if (err)
+		goto Err_unmap;
+
+	err = asd_init_hw(asd_ha);
+	if (err)
+		goto Err_free_cache;
+
+	asd_printk("device %s: SAS addr %llx, PCBA SN %s, %d phys, %d enabled "
+		   "phys, flash %s, BIOS %s%d\n",
+		   pci_name(dev), SAS_ADDR(asd_ha->hw_prof.sas_addr),
+		   asd_ha->hw_prof.pcba_sn, asd_ha->hw_prof.max_phys,
+		   asd_ha->hw_prof.num_phys,
+		   asd_ha->hw_prof.flash.present ? "present" : "not present",
+		   asd_ha->hw_prof.bios.present ? "build " : "not present",
+		   asd_ha->hw_prof.bios.bld);
+
+	shost->can_queue = asd_ha->seq.can_queue;
+
+	if (use_msi)
+		pci_enable_msi(asd_ha->pcidev);
+
+	err = request_irq(asd_ha->pcidev->irq, asd_hw_isr, SA_SHIRQ,
+			  ASD_DRIVER_NAME, asd_ha);
+	if (err) {
+		asd_printk("couldn't get irq %d for %s\n",
+			   asd_ha->pcidev->irq, pci_name(asd_ha->pcidev));
+		goto Err_irq;
+	}
+	asd_enable_ints(asd_ha);
+
+	err = asd_init_post_escbs(asd_ha);
+	if (err) {
+		asd_printk("couldn't post escbs for %s\n",
+			   pci_name(asd_ha->pcidev));
+		goto Err_escbs;
+	}
+	ASD_DPRINTK("escbs posted\n");
+
+	asd_create_dev_attrs(asd_ha);
+
+	err = asd_register_sas_ha(asd_ha);
+	if (err)
+		goto Err_reg_sas;
+
+	err = asd_enable_phys(asd_ha, asd_ha->hw_prof.enabled_phys);
+	if (err) {
+		asd_printk("coudln't enable phys, err:%d\n", err);
+		goto Err_en_phys;
+	}
+	ASD_DPRINTK("enabled phys\n");
+	/* give the phy enabling interrupt event time to come in (1s
+	 * is empirically about all it takes) */
+	ssleep(1);
+	/* Wait for discovery to finish */
+	scsi_flush_work(asd_ha->sas_ha.core.shost);
+
+	return 0;
+Err_en_phys:
+	asd_unregister_sas_ha(asd_ha);
+Err_reg_sas:
+	asd_remove_dev_attrs(asd_ha);
+Err_escbs:
+	asd_disable_ints(asd_ha);
+	free_irq(dev->irq, asd_ha);
+Err_irq:
+	if (use_msi)
+		pci_disable_msi(dev);
+	asd_chip_hardrst(asd_ha);
+Err_free_cache:
+	asd_destroy_ha_caches(asd_ha);
+Err_unmap:
+	asd_unmap_ha(asd_ha);
+Err_remove:
+	scsi_remove_host(shost);
+Err_put:
+	scsi_host_put(shost);
+	kfree(asd_ha);
+Err:
+	pci_disable_device(dev);
+	return err;
+}
+
+static void asd_free_queues(struct asd_ha_struct *asd_ha)
+{
+	unsigned long flags;
+	LIST_HEAD(pending);	/* private list the pend_q is emptied into */
+	struct list_head *n, *pos;
+
+	spin_lock_irqsave(&asd_ha->seq.pend_q_lock, flags);
+	asd_ha->seq.pending = 0;
+	list_splice_init(&asd_ha->seq.pend_q, &pending);
+	spin_unlock_irqrestore(&asd_ha->seq.pend_q_lock, flags);
+
+	if (!list_empty(&pending))	/* debug note only; still freed below */
+		ASD_DPRINTK("Uh-oh! Pending is not empty!\n");
+
+	list_for_each_safe(pos, n, &pending) {	/* runs without the lock */
+		struct asd_ascb *ascb = list_entry(pos, struct asd_ascb, list);
+		list_del_init(pos);	/* asd_ascb_free() BUGs if still linked */
+		ASD_DPRINTK("freeing from pending\n");
+		asd_ascb_free(ascb);
+	}
+}
+
+static void asd_turn_off_leds(struct asd_ha_struct *asd_ha)
+{
+	u8 phy_mask = asd_ha->hw_prof.enabled_phys;	/* local copy */
+	u8 i;
+
+	for_each_phy(phy_mask, phy_mask, i) {	/* mask doubles as scratch */
+		asd_turn_led(asd_ha, i, 0);
+		asd_control_led(asd_ha, i, 0);
+	}
+}
+
+static void __devexit asd_pci_remove(struct pci_dev *dev)
+{
+	struct asd_ha_struct *asd_ha = pci_get_drvdata(dev);
+
+	if (!asd_ha)	/* no adapter state attached to this device */
+		return;
+
+	asd_unregister_sas_ha(asd_ha);	/* also removes and puts the host */
+
+	asd_disable_ints(asd_ha);
+
+	asd_remove_dev_attrs(asd_ha);
+
+	/* XXX more here as needed */
+
+	free_irq(dev->irq, asd_ha);
+	if (use_msi)
+		pci_disable_msi(asd_ha->pcidev);
+	asd_turn_off_leds(asd_ha);
+	asd_chip_hardrst(asd_ha);
+	asd_free_queues(asd_ha);	/* frees ascbs left on the pend_q */
+	asd_destroy_ha_caches(asd_ha);
+	asd_unmap_ha(asd_ha);
+	kfree(asd_ha);
+	pci_disable_device(dev);
+	return;
+}
+
+static ssize_t asd_version_show(struct device_driver *driver, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s\n", ASD_DRIVER_VERSION);
+}
+static DRIVER_ATTR(version, S_IRUGO, asd_version_show, NULL);
+
+static void asd_create_driver_attrs(struct device_driver *driver)
+{
+	driver_create_file(driver, &driver_attr_version);
+}
+
+static void asd_remove_driver_attrs(struct device_driver *driver)
+{
+	driver_remove_file(driver, &driver_attr_version);
+}
+
+static struct sas_domain_function_template aic94xx_transport_functions = {
+	.lldd_port_formed	= asd_update_port_links,
+
+	.lldd_dev_found		= asd_dev_found,
+	.lldd_dev_gone		= asd_dev_gone,
+
+	.lldd_execute_task	= asd_execute_task,
+
+	.lldd_abort_task	= asd_abort_task,
+	.lldd_abort_task_set	= asd_abort_task_set,
+	.lldd_clear_aca		= asd_clear_aca,
+	.lldd_clear_task_set	= asd_clear_task_set,
+	.lldd_I_T_nexus_reset	= NULL,
+	.lldd_lu_reset		= asd_lu_reset,
+	.lldd_query_task	= asd_query_task,
+
+	.lldd_clear_nexus_port	= asd_clear_nexus_port,
+	.lldd_clear_nexus_ha	= asd_clear_nexus_ha,
+
+	.lldd_control_phy	= asd_control_phy,
+};
+
+static const struct pci_device_id aic94xx_pci_table[] __devinitdata = {
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR10),
+	 0, 0, 1},	/* last field: index into asd_pcidev_data[] */
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR12),
+	 0, 0, 1},	/* 1: AIC-9410W entry */
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR1E),
+	 0, 0, 1},	/* 1: AIC-9410W entry */
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR30),
+	 0, 0, 2},	/* 2: AIC-9405W entry */
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR32),
+	 0, 0, 2},	/* 2: AIC-9405W entry */
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR3E),
+	 0, 0, 2},	/* 2: AIC-9405W entry */
+	{PCI_DEVICE(PCI_VENDOR_ID_ADAPTEC2, PCI_DEVICE_ID_ADAPTEC2_RAZOR3F),
+	 0, 0, 2},	/* 2: AIC-9405W entry */
+	{}
+};
+
+MODULE_DEVICE_TABLE(pci, aic94xx_pci_table);
+
+static struct pci_driver aic94xx_pci_driver = {
+	.name		= ASD_DRIVER_NAME,
+	.id_table	= aic94xx_pci_table,
+	.probe		= asd_pci_probe,
+	.remove		= __devexit_p(asd_pci_remove),
+};
+
+/* Module entry point; returns 0 or a negative errno. */
+static int __init aic94xx_init(void)
+{
+	int err;
+
+	asd_printk("%s version %s loaded\n", ASD_DRIVER_DESCRIPTION,
+		   ASD_DRIVER_VERSION);
+
+	err = asd_create_global_caches();
+	if (err)
+		return err;
+
+	err = -ENOMEM;	/* attach signals failure only by returning NULL */
+	aic94xx_transport_template =
+		sas_domain_attach_transport(&aic94xx_transport_functions);
+	if (!aic94xx_transport_template)
+		goto out_destroy_caches;
+
+	err = pci_register_driver(&aic94xx_pci_driver);
+	if (err)
+		goto out_release_transport;
+
+	asd_create_driver_attrs(&aic94xx_pci_driver.driver);
+
+	return err;
+
+ out_release_transport:
+	sas_release_transport(aic94xx_transport_template);
+ out_destroy_caches:
+	asd_destroy_global_caches();
+	return err;
+}
+
+static void __exit aic94xx_exit(void)
+{
+	asd_remove_driver_attrs(&aic94xx_pci_driver.driver);
+	pci_unregister_driver(&aic94xx_pci_driver);
+	sas_release_transport(aic94xx_transport_template);
+	asd_destroy_global_caches();
+	asd_printk("%s version %s unloaded\n", ASD_DRIVER_DESCRIPTION,
+		   ASD_DRIVER_VERSION);
+}
+
+module_init(aic94xx_init);
+module_exit(aic94xx_exit);
+
+MODULE_AUTHOR("Luben Tuikov <luben_tuikov@adaptec.com>");
+MODULE_DESCRIPTION(ASD_DRIVER_DESCRIPTION);
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(ASD_DRIVER_VERSION);
diff --git a/drivers/scsi/aic94xx/aic94xx_reg.c b/drivers/scsi/aic94xx/aic94xx_reg.c
new file mode 100644
index 0000000..f210dac
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_reg.c
@@ -0,0 +1,332 @@
+/*
+ * Aic94xx SAS/SATA driver register access.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include "aic94xx_reg.h"
+#include "aic94xx.h"
+
+/* Writing to device address space.
+ * Offset comes before value to remind that the operation of
+ * this function is *offs = val.
+ */
+static inline void asd_write_byte(struct asd_ha_struct *asd_ha,
+				  unsigned long offs, u8 val)
+{
+	struct asd_ha_addrspace *mbar0 = &asd_ha->io_handle[0];
+	if (likely(!asd_ha->iospace))	/* memory-mapped access is the expected case */
+		writeb(val, mbar0->addr + offs);
+	else	/* port I/O: only the low 8 bits of offs select the port */
+		outb(val, (unsigned long)mbar0->addr + (offs & 0xFF));
+	wmb();	/* write barrier after every device write */
+}
+
+static inline void asd_write_word(struct asd_ha_struct *asd_ha,
+				  unsigned long offs, u16 val)
+{
+	struct asd_ha_addrspace *mbar0 = &asd_ha->io_handle[0];
+	if (likely(!asd_ha->iospace))	/* memory-mapped access is the expected case */
+		writew(val, mbar0->addr + offs);
+	else	/* port I/O: only the low 8 bits of offs select the port */
+		outw(val, (unsigned long)mbar0->addr + (offs & 0xFF));
+	wmb();	/* write barrier after every device write */
+}
+
+static inline void asd_write_dword(struct asd_ha_struct *asd_ha,
+				   unsigned long offs, u32 val)
+{
+	struct asd_ha_addrspace *mbar0 = &asd_ha->io_handle[0];
+	if (likely(!asd_ha->iospace))	/* memory-mapped access is the expected case */
+		writel(val, mbar0->addr + offs);
+	else	/* port I/O: only the low 8 bits of offs select the port */
+		outl(val, (unsigned long)mbar0->addr + (offs & 0xFF));
+	wmb();	/* write barrier after every device write */
+}
+
+/* Reading from device address space.
+ */
+static inline u8 asd_read_byte(struct asd_ha_struct *asd_ha,
+			       unsigned long offs)
+{
+	struct asd_ha_addrspace *mbar0 = &asd_ha->io_handle[0];
+	u8 data;
+	if (likely(!asd_ha->iospace))	/* memory-mapped access is the expected case */
+		data = readb(mbar0->addr + offs);
+	else	/* port I/O: only the low 8 bits of offs select the port */
+		data = inb((unsigned long)mbar0->addr + (offs & 0xFF));
+	rmb();	/* read barrier before the value is handed back */
+	return data;
+}
+
+static inline u16 asd_read_word(struct asd_ha_struct *asd_ha,
+				unsigned long offs)
+{
+	struct asd_ha_addrspace *mbar0 = &asd_ha->io_handle[0];
+	u16 data;
+	if (likely(!asd_ha->iospace))	/* memory-mapped access is the expected case */
+		data = readw(mbar0->addr + offs);
+	else	/* port I/O: only the low 8 bits of offs select the port */
+		data = inw((unsigned long)mbar0->addr + (offs & 0xFF));
+	rmb();	/* read barrier before the value is handed back */
+	return data;
+}
+
+static inline u32 asd_read_dword(struct asd_ha_struct *asd_ha,
+				 unsigned long offs)
+{
+	struct asd_ha_addrspace *mbar0 = &asd_ha->io_handle[0];
+	u32 data;
+	if (likely(!asd_ha->iospace))	/* memory-mapped access is the expected case */
+		data = readl(mbar0->addr + offs);
+	else	/* port I/O: only the low 8 bits of offs select the port */
+		data = inl((unsigned long)mbar0->addr + (offs & 0xFF));
+	rmb();	/* read barrier before the value is handed back */
+	return data;
+}
+
+static inline u32 asd_mem_offs_swa(void)	/* SWA sits at the start of MBAR0 */
+{
+	return 0;
+}
+
+static inline u32 asd_mem_offs_swc(void)	/* SWC follows directly after SWA */
+{
+	return MBAR0_SWA_SIZE + asd_mem_offs_swa();
+}
+
+static inline u32 asd_mem_offs_swb(void)	/* SWB: past SWC plus a 0x20-byte gap */
+{
+	return 0x20 + MBAR0_SWC_SIZE + asd_mem_offs_swc();
+}
+
+/* Generators for asd_{read,write}_<ww>_<ord>(): access @reg through sliding
+ * window <ww>.  The caller guarantees @reg is inside that window; no check here.
+ */
+#define ASD_READ_SW(ww, type, ord)                                     \
+static inline type asd_read_##ww##_##ord (struct asd_ha_struct *asd_ha,\
+					  u32 reg)                     \
+{                                                                      \
+	struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[0];    \
+	u32 map_offs=(reg - io_handle-> ww##_base )+asd_mem_offs_##ww ();\
+	return asd_read_##ord (asd_ha, (unsigned long) map_offs);      \
+}
+
+#define ASD_WRITE_SW(ww, type, ord)                                    \
+static inline void asd_write_##ww##_##ord (struct asd_ha_struct *asd_ha,\
+				  u32 reg, type val)                   \
+{                                                                      \
+	struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[0];    \
+	u32 map_offs=(reg - io_handle-> ww##_base )+asd_mem_offs_##ww ();\
+	asd_write_##ord (asd_ha, (unsigned long) map_offs, val);       \
+}
+
+ASD_READ_SW(swa, u8,  byte);	/* asd_read_swa_{byte,word,dword}() */
+ASD_READ_SW(swa, u16, word);
+ASD_READ_SW(swa, u32, dword);
+
+ASD_READ_SW(swb, u8,  byte);	/* asd_read_swb_{byte,word,dword}() */
+ASD_READ_SW(swb, u16, word);
+ASD_READ_SW(swb, u32, dword);
+
+ASD_READ_SW(swc, u8,  byte);	/* asd_read_swc_{byte,word,dword}() */
+ASD_READ_SW(swc, u16, word);
+ASD_READ_SW(swc, u32, dword);
+
+ASD_WRITE_SW(swa, u8,  byte);	/* asd_write_swa_{byte,word,dword}() */
+ASD_WRITE_SW(swa, u16, word);
+ASD_WRITE_SW(swa, u32, dword);
+
+ASD_WRITE_SW(swb, u8,  byte);	/* asd_write_swb_{byte,word,dword}() */
+ASD_WRITE_SW(swb, u16, word);
+ASD_WRITE_SW(swb, u32, dword);
+
+ASD_WRITE_SW(swc, u8,  byte);	/* asd_write_swc_{byte,word,dword}() */
+ASD_WRITE_SW(swc, u16, word);
+ASD_WRITE_SW(swc, u32, dword);
+
+/*
+ * A word about sliding windows:
+ * MBAR0 is divided into sliding windows A, C and B, in that order.
+ * SWA starts at offset 0 of MBAR0, up to 0x57, with size 0x58 bytes.
+ * SWC starts at offset 0x58 of MBAR0, up to 0x5F, with size 0x8 bytes.
+ * From 0x60 to 0x7F, we have a copy of PCI config space 0x60-0x7F.
+ * SWB starts at offset 0x80 of MBAR0 and extends to the end of MBAR0.
+ * See asd_init_sw() in aic94xx_hwi.c
+ *
+ * We map the most common registers we'd access of the internal 4GB
+ * host adapter memory space.  If a register/internal memory location
+ * is wanted which is not mapped, we slide SWB, by paging it,
+ * see asd_move_swb() in aic94xx_reg.c.
+ */
+
+/**
+ * asd_move_swb -- slide window B so that it covers a given register
+ * @asd_ha: pointer to host adapter structure
+ * @reg: internal address that must fall inside the repositioned window
+ */
+static inline void asd_move_swb(struct asd_ha_struct *asd_ha, u32 reg)
+{
+	u32 new_base = reg & ~(MBAR0_SWB_SIZE-1);  /* assumes power-of-two size */
+	pci_write_config_dword(asd_ha->pcidev, PCI_CONF_MBAR0_SWB, new_base);
+	asd_ha->io_handle[0].swb_base = new_base;  /* cached for range checks */
+}
+
+static void __asd_write_reg_byte(struct asd_ha_struct *asd_ha, u32 reg, u8 val)
+{
+	struct asd_ha_addrspace *win = &asd_ha->io_handle[0];
+	/* Takes no lock itself; asd_write_reg_string() holds iolock around it. */
+	BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR);
+	if (reg >= win->swa_base && reg < win->swa_base + MBAR0_SWA_SIZE) {
+		asd_write_swa_byte(asd_ha, reg, val);
+		return;
+	}
+	if (reg < win->swb_base || reg >= win->swb_base + MBAR0_SWB_SIZE) {
+		/* Outside SWB: use SWC if it covers reg, else slide SWB there. */
+		if (reg >= win->swc_base && reg < win->swc_base + MBAR0_SWC_SIZE) {
+			asd_write_swc_byte(asd_ha, reg, val);
+			return;
+		}
+		asd_move_swb(asd_ha, reg);
+	}
+	asd_write_swb_byte(asd_ha, reg, val);
+}
+
+#define ASD_WRITE_REG(type, ord)                                  \
+void asd_write_reg_##ord (struct asd_ha_struct *asd_ha, u32 reg, type val)\
+{	/* write val to internal address reg, under iolock */     \
+	struct asd_ha_addrspace *io_handle=&asd_ha->io_handle[0]; \
+	unsigned long flags;                                      \
+	BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR);         \
+	spin_lock_irqsave(&asd_ha->iolock, flags);                \
+	if (io_handle->swa_base <= reg                            \
+	    && reg < io_handle->swa_base + MBAR0_SWA_SIZE)        \
+		asd_write_swa_##ord (asd_ha, reg,val);            \
+	else if (io_handle->swb_base <= reg                       \
+		 && reg < io_handle->swb_base + MBAR0_SWB_SIZE)   \
+		asd_write_swb_##ord (asd_ha, reg, val);           \
+	else if (io_handle->swc_base <= reg                       \
+		 && reg < io_handle->swc_base + MBAR0_SWC_SIZE)   \
+		asd_write_swc_##ord (asd_ha, reg, val);           \
+	else {                                                    \
+		/* No window covers reg: slide SWB over it first */ \
+		asd_move_swb(asd_ha, reg);                        \
+		asd_write_swb_##ord (asd_ha, reg, val);           \
+	}                                                         \
+	spin_unlock_irqrestore(&asd_ha->iolock, flags);           \
+}
+
+ASD_WRITE_REG(u8, byte);	/* defines asd_write_reg_byte() */
+ASD_WRITE_REG(u16,word);	/* defines asd_write_reg_word() */
+ASD_WRITE_REG(u32,dword);	/* defines asd_write_reg_dword() */
+
+/* Read one byte from internal address @reg, sliding SWB when no window
+ * currently covers it.  Takes no lock itself; asd_read_reg_string() holds
+ * asd_ha->iolock around it. */
+static u8 __asd_read_reg_byte(struct asd_ha_struct *asd_ha, u32 reg)
+{
+	struct asd_ha_addrspace *win = &asd_ha->io_handle[0];
+
+	BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR);
+	/* Windows are tried in the order SWA, SWB, SWC. */
+	if (reg >= win->swa_base && reg < win->swa_base + MBAR0_SWA_SIZE)
+		return asd_read_swa_byte(asd_ha, reg);
+	if (reg >= win->swb_base && reg < win->swb_base + MBAR0_SWB_SIZE)
+		return asd_read_swb_byte(asd_ha, reg);
+	if (reg >= win->swc_base && reg < win->swc_base + MBAR0_SWC_SIZE)
+		return asd_read_swc_byte(asd_ha, reg);
+
+	/* No window covers reg: slide SWB over it, then read through SWB. */
+	asd_move_swb(asd_ha, reg);
+	return asd_read_swb_byte(asd_ha, reg);
+}
+
+
+#define ASD_READ_REG(type, ord)                                   \
+type asd_read_reg_##ord (struct asd_ha_struct *asd_ha, u32 reg)   \
+{	/* read internal address reg, under iolock */             \
+	struct asd_ha_addrspace *io_handle=&asd_ha->io_handle[0]; \
+	type val;                                                 \
+	unsigned long flags;                                      \
+	BUG_ON(reg >= 0xC0000000 || reg < ALL_BASE_ADDR);         \
+	spin_lock_irqsave(&asd_ha->iolock, flags);                \
+	if (io_handle->swa_base <= reg                            \
+	    && reg < io_handle->swa_base + MBAR0_SWA_SIZE)        \
+		val = asd_read_swa_##ord (asd_ha, reg);           \
+	else if (io_handle->swb_base <= reg                       \
+		 && reg < io_handle->swb_base + MBAR0_SWB_SIZE)   \
+		val = asd_read_swb_##ord (asd_ha, reg);           \
+	else if (io_handle->swc_base <= reg                       \
+		 && reg < io_handle->swc_base + MBAR0_SWC_SIZE)   \
+		val = asd_read_swc_##ord (asd_ha, reg);           \
+	else {                                                    \
+		/* No window covers reg: slide SWB over it first */ \
+		asd_move_swb(asd_ha, reg);                        \
+		val = asd_read_swb_##ord (asd_ha, reg);           \
+	}                                                         \
+	spin_unlock_irqrestore(&asd_ha->iolock, flags);           \
+	return val;                                               \
+}
+
+ASD_READ_REG(u8, byte);	/* defines asd_read_reg_byte() */
+ASD_READ_REG(u16,word);	/* defines asd_read_reg_word() */
+ASD_READ_REG(u32,dword);	/* defines asd_read_reg_dword() */
+
+/**
+ * asd_read_reg_string -- copy consecutive device bytes into a buffer
+ * @asd_ha: pointer to host adapter structure
+ * @dst: destination buffer; must hold at least @count bytes
+ * @offs: register address of the first byte to read
+ * @count: number of bytes to read; nothing is read if it is <= 0
+ */
+void asd_read_reg_string(struct asd_ha_struct *asd_ha, void *dst,
+			 u32 offs, int count)
+{
+	u8 *out = dst;
+	unsigned long irq_flags;
+	int i;
+	spin_lock_irqsave(&asd_ha->iolock, irq_flags);	/* held for the whole copy */
+	for (i = 0; i < count; i++)
+		out[i] = __asd_read_reg_byte(asd_ha, offs + i);
+	spin_unlock_irqrestore(&asd_ha->iolock, irq_flags);
+}
+
+/**
+ * asd_write_reg_string -- copy a buffer into consecutive device bytes
+ * @asd_ha: pointer to host adapter structure
+ * @src: source buffer; at least @count bytes are read from it
+ * @offs: register address of the first byte to write
+ * @count: number of bytes to write; nothing is written if it is <= 0
+ */
+void asd_write_reg_string(struct asd_ha_struct *asd_ha, void *src,
+			  u32 offs, int count)
+{
+	u8 *in = src;
+	unsigned long irq_flags;
+	int i;
+	spin_lock_irqsave(&asd_ha->iolock, irq_flags);	/* held for the whole copy */
+	for (i = 0; i < count; i++)
+		__asd_write_reg_byte(asd_ha, offs + i, in[i]);
+	spin_unlock_irqrestore(&asd_ha->iolock, irq_flags);
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_reg.h b/drivers/scsi/aic94xx/aic94xx_reg.h
new file mode 100644
index 0000000..2279307
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_reg.h
@@ -0,0 +1,302 @@
+/*
+ * Aic94xx SAS/SATA driver hardware registers definitions.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_REG_H_
+#define _AIC94XX_REG_H_
+
+#include <asm/io.h>
+#include "aic94xx_hwi.h"
+
+/* Values */
+#define AIC9410_DEV_REV_B0            0x8
+
+/* MBAR0, SWA, SWB, SWC, internal memory space addresses */
+#define REG_BASE_ADDR                 0xB8000000
+#define REG_BASE_ADDR_CSEQCIO         0xB8002000
+#define REG_BASE_ADDR_EXSI            0xB8042800
+
+#define MBAR0_SWA_SIZE                0x58
+extern  u32    MBAR0_SWB_SIZE;
+#define MBAR0_SWC_SIZE                0x8
+
+/* MBAR1, points to On Chip Memory */
+#define OCM_BASE_ADDR                 0xA0000000
+#define OCM_MAX_SIZE                  0x20000
+
+/* Smallest address possible to reference */
+#define ALL_BASE_ADDR                 OCM_BASE_ADDR
+
+/* PCI configuration space registers */
+#define PCI_IOBAR_OFFSET              4
+
+#define PCI_CONF_MBAR1                0x6C
+#define PCI_CONF_MBAR0_SWA            0x70
+#define PCI_CONF_MBAR0_SWB            0x74
+#define PCI_CONF_MBAR0_SWC            0x78
+#define PCI_CONF_MBAR_KEY             0x7C
+#define PCI_CONF_FLSH_BAR             0xB8
+
+#include "aic94xx_reg_def.h"
+
+u8  asd_read_reg_byte(struct asd_ha_struct *asd_ha, u32 reg);
+u16 asd_read_reg_word(struct asd_ha_struct *asd_ha, u32 reg);
+u32 asd_read_reg_dword(struct asd_ha_struct *asd_ha, u32 reg);
+
+void asd_write_reg_byte(struct asd_ha_struct *asd_ha, u32 reg, u8 val);
+void asd_write_reg_word(struct asd_ha_struct *asd_ha, u32 reg, u16 val);
+void asd_write_reg_dword(struct asd_ha_struct *asd_ha, u32 reg, u32 val);
+
+void asd_read_reg_string(struct asd_ha_struct *asd_ha, void *dst,
+			 u32 offs, int count);
+void asd_write_reg_string(struct asd_ha_struct *asd_ha, void *src,
+			  u32 offs, int count);
+
+#define ASD_READ_OCM(type, ord, S)                                    \
+static inline type asd_read_ocm_##ord (struct asd_ha_struct *asd_ha,  \
+					 u32 offs)                    \
+{	/* direct read at offs within io_handle[1], then rmb() */     \
+	struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[1];   \
+	type val = read##S (io_handle->addr + (unsigned long) offs);  \
+	rmb();                                                        \
+	return val;                                                   \
+}
+
+ASD_READ_OCM(u8, byte, b);	/* asd_read_ocm_byte(), uses readb */
+ASD_READ_OCM(u16,word, w);	/* asd_read_ocm_word(), uses readw */
+ASD_READ_OCM(u32,dword,l);	/* asd_read_ocm_dword(), uses readl */
+
+#define ASD_WRITE_OCM(type, ord, S)                                    \
+static inline void asd_write_ocm_##ord (struct asd_ha_struct *asd_ha,  \
+					 u32 offs, type val)          \
+{	/* NOTE(review): no wmb() here, unlike asd_write_byte(); confirm */ \
+	struct asd_ha_addrspace *io_handle = &asd_ha->io_handle[1];   \
+	write##S (val, io_handle->addr + (unsigned long) offs);       \
+	return;                                                       \
+}
+
+ASD_WRITE_OCM(u8, byte, b);	/* asd_write_ocm_byte(), uses writeb */
+ASD_WRITE_OCM(u16,word, w);	/* asd_write_ocm_word(), uses writew */
+ASD_WRITE_OCM(u32,dword,l);	/* asd_write_ocm_dword(), uses writel */
+
+#define ASD_DDBSITE_READ(type, ord)                                        \
+static inline type asd_ddbsite_read_##ord (struct asd_ha_struct *asd_ha,   \
+					   u16 ddb_site_no,                \
+					   u16 offs)                       \
+{	/* program offset and site number, then read through CTXACCESS */ \
+	asd_write_reg_word(asd_ha, ALTCIOADR, MnDDB_SITE + offs);          \
+	asd_write_reg_word(asd_ha, ADDBPTR, ddb_site_no);                  \
+	return asd_read_reg_##ord (asd_ha, CTXACCESS);                     \
+}
+
+ASD_DDBSITE_READ(u32, dword);	/* asd_ddbsite_read_dword() */
+ASD_DDBSITE_READ(u16, word);	/* asd_ddbsite_read_word() */
+
+static inline u8 asd_ddbsite_read_byte(struct asd_ha_struct *asd_ha,
+				       u16 ddb_site_no,
+				       u16 offs)
+{
+	/* Fetch the aligned word that contains @offs, then pick its high
+	 * byte for an odd offset or its low byte for an even one.
+	 */
+	u16 word = asd_ddbsite_read_word(asd_ha, ddb_site_no, offs & ~1);
+
+	return (offs & 1) ? word >> 8 : word & 0xFF;
+}
+
+
+#define ASD_DDBSITE_WRITE(type, ord)                                       \
+static inline void asd_ddbsite_write_##ord (struct asd_ha_struct *asd_ha,  \
+					u16 ddb_site_no,                   \
+					u16 offs, type val)                \
+{	/* program offset and site number, then write through CTXACCESS */ \
+	asd_write_reg_word(asd_ha, ALTCIOADR, MnDDB_SITE + offs);          \
+	asd_write_reg_word(asd_ha, ADDBPTR, ddb_site_no);                  \
+	asd_write_reg_##ord (asd_ha, CTXACCESS, val);                      \
+}
+
+ASD_DDBSITE_WRITE(u32, dword);	/* asd_ddbsite_write_dword() */
+ASD_DDBSITE_WRITE(u16, word);	/* asd_ddbsite_write_word() */
+
+static inline void asd_ddbsite_write_byte(struct asd_ha_struct *asd_ha,
+					  u16 ddb_site_no,
+					  u16 offs, u8 val)
+{
+	/* Read-modify-write of the aligned word that contains @offs. */
+	const int is_high = offs & 1;
+	u16 word_offs = offs & ~1;
+	u16 word = asd_ddbsite_read_word(asd_ha, ddb_site_no, word_offs);
+
+	word = is_high ? (val << 8) | (word & 0xFF) : (word & 0xFF00) | val;
+	asd_ddbsite_write_word(asd_ha, ddb_site_no, word_offs, word);
+}
+
+
+#define ASD_SCBSITE_READ(type, ord)                                        \
+static inline type asd_scbsite_read_##ord (struct asd_ha_struct *asd_ha,   \
+					   u16 scb_site_no,                \
+					   u16 offs)                       \
+{	/* program offset and site number, then read through CTXACCESS */ \
+	asd_write_reg_word(asd_ha, ALTCIOADR, MnSCB_SITE + offs);          \
+	asd_write_reg_word(asd_ha, ASCBPTR, scb_site_no);                  \
+	return asd_read_reg_##ord (asd_ha, CTXACCESS);                     \
+}
+
+ASD_SCBSITE_READ(u32, dword);	/* asd_scbsite_read_dword() */
+ASD_SCBSITE_READ(u16, word);	/* asd_scbsite_read_word() */
+
+static inline u8 asd_scbsite_read_byte(struct asd_ha_struct *asd_ha,
+				       u16 scb_site_no,
+				       u16 offs)
+{
+	/* Fetch the aligned word that contains @offs, then pick its high
+	 * byte for an odd offset or its low byte for an even one.
+	 */
+	u16 word = asd_scbsite_read_word(asd_ha, scb_site_no, offs & ~1);
+
+	return (offs & 1) ? word >> 8 : word & 0xFF;
+}
+
+
+#define ASD_SCBSITE_WRITE(type, ord)                                       \
+static inline void asd_scbsite_write_##ord (struct asd_ha_struct *asd_ha,  \
+					u16 scb_site_no,                   \
+					u16 offs, type val)                \
+{	/* program offset and site number, then write through CTXACCESS */ \
+	asd_write_reg_word(asd_ha, ALTCIOADR, MnSCB_SITE + offs);          \
+	asd_write_reg_word(asd_ha, ASCBPTR, scb_site_no);                  \
+	asd_write_reg_##ord (asd_ha, CTXACCESS, val);                      \
+}
+
+ASD_SCBSITE_WRITE(u32, dword);	/* asd_scbsite_write_dword() */
+ASD_SCBSITE_WRITE(u16, word);	/* asd_scbsite_write_word() */
+
+static inline void asd_scbsite_write_byte(struct asd_ha_struct *asd_ha,
+					  u16 scb_site_no,
+					  u16 offs, u8 val)
+{
+	/* Read-modify-write of the aligned word that contains @offs. */
+	const int is_high = offs & 1;
+	u16 word_offs = offs & ~1;
+	u16 word = asd_scbsite_read_word(asd_ha, scb_site_no, word_offs);
+
+	word = is_high ? (val << 8) | (word & 0xFF) : (word & 0xFF00) | val;
+	asd_scbsite_write_word(asd_ha, scb_site_no, word_offs, word);
+}
+
+/**
+ * asd_ddbsite_update_word -- atomically update a word in a ddb site
+ * @asd_ha: pointer to host adapter structure
+ * @ddb_site_no: the DDB site number
+ * @offs: the offset into the DDB
+ * @oldval: value the caller expects to find at that offset
+ * @newval: the new value to replace it
+ *
+ * This function is used when the sequencers are running and we need to
+ * update a DDB site atomically without expensive pausing and unpausing
+ * of the sequencers and accessing the DDB site through the CIO bus.
+ *
+ * Return 0 on success; -EFAULT on parity error; -EAGAIN if the old value
+ * is different than the current value at that offset.
+ */
+static inline int asd_ddbsite_update_word(struct asd_ha_struct *asd_ha,
+					  u16 ddb_site_no, u16 offs,
+					  u16 oldval, u16 newval)
+{
+	u8 status;
+
+	/* Pre-check; this read also programs ALTCIOADR and ADDBPTR. */
+	if (asd_ddbsite_read_word(asd_ha, ddb_site_no, offs) != oldval)
+		return -EAGAIN;
+	asd_write_reg_word(asd_ha, AOLDDATA, oldval);
+	asd_write_reg_word(asd_ha, ANEWDATA, newval);
+	for (;;) {	/* poll, with no timeout, until ATOMICDONE is set */
+		status = asd_read_reg_byte(asd_ha, ATOMICSTATCTL);
+		if (status & ATOMICDONE)
+			break;
+	}
+	if (status & ATOMICERR)
+		return -EFAULT;		/* parity error */
+	return (status & ATOMICWIN) ? 0 : -EAGAIN; /* else: value had changed */
+}
+
+static inline int asd_ddbsite_update_byte(struct asd_ha_struct *asd_ha,
+					  u16 ddb_site_no, u16 offs,
+					  u8 _oldval, u8 _newval)
+{
+	/* Byte-wide update built on asd_ddbsite_update_word(): the neighbour
+	 * byte of the aligned word is carried over unchanged into both the
+	 * expected and the replacement word.
+	 */
+	u16 word_offs = offs & ~1;
+	u16 cur = asd_ddbsite_read_word(asd_ha, ddb_site_no, word_offs);
+	u16 keep = (offs & 1) ? (cur & 0xFF) : (cur & 0xFF00);
+	u16 seen = (offs & 1) ? (cur >> 8) : (cur & 0xFF);
+	unsigned int shift = (offs & 1) ? 8 : 0;
+
+	if (seen != _oldval)
+		return -EAGAIN;
+	return asd_ddbsite_update_word(asd_ha, ddb_site_no, word_offs,
+				       (_oldval << shift) | keep,
+				       (_newval << shift) | keep);
+}
+
+static inline void asd_write_reg_addr(struct asd_ha_struct *asd_ha, u32 reg,
+				      dma_addr_t dma_handle)
+{	/* store a bus address as two dwords: LO part at reg, HI part at reg+4 */
+	asd_write_reg_dword(asd_ha, reg,   ASD_BUSADDR_LO(dma_handle));
+	asd_write_reg_dword(asd_ha, reg+4, ASD_BUSADDR_HI(dma_handle));
+}
+
+static inline u32 asd_get_cmdctx_size(struct asd_ha_struct *asd_ha)
+{
+	/* DCHREVISION returns 0, possibly broken */
+	return (asd_read_reg_dword(asd_ha, LmMnINT(0,0)) & CTXMEMSIZE) ?
+		65536 : 32768;	/* CTXMEMSIZE bit set selects the larger size */
+}
+
+static inline u32 asd_get_devctx_size(struct asd_ha_struct *asd_ha)
+{
+	return (asd_read_reg_dword(asd_ha, LmMnINT(0,0)) & CTXMEMSIZE) ?
+		8192 : 4096;	/* CTXMEMSIZE bit set selects the larger size */
+}
+
+static inline void asd_disable_ints(struct asd_ha_struct *asd_ha)
+{	/* writes the RST_EN_* bits collected in RST_CHIMINTEN to CHIMINTEN */
+	asd_write_reg_dword(asd_ha, CHIMINTEN, RST_CHIMINTEN);
+}
+
+static inline void asd_enable_ints(struct asd_ha_struct *asd_ha)
+{
+	/* Enable COM SAS interrupt on errors, COMSTAT (three EN_* error bits) */
+	asd_write_reg_dword(asd_ha, COMSTATEN,
+			    EN_CSBUFPERR | EN_CSERR | EN_OVLYERR);
+	/* Enable DCH SAS CFIFTOERR; the enable bit lives in DCHSTATUS itself */
+	asd_write_reg_dword(asd_ha, DCHSTATUS, EN_CFIFTOERR);
+	/* Enable Host Device interrupts; SET_CHIMINTEN has no DEVTIMER bits */
+	asd_write_reg_dword(asd_ha, CHIMINTEN, SET_CHIMINTEN);
+}
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_reg_def.h b/drivers/scsi/aic94xx/aic94xx_reg_def.h
new file mode 100644
index 0000000..b79f45f
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_reg_def.h
@@ -0,0 +1,2398 @@
+/*
+ * Aic94xx SAS/SATA driver hardware registers definitions.
+ *
+ * Copyright (C) 2004 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2004 David Chaw <david_chaw@adaptec.com>
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * Luben Tuikov: Some register value updates to make it work with the window
+ * agnostic register r/w functions.  Some register corrections, sizes,
+ * etc.
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * $Id: //depot/aic94xx/aic94xx_reg_def.h#27 $
+ *
+ */
+
+#ifndef _ADP94XX_REG_DEF_H_
+#define _ADP94XX_REG_DEF_H_
+
+/*
+ * Common definitions.
+ */
+#define CSEQ_MODE_PAGE_SIZE	0x200		/* CSEQ mode page size */
+#define LmSEQ_MODE_PAGE_SIZE	0x200		/* LmSEQ mode page size */
+#define LmSEQ_HOST_REG_SIZE   	0x4000		/* LmSEQ Host Register size */
+
+/********************* COM_SAS registers definition *************************/
+
+/* The base is REG_BASE_ADDR, defined in aic94xx_reg.h.
+ */
+
+/*
+ * CHIM Registers, Address Range : (0x00-0xFF)
+ */
+#define COMBIST		(REG_BASE_ADDR + 0x00)
+
+/* bits 31:24 */
+#define		L7BLKRST		0x80000000
+#define		L6BLKRST		0x40000000
+#define		L5BLKRST		0x20000000
+#define		L4BLKRST		0x10000000
+#define		L3BLKRST		0x08000000
+#define		L2BLKRST		0x04000000
+#define		L1BLKRST		0x02000000
+#define		L0BLKRST		0x01000000
+#define		LmBLKRST		0xFF000000
+#define LmBLKRST_COMBIST(phyid)		(1U << (24 + (phyid)))	/* phy n -> LnBLKRST */
+
+#define		OCMBLKRST		0x00400000
+#define		CTXMEMBLKRST		0x00200000
+#define		CSEQBLKRST		0x00100000
+#define		EXSIBLKRST		0x00040000
+#define		DPIBLKRST		0x00020000
+#define		DFIFBLKRST		0x00010000
+#define		HARDRST			0x00000200
+#define		COMBLKRST		0x00000100
+#define		FRCDFPERR		0x00000080
+#define		FRCCIOPERR		0x00000020
+#define		FRCBISTERR		0x00000010
+#define		COMBISTEN		0x00000004
+#define		COMBISTDONE		0x00000002	/* ro */
+#define 	COMBISTFAIL		0x00000001	/* ro */
+
+#define COMSTAT		(REG_BASE_ADDR + 0x04)
+
+#define		REQMBXREAD		0x00000040
+#define 	RSPMBXAVAIL		0x00000020
+#define 	CSBUFPERR		0x00000008
+#define		OVLYERR			0x00000004
+#define 	CSERR			0x00000002
+#define		OVLYDMADONE		0x00000001
+
+#define		COMSTAT_MASK		(REQMBXREAD | RSPMBXAVAIL | \
+					 CSBUFPERR | OVLYERR | CSERR |\
+					 OVLYDMADONE)
+
+#define COMSTATEN	(REG_BASE_ADDR + 0x08)
+
+#define		EN_REQMBXREAD		0x00000040
+#define		EN_RSPMBXAVAIL		0x00000020
+#define		EN_CSBUFPERR		0x00000008
+#define		EN_OVLYERR		0x00000004
+#define		EN_CSERR		0x00000002
+#define		EN_OVLYDONE		0x00000001
+
+#define SCBPRO		(REG_BASE_ADDR + 0x0C)
+
+#define		SCBCONS_MASK		0xFFFF0000
+#define		SCBPRO_MASK		0x0000FFFF
+
+#define CHIMREQMBX	(REG_BASE_ADDR + 0x10)
+
+#define CHIMRSPMBX	(REG_BASE_ADDR + 0x14)
+
+#define CHIMINT		(REG_BASE_ADDR + 0x18)
+
+#define		EXT_INT0		0x00000800
+#define		EXT_INT1		0x00000400
+#define		PORRSTDET		0x00000200
+#define		HARDRSTDET		0x00000100
+#define		DLAVAILQ		0x00000080	/* ro */
+#define		HOSTERR			0x00000040
+#define		INITERR			0x00000020
+#define		DEVINT			0x00000010
+#define		COMINT			0x00000008
+#define		DEVTIMER2		0x00000004
+#define		DEVTIMER1		0x00000002
+#define		DLAVAIL			0x00000001
+
+#define		CHIMINT_MASK		(HOSTERR | INITERR | DEVINT | COMINT |\
+					 DEVTIMER2 | DEVTIMER1 | DLAVAIL)
+
+#define 	DEVEXCEPT_MASK		(HOSTERR | INITERR | DEVINT | COMINT)
+
+#define CHIMINTEN	(REG_BASE_ADDR + 0x1C)
+
+#define		RST_EN_EXT_INT1		0x01000000
+#define		RST_EN_EXT_INT0		0x00800000
+#define		RST_EN_HOSTERR		0x00400000
+#define		RST_EN_INITERR		0x00200000
+#define		RST_EN_DEVINT		0x00100000
+#define		RST_EN_COMINT		0x00080000
+#define		RST_EN_DEVTIMER2	0x00040000
+#define		RST_EN_DEVTIMER1	0x00020000
+#define		RST_EN_DLAVAIL		0x00010000
+#define		SET_EN_EXT_INT1		0x00000100
+#define		SET_EN_EXT_INT0		0x00000080
+#define		SET_EN_HOSTERR		0x00000040
+#define		SET_EN_INITERR		0x00000020
+#define		SET_EN_DEVINT		0x00000010
+#define		SET_EN_COMINT		0x00000008
+#define		SET_EN_DEVTIMER2	0x00000004
+#define		SET_EN_DEVTIMER1	0x00000002
+#define		SET_EN_DLAVAIL		0x00000001
+
+#define		RST_CHIMINTEN		(RST_EN_HOSTERR | RST_EN_INITERR | \
+					 RST_EN_DEVINT | RST_EN_COMINT | \
+					 RST_EN_DEVTIMER2 | RST_EN_DEVTIMER1 |\
+					 RST_EN_DLAVAIL)
+
+#define		SET_CHIMINTEN		(SET_EN_HOSTERR | SET_EN_INITERR |\
+					 SET_EN_DEVINT | SET_EN_COMINT |\
+					 SET_EN_DLAVAIL)
+
+#define OVLYDMACTL	(REG_BASE_ADDR + 0x20)
+
+#define		OVLYADR_MASK		0x07FF0000
+#define		OVLYLSEQ_MASK		0x0000FF00
+#define		OVLYCSEQ		0x00000080
+#define		OVLYHALTERR		0x00000040
+#define		PIOCMODE		0x00000020
+#define		RESETOVLYDMA		0x00000008	/* wo */
+#define		STARTOVLYDMA		0x00000004
+#define		STOPOVLYDMA		0x00000002	/* wo */
+#define		OVLYDMAACT		0x00000001	/* ro */
+
+#define OVLYDMACNT	(REG_BASE_ADDR + 0x24)
+
+#define		OVLYDOMAIN1		0x20000000	/* ro */
+#define		OVLYDOMAIN0		0x10000000
+#define		OVLYBUFADR_MASK		0x007F0000
+#define		OVLYDMACNT_MASK		0x00003FFF
+
+#define OVLYDMAADR	(REG_BASE_ADDR + 0x28)
+
+#define DMAERR		(REG_BASE_ADDR + 0x30)
+
+#define		OVLYERRSTAT_MASK	0x0000FF00	/* ro */
+#define		CSERRSTAT_MASK		0x000000FF	/* ro */
+
+#define SPIODATA	(REG_BASE_ADDR + 0x34)
+
+/* 0x38 - 0x3C are reserved  */
+
+#define T1CNTRLR	(REG_BASE_ADDR + 0x40)
+
+#define		T1DONE			0x00010000	/* ro */
+#define		TIMER64			0x00000400
+#define		T1ENABLE		0x00000200
+#define		T1RELOAD		0x00000100
+#define		T1PRESCALER_MASK	0x00000003
+
+#define	T1CMPR		(REG_BASE_ADDR + 0x44)
+
+#define T1CNTR		(REG_BASE_ADDR + 0x48)
+
+#define T2CNTRLR	(REG_BASE_ADDR + 0x4C)
+
+#define		T2DONE			0x00010000	/* ro */
+#define		T2ENABLE		0x00000200
+#define		T2RELOAD		0x00000100
+#define		T2PRESCALER_MASK	0x00000003
+
+#define	T2CMPR		(REG_BASE_ADDR + 0x50)
+
+#define T2CNTR		(REG_BASE_ADDR + 0x54)
+
+/* 0x58 - 0xFC are reserved */
+
+/*
+ * DCH_SAS Registers, Address Range : (0x800-0xFFF)
+ */
+#define CMDCTXBASE	(REG_BASE_ADDR + 0x800)
+
+#define DEVCTXBASE	(REG_BASE_ADDR + 0x808)
+
+#define CTXDOMAIN	(REG_BASE_ADDR + 0x810)
+
+#define		DEVCTXDOMAIN1		0x00000008	/* ro */
+#define		DEVCTXDOMAIN0		0x00000004
+#define		CMDCTXDOMAIN1		0x00000002	/* ro */
+#define		CMDCTXDOMAIN0		0x00000001
+
+#define DCHCTL		(REG_BASE_ADDR + 0x814)
+
+#define		OCMBISTREPAIR		0x00080000
+#define		OCMBISTEN		0x00040000
+#define		OCMBISTDN		0x00020000	/* ro */
+#define		OCMBISTFAIL		0x00010000	/* ro */
+#define		DDBBISTEN		0x00004000
+#define		DDBBISTDN		0x00002000	/* ro */
+#define		DDBBISTFAIL		0x00001000	/* ro */
+#define		SCBBISTEN		0x00000400
+#define		SCBBISTDN		0x00000200	/* ro */
+#define		SCBBISTFAIL		0x00000100	/* ro */
+
+#define		MEMSEL_MASK		0x000000E0
+#define		MEMSEL_CCM_LSEQ		0x00000000
+#define		MEMSEL_CCM_IOP		0x00000020
+#define		MEMSEL_CCM_SASCTL	0x00000040
+#define		MEMSEL_DCM_LSEQ		0x00000060
+#define		MEMSEL_DCM_IOP		0x00000080
+#define		MEMSEL_OCM		0x000000A0
+
+#define		FRCERR			0x00000010
+#define		AUTORLS			0x00000001
+
+#define DCHREVISION	(REG_BASE_ADDR + 0x818)
+
+#define		DCHREVISION_MASK	0x000000FF
+
+#define DCHSTATUS	(REG_BASE_ADDR + 0x81C)
+
+#define		EN_CFIFTOERR		0x00020000
+#define		CFIFTOERR		0x00000200
+#define		CSEQINT			0x00000100	/* ro */
+#define		LSEQ7INT		0x00000080	/* ro */
+#define		LSEQ6INT		0x00000040	/* ro */
+#define		LSEQ5INT		0x00000020	/* ro */
+#define		LSEQ4INT		0x00000010	/* ro */
+#define		LSEQ3INT		0x00000008	/* ro */
+#define		LSEQ2INT		0x00000004	/* ro */
+#define		LSEQ1INT		0x00000002	/* ro */
+#define		LSEQ0INT		0x00000001	/* ro */
+
+#define		LSEQINT_MASK		(LSEQ7INT | LSEQ6INT | LSEQ5INT |\
+					 LSEQ4INT | LSEQ3INT | LSEQ2INT	|\
+					 LSEQ1INT | LSEQ0INT)
+
+#define DCHDFIFDEBUG	(REG_BASE_ADDR + 0x820)
+#define		ENFAIRMST		0x00FF0000
+#define		DISWRMST9		0x00000200
+#define		DISWRMST8		0x00000100
+#define		DISRDMST		0x000000FF
+
+#define ATOMICSTATCTL	(REG_BASE_ADDR + 0x824)
+/* 8 bit wide */
+#define		AUTOINC			0x80
+#define		ATOMICERR		0x04
+#define		ATOMICWIN		0x02
+#define		ATOMICDONE		0x01
+
+
+#define ALTCIOADR	(REG_BASE_ADDR + 0x828)
+/* 16 bit; bits 8:0 define CIO addr space of CSEQ */
+
+#define ASCBPTR		(REG_BASE_ADDR + 0x82C)
+/* 16 bit wide */
+
+#define ADDBPTR		(REG_BASE_ADDR + 0x82E)
+/* 16 bit wide */
+
+#define ANEWDATA	(REG_BASE_ADDR + 0x830)
+/* 16 bit */
+
+#define AOLDDATA	(REG_BASE_ADDR + 0x834)
+/* 16 bit */
+
+#define CTXACCESS	(REG_BASE_ADDR + 0x838)
+/* 32 bit */
+
+/* 0x83C - 0xFFC are reserved */
+
+/*
+ * ARP2 External Processor Registers, Address Range : (0x00-0x1F)
+ */
+#define ARP2CTL		0x00
+
+#define		FRCSCRPERR		0x00040000
+#define		FRCARP2PERR		0x00020000
+#define		FRCARP2ILLOPC		0x00010000
+#define		ENWAITTO		0x00008000
+#define		PERRORDIS		0x00004000
+#define		FAILDIS			0x00002000
+#define		CIOPERRDIS		0x00001000
+#define		BREAKEN3		0x00000800
+#define		BREAKEN2		0x00000400
+#define		BREAKEN1		0x00000200
+#define		BREAKEN0		0x00000100
+#define		EPAUSE			0x00000008
+#define		PAUSED			0x00000004	/* ro */
+#define		STEP			0x00000002
+#define		ARP2RESET		0x00000001	/* wo */
+
+#define ARP2INT		0x04
+
+#define		HALTCODE_MASK		0x00FF0000	/* ro */
+#define		ARP2WAITTO		0x00000100
+#define		ARP2HALTC		0x00000080
+#define		ARP2ILLOPC		0x00000040
+#define		ARP2PERR		0x00000020
+#define		ARP2CIOPERR		0x00000010
+#define		ARP2BREAK3		0x00000008
+#define		ARP2BREAK2		0x00000004
+#define		ARP2BREAK1		0x00000002
+#define		ARP2BREAK0		0x00000001
+
+#define ARP2INTEN	0x08
+
+#define		EN_ARP2WAITTO		0x00000100
+#define		EN_ARP2HALTC		0x00000080
+#define		EN_ARP2ILLOPC		0x00000040
+#define		EN_ARP2PERR		0x00000020
+#define		EN_ARP2CIOPERR		0x00000010
+#define		EN_ARP2BREAK3		0x00000008
+#define		EN_ARP2BREAK2		0x00000004
+#define		EN_ARP2BREAK1		0x00000002
+#define		EN_ARP2BREAK0		0x00000001
+
+#define ARP2BREAKADR01	0x0C
+
+#define		BREAKADR1_MASK		0x0FFF0000
+#define		BREAKADR0_MASK		0x00000FFF
+
+#define	ARP2BREAKADR23	0x10
+
+#define		BREAKADR3_MASK		0x0FFF0000
+#define		BREAKADR2_MASK		0x00000FFF
+
+/* Offsets 0x14 - 0x1C are reserved */
+
+/*
+ * ARP2 Registers, Address Range : (0x00-0x1F)
+ * The definitions have the same address offset for CSEQ and LmSEQ
+ * CIO Bus Registers.
+ */
+#define MODEPTR		0x00
+
+#define		DSTMODE			0xF0
+#define		SRCMODE			0x0F
+
+#define ALTMODE		0x01
+
+#define		ALTDMODE		0xF0
+#define		ALTSMODE		0x0F
+
+#define ATOMICXCHG	0x02
+
+#define FLAG		0x04
+
+#define		INTCODE_MASK		0xF0
+#define		ALTMODEV2		0x04
+#define		CARRY_INT		0x02
+#define		CARRY			0x01
+
+#define ARP2INTCTL	0x05
+
+#define 	PAUSEDIS		0x80
+#define		RSTINTCTL		0x40
+#define		POPALTMODE		0x08
+#define		ALTMODEV		0x04
+#define		INTMASK			0x02
+#define		IRET			0x01
+
+#define STACK		0x06
+
+#define FUNCTION1	0x07
+
+#define PRGMCNT		0x08
+
+#define ACCUM		0x0A
+
+#define SINDEX		0x0C
+
+#define DINDEX		0x0E
+
+#define ALLONES		0x10
+
+#define ALLZEROS	0x11
+
+#define SINDIR		0x12
+
+#define DINDIR		0x13
+
+#define JUMLDIR		0x14
+
+#define ARP2HALTCODE	0x15
+
+#define CURRADDR	0x16
+
+#define LASTADDR	0x18
+
+#define NXTLADDR	0x1A
+
+#define DBGPORTPTR	0x1C
+
+#define DBGPORT		0x1D
+
+/*
+ * CIO Registers.
+ * The definitions have the same address offset for CSEQ and LmSEQ
+ * CIO Bus Registers.
+ */
+#define MnSCBPTR      	0x20
+
+#define MnDDBPTR      	0x22
+
+#define SCRATCHPAGE	0x24
+
+#define MnSCRATCHPAGE	0x25
+
+#define SCRATCHPAGESV	0x26
+
+#define MnSCRATCHPAGESV	0x27
+
+#define MnDMAERRS	0x46
+
+#define MnSGDMAERRS	0x47
+
+#define MnSGBUF		0x53
+
+#define MnSGDMASTAT	0x5b
+
+#define MnDDMACTL	0x5c	/* RAZOR.rspec.fm rev 1.5 is wrong */
+
+#define MnDDMASTAT	0x5d	/* RAZOR.rspec.fm rev 1.5 is wrong */
+
+#define MnDDMAMODE	0x5e	/* RAZOR.rspec.fm rev 1.5 is wrong */
+
+#define MnDMAENG	0x60
+
+#define MnPIPECTL	0x61
+
+#define MnSGBADR	0x65
+
+#define MnSCB_SITE	0x100
+
+#define MnDDB_SITE	0x180
+
+/*
+ * The common definitions below have the same address offset for both
+ * CSEQ and LmSEQ.
+ */
+#define BISTCTL0	0x4C
+
+#define BISTCTL1	0x50
+
+#define MAPPEDSCR	0x800
+
+/*
+ * CSEQ Host Register, Address Range : (0x000-0xFFC)
+ */
+#define CSEQ_HOST_REG_BASE_ADR		0xB8001000
+
+#define CARP2CTL			(CSEQ_HOST_REG_BASE_ADR	+ ARP2CTL)
+
+#define CARP2INT			(CSEQ_HOST_REG_BASE_ADR	+ ARP2INT)
+
+#define CARP2INTEN			(CSEQ_HOST_REG_BASE_ADR	+ ARP2INTEN)
+
+#define CARP2BREAKADR01			(CSEQ_HOST_REG_BASE_ADR+ARP2BREAKADR01)
+
+#define CARP2BREAKADR23			(CSEQ_HOST_REG_BASE_ADR+ARP2BREAKADR23)
+
+#define CBISTCTL			(CSEQ_HOST_REG_BASE_ADR	+ BISTCTL1)
+
+#define		CSEQRAMBISTEN		0x00000040
+#define		CSEQRAMBISTDN		0x00000020	/* ro */
+#define		CSEQRAMBISTFAIL		0x00000010	/* ro */
+#define		CSEQSCRBISTEN		0x00000004
+#define		CSEQSCRBISTDN		0x00000002	/* ro */
+#define		CSEQSCRBISTFAIL		0x00000001	/* ro */
+
+#define CMAPPEDSCR			(CSEQ_HOST_REG_BASE_ADR	+ MAPPEDSCR)
+
+/*
+ * CSEQ CIO Bus Registers, Address Range : (0x0000-0x1FFC)
+ * 16 modes, each mode is 512 bytes.
+ * Unless specified otherwise, each register is valid for all modes.
+ */
+#define CSEQ_CIO_REG_BASE_ADR		REG_BASE_ADDR_CSEQCIO
+
+#define CSEQm_CIO_REG(Mode, Reg) \
+		(CSEQ_CIO_REG_BASE_ADR  + \
+		((u32) (Mode) * CSEQ_MODE_PAGE_SIZE) + (u32) (Reg))
+
+#define CMODEPTR	(CSEQ_CIO_REG_BASE_ADR + MODEPTR)
+
+#define CALTMODE	(CSEQ_CIO_REG_BASE_ADR + ALTMODE)
+
+#define CATOMICXCHG	(CSEQ_CIO_REG_BASE_ADR + ATOMICXCHG)
+
+#define CFLAG		(CSEQ_CIO_REG_BASE_ADR + FLAG)
+
+#define CARP2INTCTL	(CSEQ_CIO_REG_BASE_ADR + ARP2INTCTL)
+
+#define CSTACK		(CSEQ_CIO_REG_BASE_ADR + STACK)
+
+#define CFUNCTION1	(CSEQ_CIO_REG_BASE_ADR + FUNCTION1)
+
+#define CPRGMCNT	(CSEQ_CIO_REG_BASE_ADR + PRGMCNT)
+
+#define CACCUM		(CSEQ_CIO_REG_BASE_ADR + ACCUM)
+
+#define CSINDEX		(CSEQ_CIO_REG_BASE_ADR + SINDEX)
+
+#define CDINDEX		(CSEQ_CIO_REG_BASE_ADR + DINDEX)
+
+#define CALLONES	(CSEQ_CIO_REG_BASE_ADR + ALLONES)
+
+#define CALLZEROS	(CSEQ_CIO_REG_BASE_ADR + ALLZEROS)
+
+#define CSINDIR		(CSEQ_CIO_REG_BASE_ADR + SINDIR)
+
+#define CDINDIR		(CSEQ_CIO_REG_BASE_ADR + DINDIR)
+
+#define CJUMLDIR	(CSEQ_CIO_REG_BASE_ADR + JUMLDIR)
+
+#define CARP2HALTCODE	(CSEQ_CIO_REG_BASE_ADR + ARP2HALTCODE)
+
+#define CCURRADDR	(CSEQ_CIO_REG_BASE_ADR + CURRADDR)
+
+#define CLASTADDR	(CSEQ_CIO_REG_BASE_ADR + LASTADDR)
+
+#define CNXTLADDR	(CSEQ_CIO_REG_BASE_ADR + NXTLADDR)
+
+#define CDBGPORTPTR	(CSEQ_CIO_REG_BASE_ADR + DBGPORTPTR)
+
+#define CDBGPORT	(CSEQ_CIO_REG_BASE_ADR + DBGPORT)
+
+#define CSCRATCHPAGE	(CSEQ_CIO_REG_BASE_ADR + SCRATCHPAGE)
+
+#define CMnSCBPTR(Mode)       CSEQm_CIO_REG(Mode, MnSCBPTR)
+
+#define CMnDDBPTR(Mode)       CSEQm_CIO_REG(Mode, MnDDBPTR)
+
+#define CMnSCRATCHPAGE(Mode)		CSEQm_CIO_REG(Mode, MnSCRATCHPAGE)
+
+#define CLINKCON	(CSEQ_CIO_REG_BASE_ADR + 0x28)
+
+#define	CCIOAACESS	(CSEQ_CIO_REG_BASE_ADR + 0x2C)
+
+/* mode 0-7: in-page offset, then its address in CSEQ CIO mode page Mode */
+#define MnREQMBX 0x30
+#define CMnREQMBX(Mode)			CSEQm_CIO_REG(Mode, MnREQMBX)
+
+/* mode 8 */
+#define CSEQCON				CSEQm_CIO_REG(8, 0x30)
+
+/* mode 0-7: in-page offset, then its address in CSEQ CIO mode page Mode */
+#define MnRSPMBX 0x34
+#define CMnRSPMBX(Mode)			CSEQm_CIO_REG(Mode, MnRSPMBX)
+
+/* mode 8 */
+#define CSEQCOMCTL			CSEQm_CIO_REG(8, 0x34)
+
+/* mode 8 */
+#define CSEQCOMSTAT			CSEQm_CIO_REG(8, 0x35)
+
+/* mode 8 */
+#define CSEQCOMINTEN			CSEQm_CIO_REG(8, 0x36)
+
+/* mode 8 */
+#define CSEQCOMDMACTL			CSEQm_CIO_REG(8, 0x37)
+
+#define		CSHALTERR		0x10
+#define		RESETCSDMA		0x08		/* wo */
+#define		STARTCSDMA		0x04
+#define		STOPCSDMA		0x02		/* wo */
+#define		CSDMAACT		0x01		/* ro */
+
+/* mode 0-7: in-page offset, then its address in CSEQ CIO mode page Mode */
+#define MnINT 0x38
+#define CMnINT(Mode)			CSEQm_CIO_REG(Mode, MnINT)
+
+#define		CMnREQMBXE		0x02
+#define		CMnRSPMBXF		0x01
+#define		CMnINT_MASK		0x00000003
+
+/* mode 8 */
+#define CSEQREQMBX			CSEQm_CIO_REG(8, 0x38)
+
+/* mode 0-7: in-page offset, then its address in CSEQ CIO mode page Mode */
+#define MnINTEN 0x3C
+#define CMnINTEN(Mode)			CSEQm_CIO_REG(Mode, MnINTEN)
+
+#define		EN_CMnRSPMBXF		0x01
+
+/* mode 8 */
+#define CSEQRSPMBX			CSEQm_CIO_REG(8, 0x3C)
+
+/* mode 8 */
+#define CSDMAADR			CSEQm_CIO_REG(8, 0x40)
+
+/* mode 8 */
+#define CSDMACNT			CSEQm_CIO_REG(8, 0x48)
+
+/* mode 8 */
+#define CSEQDLCTL			CSEQm_CIO_REG(8, 0x4D)
+
+#define		DONELISTEND		0x10
+#define 	DONELISTSIZE_MASK	0x0F
+#define		DONELISTSIZE_8ELEM	0x01
+#define		DONELISTSIZE_16ELEM	0x02
+#define		DONELISTSIZE_32ELEM	0x03
+#define		DONELISTSIZE_64ELEM	0x04
+#define		DONELISTSIZE_128ELEM	0x05
+#define		DONELISTSIZE_256ELEM	0x06
+#define		DONELISTSIZE_512ELEM	0x07
+#define		DONELISTSIZE_1024ELEM	0x08
+#define		DONELISTSIZE_2048ELEM	0x09
+#define		DONELISTSIZE_4096ELEM	0x0A
+#define		DONELISTSIZE_8192ELEM	0x0B
+#define		DONELISTSIZE_16384ELEM	0x0C
+
+/* mode 8 */
+#define CSEQDLOFFS			CSEQm_CIO_REG(8, 0x4E)
+
+/* mode 11 */
+#define CM11INTVEC0			CSEQm_CIO_REG(11, 0x50)
+
+/* mode 11 */
+#define CM11INTVEC1			CSEQm_CIO_REG(11, 0x52)
+
+/* mode 11 */
+#define CM11INTVEC2			CSEQm_CIO_REG(11, 0x54)
+
+#define	CCONMSK	  			(CSEQ_CIO_REG_BASE_ADR + 0x60)
+
+#define	CCONEXIST			(CSEQ_CIO_REG_BASE_ADR + 0x61)
+
+#define	CCONMODE			(CSEQ_CIO_REG_BASE_ADR + 0x62)
+
+#define CTIMERCALC			(CSEQ_CIO_REG_BASE_ADR + 0x64)
+
+#define CINTDIS				(CSEQ_CIO_REG_BASE_ADR + 0x68)
+
+/* mode 8, 32x32 bits, 128 bytes of mapped buffer */
+#define CSBUFFER			CSEQm_CIO_REG(8, 0x80)
+
+#define	CSCRATCH			(CSEQ_CIO_REG_BASE_ADR + 0x1C0)
+
+/* mode 0-8 */
+#define CMnSCRATCH(Mode)		CSEQm_CIO_REG(Mode, 0x1E0)
+
+/*
+ * CSEQ Mapped Instruction RAM Page, Address Range : (0x0000-0x1FFC)
+ */
+#define CSEQ_RAM_REG_BASE_ADR		0xB8004000
+
+/*
+ * The common definitions below have the same address offset for all the Link
+ * sequencers.
+ */
+#define MODECTL		0x40
+
+#define DBGMODE		0x44
+
+#define CONTROL		0x48
+#define LEDTIMER		0x00010000
+#define LEDTIMERS_10us		0x00000000
+#define LEDTIMERS_1ms		0x00000800
+#define LEDTIMERS_100ms		0x00001000
+#define LEDMODE_TXRX		0x00000000
+#define LEDMODE_CONNECTED	0x00000200
+#define LEDPOL			0x00000100
+
+#define LSEQRAM		0x1000
+
+/*
+ * LmSEQ Host Registers, Address Range : (0x0000-0x3FFC)
+ */
+#define LSEQ0_HOST_REG_BASE_ADR		0xB8020000
+#define LSEQ1_HOST_REG_BASE_ADR		0xB8024000
+#define LSEQ2_HOST_REG_BASE_ADR		0xB8028000
+#define LSEQ3_HOST_REG_BASE_ADR		0xB802C000
+#define LSEQ4_HOST_REG_BASE_ADR		0xB8030000
+#define LSEQ5_HOST_REG_BASE_ADR		0xB8034000
+#define LSEQ6_HOST_REG_BASE_ADR		0xB8038000
+#define LSEQ7_HOST_REG_BASE_ADR		0xB803C000
+
+#define LmARP2CTL(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2CTL)
+
+#define LmARP2INT(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2INT)
+
+#define LmARP2INTEN(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2INTEN)
+
+#define LmDBGMODE(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					DBGMODE)
+
+#define LmCONTROL(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					CONTROL)
+
+#define LmARP2BREAKADR01(LinkNum)	(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2BREAKADR01)
+
+#define LmARP2BREAKADR23(LinkNum)	(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					ARP2BREAKADR23)
+
+#define LmMODECTL(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					MODECTL)
+
+#define		LmAUTODISCI		0x08000000
+#define		LmDSBLBITLT		0x04000000
+#define		LmDSBLANTT		0x02000000
+#define		LmDSBLCRTT		0x01000000
+#define		LmDSBLCONT		0x00000100
+#define		LmPRIMODE		0x00000080
+#define		LmDSBLHOLD		0x00000040
+#define		LmDISACK		0x00000020
+#define		LmBLIND48		0x00000010
+#define		LmRCVMODE_MASK		0x0000000C
+#define		LmRCVMODE_PLD		0x00000000
+#define		LmRCVMODE_HPC		0x00000004
+
+#define LmDBGMODE(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					DBGMODE)
+
+#define		LmFRCPERR		0x80000000
+#define		LmMEMSEL_MASK		0x30000000
+#define		LmFRCRBPERR		0x00000000
+#define		LmFRCTBPERR		0x10000000
+#define		LmFRCSGBPERR		0x20000000
+#define		LmFRCARBPERR		0x30000000
+#define		LmRCVIDW		0x00080000
+#define		LmINVDWERR		0x00040000
+#define		LmRCVDISP		0x00004000
+#define		LmDISPERR		0x00002000
+#define		LmDSBLDSCR		0x00000800
+#define		LmDSBLSCR		0x00000400
+#define		LmFRCNAK		0x00000200
+#define		LmFRCROFS		0x00000100
+#define		LmFRCCRC		0x00000080
+#define		LmFRMTYPE_MASK		0x00000070
+#define		LmSG_DATA		0x00000000
+#define		LmSG_COMMAND		0x00000010
+#define		LmSG_TASK		0x00000020
+#define		LmSG_TGTXFER		0x00000030
+#define		LmSG_RESPONSE		0x00000040
+#define		LmSG_IDENADDR		0x00000050
+#define		LmSG_OPENADDR		0x00000060
+#define		LmDISCRCGEN		0x00000008
+#define		LmDISCRCCHK		0x00000004
+#define		LmSSXMTFRM		0x00000002
+#define		LmSSRCVFRM		0x00000001
+
+#define LmCONTROL(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					CONTROL)
+
+#define		LmSTEPXMTFRM		0x00000002
+#define		LmSTEPRCVFRM		0x00000001
+
+#define LmBISTCTL0(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	  \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) + \
+					BISTCTL0)
+
+#define		ARBBISTEN		0x40000000
+#define		ARBBISTDN		0x20000000	/* ro */
+#define		ARBBISTFAIL		0x10000000	/* ro */
+#define		TBBISTEN		0x00000400
+#define		TBBISTDN		0x00000200	/* ro */
+#define		TBBISTFAIL		0x00000100	/* ro */
+#define		RBBISTEN		0x00000040
+#define		RBBISTDN		0x00000020	/* ro */
+#define		RBBISTFAIL		0x00000010	/* ro */
+#define		SGBISTEN		0x00000004
+#define		SGBISTDN		0x00000002	/* ro */
+#define		SGBISTFAIL		0x00000001	/* ro */
+
+#define LmBISTCTL1(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	 \
+					((LinkNum)*LmSEQ_HOST_REG_SIZE) +\
+					BISTCTL1)
+
+#define		LmRAMPAGE1		0x00000200
+#define		LmRAMPAGE0		0x00000100
+#define		LmIMEMBISTEN		0x00000040
+#define		LmIMEMBISTDN		0x00000020	/* ro */
+#define		LmIMEMBISTFAIL		0x00000010	/* ro */
+#define		LmSCRBISTEN		0x00000004
+#define		LmSCRBISTDN		0x00000002	/* ro */
+#define		LmSCRBISTFAIL		0x00000001	/* ro */
+#define		LmRAMPAGE		(LmRAMPAGE1 | LmRAMPAGE0)
+#define		LmRAMPAGE_LSHIFT	0x8	/* bit position of LmRAMPAGE0 */
+
+#define LmSCRATCH(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	   \
+					((LinkNum) * LmSEQ_HOST_REG_SIZE) +\
+					MAPPEDSCR)
+
+#define LmSEQRAM(LinkNum)		(LSEQ0_HOST_REG_BASE_ADR +	   \
+					((LinkNum) * LmSEQ_HOST_REG_SIZE) +\
+					LSEQRAM)
+
+/*
+ * LmSEQ CIO Bus Register, Address Range : (0x0000-0xFFC)
+ * 8 modes, each mode is 512 bytes.
+ * Unless specified otherwise, each register is valid for all modes.
+ */
+#define LmSEQ_CIOBUS_REG_BASE		0x2000
+
+/* Address of CIO mode page Mode of link sequencer LinkNum. */
+#define LmSEQ_PHY_BASE(Mode, LinkNum) \
+	(LSEQ0_HOST_REG_BASE_ADR + (LmSEQ_HOST_REG_SIZE * (u32) (LinkNum)) + \
+	 LmSEQ_CIOBUS_REG_BASE + ((u32) (Mode) * LmSEQ_MODE_PAGE_SIZE))
+
+/* Address of the register at offset Reg inside that mode page. */
+#define LmSEQ_PHY_REG(Mode, LinkNum, Reg) \
+	(LmSEQ_PHY_BASE(Mode, LinkNum) + (u32) (Reg))
+
+#define LmMODEPTR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, MODEPTR)
+
+#define LmALTMODE(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ALTMODE)
+
+#define LmATOMICXCHG(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ATOMICXCHG)
+
+#define LmFLAG(LinkNum)			LmSEQ_PHY_REG(0, LinkNum, FLAG)
+
+#define LmARP2INTCTL(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ARP2INTCTL)
+
+#define LmSTACK(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, STACK)
+
+#define LmFUNCTION1(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, FUNCTION1)
+
+#define LmPRGMCNT(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, PRGMCNT)
+
+#define LmACCUM(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ACCUM)
+
+#define LmSINDEX(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, SINDEX)
+
+#define LmDINDEX(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, DINDEX)
+
+#define LmALLONES(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ALLONES)
+
+#define LmALLZEROS(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ALLZEROS)
+
+#define LmSINDIR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, SINDIR)
+
+#define LmDINDIR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, DINDIR)
+
+#define LmJUMLDIR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, JUMLDIR)
+
+#define LmARP2HALTCODE(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, ARP2HALTCODE)
+
+#define LmCURRADDR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, CURRADDR)
+
+#define LmLASTADDR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, LASTADDR)
+
+#define LmNXTLADDR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, NXTLADDR)
+
+#define LmDBGPORTPTR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, DBGPORTPTR)
+
+#define LmDBGPORT(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, DBGPORT)
+
+#define LmSCRATCHPAGE(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, SCRATCHPAGE)
+
+#define LmMnSCRATCHPAGE(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 	\
+						      MnSCRATCHPAGE)
+
+#define LmTIMERCALC(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x28)
+
+#define LmREQMBX(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x30)
+
+#define LmRSPMBX(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x34)
+
+#define LmMnINT(LinkNum, Mode)		LmSEQ_PHY_REG(Mode, LinkNum, 0x38)
+
+#define		CTXMEMSIZE		0x80000000	/* ro */
+#define		LmACKREQ		0x08000000
+#define		LmNAKREQ		0x04000000
+#define		LmMnXMTERR		0x02000000
+#define		LmM5OOBSVC		0x01000000
+#define		LmHWTINT		0x00800000
+#define		LmMnCTXDONE		0x00100000
+#define		LmM2REQMBXF		0x00080000
+#define		LmM2RSPMBXE		0x00040000
+#define		LmMnDMAERR		0x00020000
+#define		LmRCVPRIM		0x00010000
+#define		LmRCVERR		0x00008000
+#define		LmADDRRCV		0x00004000
+#define		LmMnHDRMISS		0x00002000
+#define		LmMnWAITSCB		0x00001000
+#define		LmMnRLSSCB		0x00000800
+#define		LmMnSAVECTX		0x00000400
+#define		LmMnFETCHSG		0x00000200
+#define		LmMnLOADCTX		0x00000100
+#define		LmMnCFGICL		0x00000080
+#define		LmMnCFGSATA		0x00000040
+#define		LmMnCFGEXPSATA		0x00000020
+#define		LmMnCFGCMPLT		0x00000010
+#define		LmMnCFGRBUF		0x00000008
+#define		LmMnSAVETTR		0x00000004
+#define		LmMnCFGRDAT		0x00000002
+#define		LmMnCFGHDR		0x00000001
+
+#define LmMnINTEN(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x3C)
+
+#define		EN_LmACKREQ		0x08000000
+#define		EN_LmNAKREQ		0x04000000
+#define		EN_LmMnXMTERR		0x02000000
+#define		EN_LmM5OOBSVC		0x01000000
+#define		EN_LmHWTINT		0x00800000
+#define		EN_LmMnCTXDONE		0x00100000
+#define		EN_LmM2REQMBXF		0x00080000
+#define		EN_LmM2RSPMBXE		0x00040000
+#define		EN_LmMnDMAERR		0x00020000
+#define		EN_LmRCVPRIM		0x00010000
+#define		EN_LmRCVERR		0x00008000
+#define		EN_LmADDRRCV		0x00004000
+#define		EN_LmMnHDRMISS		0x00002000
+#define		EN_LmMnWAITSCB		0x00001000
+#define		EN_LmMnRLSSCB		0x00000800
+#define		EN_LmMnSAVECTX		0x00000400
+#define		EN_LmMnFETCHSG		0x00000200
+#define		EN_LmMnLOADCTX		0x00000100
+#define		EN_LmMnCFGICL		0x00000080
+#define		EN_LmMnCFGSATA		0x00000040
+#define		EN_LmMnCFGEXPSATA	0x00000020
+#define		EN_LmMnCFGCMPLT		0x00000010
+#define		EN_LmMnCFGRBUF		0x00000008
+#define		EN_LmMnSAVETTR		0x00000004
+#define		EN_LmMnCFGRDAT		0x00000002
+#define		EN_LmMnCFGHDR		0x00000001
+
+#define		LmM0INTEN_MASK		(EN_LmMnCFGCMPLT | EN_LmMnCFGRBUF | \
+					 EN_LmMnSAVETTR | EN_LmMnCFGRDAT | \
+					 EN_LmMnCFGHDR | EN_LmRCVERR | \
+					 EN_LmADDRRCV | EN_LmMnHDRMISS | \
+					 EN_LmMnRLSSCB | EN_LmMnSAVECTX | \
+					 EN_LmMnFETCHSG | EN_LmMnLOADCTX | \
+					 EN_LmHWTINT | EN_LmMnCTXDONE | \
+					 EN_LmRCVPRIM | EN_LmMnCFGSATA | \
+					 EN_LmMnCFGEXPSATA | EN_LmMnDMAERR)
+
+#define		LmM1INTEN_MASK		(EN_LmMnCFGCMPLT | EN_LmADDRRCV | \
+					 EN_LmMnRLSSCB | EN_LmMnSAVECTX | \
+					 EN_LmMnFETCHSG | EN_LmMnLOADCTX | \
+					 EN_LmMnXMTERR | EN_LmHWTINT | \
+					 EN_LmMnCTXDONE | EN_LmRCVPRIM | \
+					 EN_LmRCVERR | EN_LmMnDMAERR)
+
+#define		LmM2INTEN_MASK		(EN_LmADDRRCV | EN_LmHWTINT | \
+					 EN_LmM2REQMBXF | EN_LmRCVPRIM | \
+					 EN_LmRCVERR)
+
+#define		LmM5INTEN_MASK		(EN_LmADDRRCV | EN_LmM5OOBSVC | \
+					 EN_LmHWTINT | EN_LmRCVPRIM | \
+					 EN_LmRCVERR)
+
+#define LmXMTPRIMD(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x40)
+
+#define LmXMTPRIMCS(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x44)
+
+#define LmCONSTAT(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x45)
+
+#define LmMnDMAERRS(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x46)
+
+#define LmMnSGDMAERRS(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x47)
+
+#define LmM0EXPHDRP(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x48)
+
+#define LmM1SASALIGN(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x48)
+#define SAS_ALIGN_DEFAULT		0xFF
+
+#define LmM0MSKHDRP(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x49)
+
+#define LmM1STPALIGN(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x49)
+#define STP_ALIGN_DEFAULT		0x1F
+
+#define LmM0RCVHDRP(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x4A)
+
+#define LmM1XMTHDRP(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x4A)
+
+#define LmM0ICLADR(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x4B)
+
+#define LmM1ALIGNMODE(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x4B)
+
+#define		LmDISALIGN		0x20
+#define		LmROTSTPALIGN		0x10
+#define		LmSTPALIGN		0x08
+#define		LmROTNOTIFY		0x04
+#define		LmDUALALIGN		0x02
+#define		LmROTALIGN		0x01
+
+#define LmM0EXPRCVNT(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x4C)
+
+#define LmM1XMTCNT(LinkNum)		LmSEQ_PHY_REG(1, LinkNum, 0x4C)
+
+#define LmMnBUFSTAT(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x4E)
+
+#define		LmMnBUFPERR		0x01
+
+/* mode 0-1 */
+#define LmMnXFRLVL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x59)
+
+#define		LmMnXFRLVL_128		0x05
+#define		LmMnXFRLVL_256		0x04
+#define		LmMnXFRLVL_512		0x03
+#define		LmMnXFRLVL_1024		0x02
+#define		LmMnXFRLVL_1536		0x01
+#define		LmMnXFRLVL_2048		0x00
+
+ /* mode 0-1 */
+#define LmMnSGDMACTL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5A)
+
+#define 	LmMnRESETSG		0x04
+#define 	LmMnSTOPSG		0x02
+#define 	LmMnSTARTSG		0x01
+
+/* mode 0-1 */
+#define LmMnSGDMASTAT(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5B)
+
+/* mode 0-1 */
+#define LmMnDDMACTL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5C)
+
+#define 	LmMnFLUSH		0x40		/* wo */
+#define 	LmMnRLSRTRY		0x20		/* wo */
+#define 	LmMnDISCARD		0x10		/* wo */
+#define 	LmMnRESETDAT		0x08		/* wo */
+#define 	LmMnSUSDAT		0x04		/* wo */
+#define 	LmMnSTOPDAT		0x02		/* wo */
+#define 	LmMnSTARTDAT		0x01		/* wo */
+
+/* mode 0-1 */
+#define LmMnDDMASTAT(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5D)
+
+#define		LmMnDPEMPTY		0x80
+#define		LmMnFLUSHING		0x40
+#define		LmMnDDMAREQ		0x20
+#define		LmMnHDMAREQ		0x10
+#define		LmMnDATFREE		0x08
+#define		LmMnDATSUS		0x04
+#define		LmMnDATACT		0x02
+#define		LmMnDATEN		0x01
+
+/* mode 0-1 */
+#define LmMnDDMAMODE(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x5E)
+
+#define 	LmMnDMATYPE_NORMAL		0x0000
+#define 	LmMnDMATYPE_HOST_ONLY_TX	0x0001
+#define 	LmMnDMATYPE_DEVICE_ONLY_TX	0x0002
+#define 	LmMnDMATYPE_INVALID		0x0003
+#define 	LmMnDMATYPE_MASK	0x0003
+
+#define 	LmMnDMAWRAP		0x0004
+#define 	LmMnBITBUCKET		0x0008
+#define 	LmMnDISHDR		0x0010
+#define 	LmMnSTPCRC		0x0020
+#define 	LmXTEST			0x0040
+#define 	LmMnDISCRC		0x0080
+#define 	LmMnENINTLK		0x0100
+#define 	LmMnADDRFRM		0x0400
+#define 	LmMnENXMTCRC		0x0800
+
+/* mode 0-1 */
+#define LmMnXFRCNT(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x70)
+
+/* mode 0-1 */
+#define LmMnDPSEL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x7B)
+#define 	LmMnDPSEL_MASK		0x07
+#define 	LmMnEOLPRE		0x40
+#define 	LmMnEOSPRE		0x80
+
+/* Registers used in conjunction with LmMnDPSEL and LmMnDPACC registers */
+/* Receive Mode n = 0 */
+#define LmMnHRADDR			0x00
+#define LmMnHBYTECNT			0x01
+#define LmMnHREWIND			0x02
+#define LmMnDWADDR			0x03
+#define LmMnDSPACECNT			0x04
+#define LmMnDFRMSIZE			0x05
+
+/* Registers used in conjunction with LmMnDPSEL and LmMnDPACC registers */
+/* Transmit Mode n = 1 */
+#define LmMnHWADDR			0x00
+#define LmMnHSPACECNT			0x01
+/* #define LmMnHREWIND			0x02 */
+#define LmMnDRADDR			0x03
+#define LmMnDBYTECNT			0x04
+/* #define LmMnDFRMSIZE			0x05 */
+
+/* mode 0-1 */
+#define LmMnDPACC(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x78)
+#define 	LmMnDPACC_MASK		0x00FFFFFF
+
+/* mode 0-1 */
+#define LmMnHOLDLVL(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x7D)
+
+#define LmPRMSTAT0(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x80)
+#define LmPRMSTAT0BYTE0			0x80
+#define LmPRMSTAT0BYTE1			0x81
+#define LmPRMSTAT0BYTE2			0x82
+#define LmPRMSTAT0BYTE3			0x83
+
+#define		LmFRAMERCVD		0x80000000
+#define		LmXFRRDYRCVD		0x40000000
+#define		LmUNKNOWNP		0x20000000
+#define		LmBREAK			0x10000000
+#define		LmDONE			0x08000000
+#define		LmOPENACPT		0x04000000
+#define		LmOPENRJCT		0x02000000
+#define		LmOPENRTRY		0x01000000
+#define		LmCLOSERV1		0x00800000
+#define		LmCLOSERV0		0x00400000
+#define		LmCLOSENORM		0x00200000
+#define		LmCLOSECLAF		0x00100000
+#define		LmNOTIFYRV2		0x00080000
+#define		LmNOTIFYRV1		0x00040000
+#define		LmNOTIFYRV0		0x00020000
+#define		LmNOTIFYSPIN		0x00010000
+#define		LmBROADRV4		0x00008000
+#define		LmBROADRV3		0x00004000
+#define		LmBROADRV2		0x00002000
+#define		LmBROADRV1		0x00001000
+#define		LmBROADSES		0x00000800
+#define		LmBROADRVCH1		0x00000400
+#define		LmBROADRVCH0		0x00000200
+#define		LmBROADCH		0x00000100
+#define		LmAIPRVWP		0x00000080
+#define		LmAIPWP			0x00000040
+#define		LmAIPWD			0x00000020
+#define		LmAIPWC			0x00000010
+#define		LmAIPRV2		0x00000008
+#define		LmAIPRV1		0x00000004
+#define		LmAIPRV0		0x00000002
+#define		LmAIPNRML		0x00000001
+
+#define		LmBROADCAST_MASK	(LmBROADCH | LmBROADRVCH0 | \
+					 LmBROADRVCH1)
+
+#define LmPRMSTAT1(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0x84)
+#define LmPRMSTAT1BYTE0			0x84
+#define LmPRMSTAT1BYTE1			0x85
+#define LmPRMSTAT1BYTE2			0x86
+#define LmPRMSTAT1BYTE3			0x87
+
+#define		LmFRMRCVDSTAT		0x80000000
+#define		LmBREAK_DET		0x04000000
+#define		LmCLOSE_DET		0x02000000
+#define		LmDONE_DET		0x01000000
+#define		LmXRDY			0x00040000
+#define 	LmSYNCSRST		0x00020000
+#define 	LmSYNC			0x00010000
+#define 	LmXHOLD			0x00008000
+#define 	LmRRDY			0x00004000
+#define 	LmHOLD			0x00002000
+#define 	LmROK			0x00001000
+#define 	LmRIP			0x00000800
+#define 	LmCRBLK			0x00000400
+#define 	LmACK			0x00000200
+#define 	LmNAK			0x00000100
+#define 	LmHARDRST		0x00000080
+#define 	LmERROR			0x00000040
+#define 	LmRERR			0x00000020
+#define 	LmPMREQP		0x00000010
+#define 	LmPMREQS		0x00000008
+#define 	LmPMACK			0x00000004
+#define 	LmPMNAK			0x00000002
+#define 	LmDMAT			0x00000001
+
+/* mode 1 */
+#define	LmMnSATAFS(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x7E)
+#define	LmMnXMTSIZE(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0x93)
+
+/* mode 0 */
+#define LmMnFRMERR(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0xB0)
+
+#define		LmACRCERR		0x00000800
+#define		LmPHYOVRN		0x00000400
+#define		LmOBOVRN		0x00000200
+#define 	LmMnZERODATA		0x00000100
+#define		LmSATAINTLK		0x00000080
+#define		LmMnCRCERR		0x00000020
+#define		LmRRDYOVRN		0x00000010
+#define		LmMISSSOAF		0x00000008
+#define		LmMISSSOF		0x00000004
+#define		LmMISSEOAF		0x00000002
+#define		LmMISSEOF		0x00000001
+
+#define LmFRMERREN(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0xB4)
+
+#define 	EN_LmACRCERR		0x00000800
+#define 	EN_LmPHYOVRN		0x00000400
+#define 	EN_LmOBOVRN		0x00000200
+#define 	EN_LmMnZERODATA		0x00000100
+#define 	EN_LmSATAINTLK		0x00000080
+#define 	EN_LmFRMBAD		0x00000040
+#define 	EN_LmMnCRCERR		0x00000020
+#define 	EN_LmRRDYOVRN		0x00000010
+#define 	EN_LmMISSSOAF		0x00000008
+#define 	EN_LmMISSSOF		0x00000004
+#define 	EN_LmMISSEOAF		0x00000002
+#define 	EN_LmMISSEOF		0x00000001
+
+/* EN_Lm* frame-error enables in use; EN_LmFRMBAD, EN_LmMISSSOAF left out. */
+#define		LmFRMERREN_MASK \
+	(EN_LmSATAINTLK | EN_LmMnCRCERR | EN_LmRRDYOVRN | EN_LmMISSSOF | \
+	 EN_LmMISSEOAF | EN_LmMISSEOF | EN_LmACRCERR | EN_LmPHYOVRN | \
+	 EN_LmOBOVRN | EN_LmMnZERODATA)
+
+#define LmHWTSTATEN(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0xC5)
+
+#define		EN_LmDONETO		0x80
+#define		EN_LmINVDISP		0x40
+#define		EN_LmINVDW		0x20
+#define		EN_LmDWSEVENT		0x08
+#define		EN_LmCRTTTO		0x04
+#define		EN_LmANTTTO		0x02
+#define		EN_LmBITLTTO		0x01
+
+#define		LmHWTSTATEN_MASK	(EN_LmINVDISP | EN_LmINVDW | \
+					 EN_LmDWSEVENT | EN_LmCRTTTO | \
+					 EN_LmANTTTO | EN_LmDONETO | \
+					 EN_LmBITLTTO)
+
+#define LmHWTSTAT(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xC7)
+
+#define		LmDONETO		0x80
+#define		LmINVDISP		0x40
+#define		LmINVDW			0x20
+#define		LmDWSEVENT		0x08
+#define		LmCRTTTO		0x04
+#define		LmANTTTO		0x02
+#define		LmBITLTTO		0x01
+
+#define LmMnDATABUFADR(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0xC8)
+#define		LmDATABUFADR_MASK	0x0FFF
+
+#define LmMnDATABUF(LinkNum, Mode)	LmSEQ_PHY_REG(Mode, LinkNum, 0xCA)
+
+#define	LmPRIMSTAT0EN(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0xE0)
+
+#define 	EN_LmUNKNOWNP 		0x20000000
+#define 	EN_LmBREAK		0x10000000
+#define 	EN_LmDONE		0x08000000
+#define 	EN_LmOPENACPT		0x04000000
+#define 	EN_LmOPENRJCT		0x02000000
+#define 	EN_LmOPENRTRY		0x01000000
+#define 	EN_LmCLOSERV1		0x00800000
+#define 	EN_LmCLOSERV0		0x00400000
+#define 	EN_LmCLOSENORM		0x00200000
+#define 	EN_LmCLOSECLAF		0x00100000
+#define 	EN_LmNOTIFYRV2		0x00080000
+#define 	EN_LmNOTIFYRV1		0x00040000
+#define 	EN_LmNOTIFYRV0		0x00020000
+#define 	EN_LmNOTIFYSPIN		0x00010000
+#define 	EN_LmBROADRV4		0x00008000
+#define 	EN_LmBROADRV3		0x00004000
+#define 	EN_LmBROADRV2		0x00002000
+#define 	EN_LmBROADRV1		0x00001000
+#define 	EN_LmBROADRV0		0x00000800
+#define 	EN_LmBROADRVCH1		0x00000400
+#define 	EN_LmBROADRVCH0		0x00000200
+#define 	EN_LmBROADCH		0x00000100
+#define 	EN_LmAIPRVWP		0x00000080
+#define 	EN_LmAIPWP		0x00000040
+#define 	EN_LmAIPWD		0x00000020
+#define 	EN_LmAIPWC		0x00000010
+#define 	EN_LmAIPRV2		0x00000008
+#define 	EN_LmAIPRV1		0x00000004
+#define 	EN_LmAIPRV0		0x00000002
+#define 	EN_LmAIPNRML		0x00000001
+
+#define		LmPRIMSTAT0EN_MASK	(EN_LmBREAK | \
+					 EN_LmDONE | EN_LmOPENACPT | \
+					 EN_LmOPENRJCT | EN_LmOPENRTRY | \
+					 EN_LmCLOSERV1 | EN_LmCLOSERV0 | \
+					 EN_LmCLOSENORM | EN_LmCLOSECLAF | \
+					 EN_LmBROADRV4 | EN_LmBROADRV3 | \
+					 EN_LmBROADRV2 | EN_LmBROADRV1 | \
+					 EN_LmBROADRV0 | EN_LmBROADRVCH1 | \
+					 EN_LmBROADRVCH0 | EN_LmBROADCH | \
+					 EN_LmAIPRVWP | EN_LmAIPWP | \
+					 EN_LmAIPWD | EN_LmAIPWC | \
+					 EN_LmAIPRV2 | EN_LmAIPRV1 | \
+					 EN_LmAIPRV0 | EN_LmAIPNRML)
+
+#define LmPRIMSTAT1EN(LinkNum)		LmSEQ_PHY_REG(0, LinkNum, 0xE4)
+
+#define		EN_LmXRDY		0x00040000
+#define		EN_LmSYNCSRST		0x00020000
+#define		EN_LmSYNC		0x00010000
+#define 	EN_LmXHOLD		0x00008000
+#define 	EN_LmRRDY		0x00004000
+#define 	EN_LmHOLD		0x00002000
+#define 	EN_LmROK		0x00001000
+#define 	EN_LmRIP		0x00000800
+#define 	EN_LmCRBLK		0x00000400
+#define 	EN_LmACK		0x00000200
+#define 	EN_LmNAK		0x00000100
+#define 	EN_LmHARDRST		0x00000080
+#define 	EN_LmERROR		0x00000040
+#define 	EN_LmRERR		0x00000020
+#define 	EN_LmPMREQP		0x00000010
+#define 	EN_LmPMREQS		0x00000008
+#define 	EN_LmPMACK		0x00000004
+#define 	EN_LmPMNAK		0x00000002
+#define 	EN_LmDMAT		0x00000001
+
+#define LmPRIMSTAT1EN_MASK		(EN_LmHARDRST | \
+					 EN_LmSYNCSRST | \
+					 EN_LmPMREQP | EN_LmPMREQS | \
+					 EN_LmPMACK | EN_LmPMNAK)
+
+#define LmSMSTATE(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xE8)
+
+#define LmSMSTATEBRK(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xEC)
+
+#define LmSMDBGCTL(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xF0)
+
+
+/*
+ * LmSEQ CIO Bus Mode 3 Register.
+ * Mode 3: Configuration and Setup, IOP Context SCB.
+ */
+#define LmM3SATATIMER(LinkNum) 		LmSEQ_PHY_REG(3, LinkNum, 0x48)
+
+#define LmM3INTVEC0(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x90)
+
+#define LmM3INTVEC1(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x92)
+
+#define LmM3INTVEC2(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x94)
+
+#define LmM3INTVEC3(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x96)
+
+#define LmM3INTVEC4(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x98)
+
+#define LmM3INTVEC5(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x9A)
+
+#define LmM3INTVEC6(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x9C)
+
+#define LmM3INTVEC7(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0x9E)
+
+#define LmM3INTVEC8(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0xA4)
+
+#define LmM3INTVEC9(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0xA6)
+
+#define LmM3INTVEC10(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0xB0)
+
+#define LmM3FRMGAP(LinkNum)		LmSEQ_PHY_REG(3, LinkNum, 0xB4)
+
+#define LmBITL_TIMER(LinkNum) 		LmSEQ_PHY_REG(0, LinkNum, 0xA2)
+
+#define LmWWN(LinkNum) 			LmSEQ_PHY_REG(0, LinkNum, 0xA8)
+
+
+/*
+ * LmSEQ CIO Bus Mode 5 Registers.
+ * Mode 5: Phy/OOB Control and Status.
+ */
+#define LmSEQ_OOB_REG(phy_id, reg)	LmSEQ_PHY_REG(5, (phy_id), (reg))
+
+#define OOB_BFLTR	0x100
+
+#define		BFLTR_THR_MASK		0xF0
+#define		BFLTR_TC_MASK		0x0F
+
+#define OOB_INIT_MIN	0x102
+
+#define OOB_INIT_MAX	0x104
+
+#define OOB_INIT_NEG	0x106
+
+#define	OOB_SAS_MIN	0x108
+
+#define OOB_SAS_MAX	0x10A
+
+#define OOB_SAS_NEG	0x10C
+
+#define OOB_WAKE_MIN	0x10E
+
+#define OOB_WAKE_MAX	0x110
+
+#define OOB_WAKE_NEG	0x112
+
+#define OOB_IDLE_MAX	0x114
+
+#define OOB_BURST_MAX	0x116
+
+#define OOB_DATA_KBITS	0x126
+
+#define OOB_ALIGN_0_DATA	0x12C
+
+#define OOB_ALIGN_1_DATA	0x130
+
+#define D10_2_DATA_k		0x00
+#define SYNC_DATA_k		0x02
+#define ALIGN_1_DATA_k		0x04
+#define ALIGN_0_DATA_k		0x08
+#define BURST_DATA_k		0x10
+
+#define OOB_PHY_RESET_COUNT	0x13C
+
+#define OOB_SIG_GEN	0x140
+
+#define		START_OOB		0x80
+#define		START_DWS		0x40
+#define		ALIGN_CNT3		0x30
+#define 	ALIGN_CNT2		0x20
+#define 	ALIGN_CNT1		0x10
+#define 	ALIGN_CNT4		0x00
+#define		STOP_DWS		0x08
+#define		SEND_COMSAS		0x04
+#define		SEND_COMINIT		0x02
+#define		SEND_COMWAKE		0x01
+
+#define OOB_XMIT	0x141
+
+#define		TX_ENABLE		0x80
+#define		XMIT_OOB_BURST		0x10
+#define		XMIT_D10_2		0x08
+#define		XMIT_SYNC		0x04
+#define		XMIT_ALIGN_1		0x02
+#define		XMIT_ALIGN_0		0x01
+
+#define FUNCTION_MASK	0x142
+
+#define		SAS_MODE_DIS		0x80
+#define		SATA_MODE_DIS		0x40
+#define		SPINUP_HOLD_DIS		0x20
+#define		HOT_PLUG_DIS		0x10
+#define		SATA_PS_DIS		0x08
+#define		FUNCTION_MASK_DEFAULT	(SPINUP_HOLD_DIS | SATA_PS_DIS)
+
+#define OOB_MODE	0x143
+
+#define		SAS_MODE		0x80
+#define		SATA_MODE		0x40
+#define		SLOW_CLK		0x20
+#define		FORCE_XMIT_15		0x08
+#define		PHY_SPEED_60		0x04
+#define		PHY_SPEED_30		0x02
+#define		PHY_SPEED_15		0x01
+
+#define	CURRENT_STATUS	0x144
+
+#define		CURRENT_OOB_DONE	0x80
+#define		CURRENT_LOSS_OF_SIGNAL	0x40
+#define		CURRENT_SPINUP_HOLD	0x20
+#define		CURRENT_HOT_PLUG_CNCT	0x10
+#define		CURRENT_GTO_TIMEOUT	0x08
+#define		CURRENT_OOB_TIMEOUT	0x04
+#define		CURRENT_DEVICE_PRESENT	0x02
+#define		CURRENT_OOB_ERROR	0x01
+
+#define 	CURRENT_OOB1_ERROR	(CURRENT_HOT_PLUG_CNCT | \
+					 CURRENT_GTO_TIMEOUT)
+
+#define 	CURRENT_OOB2_ERROR	(CURRENT_HOT_PLUG_CNCT | \
+					 CURRENT_OOB_ERROR)
+
+#define		DEVICE_ADDED_W_CNT	(CURRENT_OOB_DONE | \
+					 CURRENT_HOT_PLUG_CNCT | \
+					 CURRENT_DEVICE_PRESENT)
+
+#define		DEVICE_ADDED_WO_CNT	(CURRENT_OOB_DONE | \
+					 CURRENT_DEVICE_PRESENT)
+
+#define 	DEVICE_REMOVED		CURRENT_LOSS_OF_SIGNAL
+
+#define		CURRENT_PHY_MASK	(CURRENT_OOB_DONE | \
+					 CURRENT_LOSS_OF_SIGNAL | \
+					 CURRENT_SPINUP_HOLD | \
+					 CURRENT_HOT_PLUG_CNCT | \
+					 CURRENT_GTO_TIMEOUT | \
+					 CURRENT_DEVICE_PRESENT | \
+					 CURRENT_OOB_ERROR )
+
+#define		CURRENT_ERR_MASK	(CURRENT_LOSS_OF_SIGNAL | \
+					 CURRENT_GTO_TIMEOUT | \
+					 CURRENT_OOB_TIMEOUT | \
+					 CURRENT_OOB_ERROR )
+
+#define SPEED_MASK	0x145
+
+#define		SATA_SPEED_30_DIS	0x10
+#define		SATA_SPEED_15_DIS	0x08
+#define		SAS_SPEED_60_DIS	0x04
+#define		SAS_SPEED_30_DIS	0x02
+#define		SAS_SPEED_15_DIS	0x01
+#define		SAS_SPEED_MASK_DEFAULT	0x00
+
+#define OOB_TIMER_ENABLE	0x14D
+
+#define		HOT_PLUG_EN		0x80
+#define		RCD_EN			0x40
+#define 	COMTIMER_EN		0x20
+#define		SNTT_EN			0x10
+#define		SNLT_EN			0x04
+#define		SNWT_EN			0x02
+#define		ALIGN_EN		0x01
+
+#define OOB_STATUS		0x14E
+
+#define		OOB_DONE		0x80
+#define		LOSS_OF_SIGNAL		0x40		/* ro */
+#define		SPINUP_HOLD		0x20
+#define		HOT_PLUG_CNCT		0x10		/* ro */
+#define		GTO_TIMEOUT		0x08		/* ro */
+#define		OOB_TIMEOUT		0x04		/* ro */
+#define		DEVICE_PRESENT		0x02		/* ro */
+#define		OOB_ERROR		0x01		/* ro */
+
+#define		OOB_STATUS_ERROR_MASK	(LOSS_OF_SIGNAL | GTO_TIMEOUT | \
+					 OOB_TIMEOUT | OOB_ERROR)
+
+#define OOB_STATUS_CLEAR	0x14F
+
+#define		OOB_DONE_CLR		0x80
+#define		LOSS_OF_SIGNAL_CLR 	0x40
+#define		SPINUP_HOLD_CLR		0x20
+#define		HOT_PLUG_CNCT_CLR     	0x10
+#define		GTO_TIMEOUT_CLR		0x08
+#define		OOB_TIMEOUT_CLR		0x04
+#define		OOB_ERROR_CLR		0x01
+
+#define HOT_PLUG_DELAY		0x150
+/* In 5 ms units. 20 = 100 ms. */
+#define	HOTPLUG_DELAY_TIMEOUT		20
+
+
+#define INT_ENABLE_2		0x15A
+
+#define		OOB_DONE_EN		0x80
+#define		LOSS_OF_SIGNAL_EN	0x40
+#define		SPINUP_HOLD_EN		0x20
+#define		HOT_PLUG_CNCT_EN	0x10
+#define		GTO_TIMEOUT_EN		0x08
+#define		OOB_TIMEOUT_EN		0x04
+#define		DEVICE_PRESENT_EN	0x02
+#define		OOB_ERROR_EN		0x01
+
+#define PHY_CONTROL_0		0x160
+
+#define		PHY_LOWPWREN_TX		0x80
+#define		PHY_LOWPWREN_RX		0x40
+#define		SPARE_REG_160_B5	0x20
+#define		OFFSET_CANCEL_RX	0x10
+
+/* bits 3:2 */
+#define		PHY_RXCOMCENTER_60V	0x00
+#define		PHY_RXCOMCENTER_70V	0x04
+#define		PHY_RXCOMCENTER_80V	0x08
+#define		PHY_RXCOMCENTER_90V	0x0C
+#define 	PHY_RXCOMCENTER_MASK	0x0C
+
+#define		PHY_RESET		0x02
+#define		SAS_DEFAULT_SEL		0x01
+
+#define PHY_CONTROL_1		0x161
+
+/* bits 2:0 */
+#define		SATA_PHY_DETLEVEL_50mv	0x00
+#define		SATA_PHY_DETLEVEL_75mv	0x01
+#define		SATA_PHY_DETLEVEL_100mv	0x02
+#define		SATA_PHY_DETLEVEL_125mv	0x03
+#define		SATA_PHY_DETLEVEL_150mv	0x04
+#define		SATA_PHY_DETLEVEL_175mv	0x05
+#define		SATA_PHY_DETLEVEL_200mv	0x06
+#define		SATA_PHY_DETLEVEL_225mv	0x07
+#define		SATA_PHY_DETLEVEL_MASK	0x07
+
+/* bits 5:3: same 0-7 level codes as the SATA field above, shifted left by 3 */
+#define		SAS_PHY_DETLEVEL_50mv	0x00
+#define		SAS_PHY_DETLEVEL_75mv	0x08
+#define		SAS_PHY_DETLEVEL_100mv	0x10
+#define		SAS_PHY_DETLEVEL_125mv	0x18
+#define		SAS_PHY_DETLEVEL_150mv	0x20
+#define		SAS_PHY_DETLEVEL_175mv	0x28
+#define		SAS_PHY_DETLEVEL_200mv	0x30
+#define		SAS_PHY_DETLEVEL_225mv	0x38
+#define		SAS_PHY_DETLEVEL_MASK	0x38
+
+#define PHY_CONTROL_2		0x162
+
+/* bits 7:5 */
+#define 	SATA_PHY_DRV_400mv	0x00
+#define 	SATA_PHY_DRV_450mv	0x20
+#define 	SATA_PHY_DRV_500mv	0x40
+#define 	SATA_PHY_DRV_550mv	0x60
+#define 	SATA_PHY_DRV_600mv	0x80
+#define 	SATA_PHY_DRV_650mv	0xA0
+#define 	SATA_PHY_DRV_725mv	0xC0
+#define 	SATA_PHY_DRV_800mv	0xE0
+#define		SATA_PHY_DRV_MASK	0xE0
+
+/* bits 4:3 */
+#define 	SATA_PREEMP_0		0x00
+#define 	SATA_PREEMP_1		0x08
+#define 	SATA_PREEMP_2		0x10
+#define 	SATA_PREEMP_3		0x18
+#define 	SATA_PREEMP_MASK	0x18
+
+#define 	SATA_CMSH1P5		0x04
+
+/* bits 1:0 */
+#define 	SATA_SLEW_0		0x00
+#define 	SATA_SLEW_1		0x01
+#define 	SATA_SLEW_2		0x02
+#define 	SATA_SLEW_3		0x03
+#define 	SATA_SLEW_MASK		0x03
+
+#define PHY_CONTROL_3		0x163
+
+/* bits 7:5 */
+#define 	SAS_PHY_DRV_400mv	0x00
+#define 	SAS_PHY_DRV_450mv	0x20
+#define 	SAS_PHY_DRV_500mv	0x40
+#define 	SAS_PHY_DRV_550mv	0x60
+#define 	SAS_PHY_DRV_600mv	0x80
+#define 	SAS_PHY_DRV_650mv	0xA0
+#define 	SAS_PHY_DRV_725mv	0xC0
+#define 	SAS_PHY_DRV_800mv	0xE0
+#define		SAS_PHY_DRV_MASK	0xE0
+
+/* bits 4:3 */
+#define 	SAS_PREEMP_0		0x00
+#define 	SAS_PREEMP_1		0x08
+#define 	SAS_PREEMP_2		0x10
+#define 	SAS_PREEMP_3		0x18
+#define 	SAS_PREEMP_MASK		0x18
+
+#define 	SAS_CMSH1P5		0x04
+
+/* bits 1:0 */
+#define 	SAS_SLEW_0		0x00
+#define 	SAS_SLEW_1		0x01
+#define 	SAS_SLEW_2		0x02
+#define 	SAS_SLEW_3		0x03
+#define 	SAS_SLEW_MASK		0x03
+
+#define PHY_CONTROL_4		0x168
+
+#define		PHY_DONE_CAL_TX		0x80
+#define		PHY_DONE_CAL_RX		0x40
+#define		RX_TERM_LOAD_DIS	0x20
+#define		TX_TERM_LOAD_DIS	0x10
+#define		AUTO_TERM_CAL_DIS	0x08
+#define		PHY_SIGDET_FLTR_EN	0x04
+#define		OSC_FREQ		0x02
+#define		PHY_START_CAL		0x01
+
+/*
+ * HST_PCIX2 Registers, Address Range: (0x00-0xFC)
+ */
+#define PCIX_REG_BASE_ADR		0xB8040000
+
+#define PCIC_VENDOR_ID	0x00
+
+#define PCIC_DEVICE_ID	0x02
+
+#define PCIC_COMMAND	0x04
+
+#define		INT_DIS			0x0400
+#define		FBB_EN			0x0200		/* ro */
+#define		SERR_EN			0x0100
+#define		STEP_EN			0x0080		/* ro */
+#define		PERR_EN			0x0040
+#define		VGA_EN			0x0020		/* ro */
+#define		MWI_EN			0x0010
+#define		SPC_EN			0x0008
+#define		MST_EN			0x0004
+#define		MEM_EN			0x0002
+#define		IO_EN			0x0001
+
+#define	PCIC_STATUS	0x06
+
+#define		PERR_DET		0x8000
+#define		SERR_GEN		0x4000
+#define		MABT_DET		0x2000
+#define		TABT_DET		0x1000
+#define		TABT_GEN		0x0800
+#define		DPERR_DET		0x0100
+#define		CAP_LIST		0x0010
+#define		INT_STAT		0x0008
+
+#define	PCIC_DEVREV_ID	0x08
+
+#define	PCIC_CLASS_CODE	0x09
+
+#define	PCIC_CACHELINE_SIZE	0x0C
+
+#define	PCIC_MBAR0	0x10
+
+#define 	PCIC_MBAR0_OFFSET	0
+
+#define	PCIC_MBAR1	0x18
+
+#define 	PCIC_MBAR1_OFFSET	2
+
+#define	PCIC_IOBAR	0x20
+
+#define 	PCIC_IOBAR_OFFSET	4
+
+#define	PCIC_SUBVENDOR_ID	0x2C
+
+#define PCIC_SUBSYTEM_ID	0x2E	/* NOTE(review): sic, "SUBSYSTEM" misspelled; renaming needs all users updated */
+
+#define PCIX_STATUS		0x44
+#define 	RCV_SCE		0x20000000
+#define 	UNEXP_SC	0x00080000
+#define 	SC_DISCARD	0x00040000
+
+#define ECC_CTRL_STAT		0x48
+#define 	UNCOR_ECCERR	0x00000008
+
+#define PCIC_PM_CSR		0x5C
+
+#define		PWR_STATE_D0		0
+#define		PWR_STATE_D1		1	/* not supported */
+#define		PWR_STATE_D2		2 	/* not supported */
+#define		PWR_STATE_D3		3
+
+#define PCIC_BASE1	0x6C	/* internal use only */
+
+#define		BASE1_RSVD		0xFFFFFFF8
+
+#define PCIC_BASEA	0x70	/* internal use only */
+
+#define		BASEA_RSVD		0xFFFFFFC0
+#define 	BASEA_START		0
+
+#define PCIC_BASEB	0x74	/* internal use only */
+
+#define		BASEB_RSVD		0xFFFFFF80
+#define		BASEB_IOMAP_MASK	0x7F
+#define 	BASEB_START		0x80
+
+#define PCIC_BASEC	0x78	/* internal use only */
+
+#define		BASEC_RSVD		0xFFFFFFFC
+#define 	BASEC_MASK		0x03
+#define 	BASEC_START		0x58
+
+#define PCIC_MBAR_KEY	0x7C	/* internal use only */
+
+#define 	MBAR_KEY_MASK		0xFFFFFFFF
+
+#define PCIC_HSTPCIX_CNTRL	0xA0
+
+#define 	REWIND_DIS		0x0800
+#define		SC_TMR_DIS		0x04000000
+
+#define PCIC_MBAR0_MASK	0xA8
+#define		PCIC_MBAR0_SIZE_MASK 	0x1FFFE000
+#define		PCIC_MBAR0_SIZE_SHIFT 	13
+#define		PCIC_MBAR0_SIZE(val)	\
+		    (((val) & PCIC_MBAR0_SIZE_MASK) >> PCIC_MBAR0_SIZE_SHIFT)
+
+#define PCIC_FLASH_MBAR	0xB8
+
+#define PCIC_INTRPT_STAT 0xD4
+
+#define PCIC_TP_CTRL	0xFC
+
+/*
+ * EXSI Registers, Address Range: (0x00-0xFC)
+ */
+#define EXSI_REG_BASE_ADR		REG_BASE_ADDR_EXSI
+
+#define	EXSICNFGR	(EXSI_REG_BASE_ADR + 0x00)
+
+#define		OCMINITIALIZED		0x80000000
+#define		ASIEN			0x00400000
+#define		HCMODE			0x00200000
+#define		PCIDEF			0x00100000
+#define		COMSTOCK		0x00080000
+#define		SEEPROMEND		0x00040000
+#define		MSTTIMEN		0x00020000
+#define		XREGEX			0x00000200
+#define		NVRAMW			0x00000100
+#define		NVRAMEX			0x00000080
+#define		SRAMW			0x00000040
+#define		SRAMEX			0x00000020
+#define		FLASHW			0x00000010
+#define		FLASHEX			0x00000008
+#define		SEEPROMCFG		0x00000004
+#define		SEEPROMTYP		0x00000002
+#define		SEEPROMEX		0x00000001
+
+
+#define EXSICNTRLR	(EXSI_REG_BASE_ADR + 0x04)
+
+#define		MODINT_EN		0x00000001
+
+
+#define PMSTATR		(EXSI_REG_BASE_ADR + 0x10)
+
+#define		FLASHRST		0x00000002
+#define		FLASHRDY		0x00000001
+
+
+#define FLCNFGR		(EXSI_REG_BASE_ADR + 0x14)
+
+#define		FLWEH_MASK		0x30000000
+#define		FLWESU_MASK		0x0C000000
+#define		FLWEPW_MASK		0x03F00000
+#define		FLOEH_MASK		0x000C0000
+#define 	FLOESU_MASK		0x00030000
+#define 	FLOEPW_MASK		0x0000FC00
+#define 	FLCSH_MASK		0x00000300
+#define 	FLCSSU_MASK		0x000000C0
+#define 	FLCSPW_MASK		0x0000003F
+
+#define SRCNFGR		(EXSI_REG_BASE_ADR + 0x18)
+
+#define		SRWEH_MASK		0x30000000
+#define		SRWESU_MASK		0x0C000000
+#define		SRWEPW_MASK		0x03F00000
+
+#define		SROEH_MASK		0x000C0000
+#define 	SROESU_MASK		0x00030000
+#define 	SROEPW_MASK		0x0000FC00
+#define		SRCSH_MASK		0x00000300
+#define		SRCSSU_MASK		0x000000C0
+#define		SRCSPW_MASK		0x0000003F
+
+#define NVCNFGR		(EXSI_REG_BASE_ADR + 0x1C)
+
+#define 	NVWEH_MASK		0x30000000
+#define 	NVWESU_MASK		0x0C000000
+#define 	NVWEPW_MASK		0x03F00000
+#define 	NVOEH_MASK		0x000C0000
+#define 	NVOESU_MASK		0x00030000
+#define 	NVOEPW_MASK		0x0000FC00
+#define 	NVCSH_MASK		0x00000300
+#define 	NVCSSU_MASK		0x000000C0
+#define 	NVCSPW_MASK		0x0000003F
+
+#define XRCNFGR		(EXSI_REG_BASE_ADR + 0x20)
+
+#define 	XRWEH_MASK		0x30000000
+#define 	XRWESU_MASK		0x0C000000
+#define 	XRWEPW_MASK		0x03F00000
+#define 	XROEH_MASK		0x000C0000
+#define 	XROESU_MASK		0x00030000
+#define 	XROEPW_MASK		0x0000FC00
+#define 	XRCSH_MASK		0x00000300
+#define 	XRCSSU_MASK		0x000000C0
+#define		XRCSPW_MASK		0x0000003F
+
+#define XREGADDR	(EXSI_REG_BASE_ADR + 0x24)
+
+#define 	XRADDRINCEN		0x80000000
+#define 	XREGADD_MASK		0x007FFFFF
+
+
+#define XREGDATAR	(EXSI_REG_BASE_ADR + 0x28)
+
+#define		XREGDATA_MASK 		0x0000FFFF
+
+#define GPIOOER		(EXSI_REG_BASE_ADR + 0x40)
+
+#define GPIOODENR	(EXSI_REG_BASE_ADR + 0x44)
+
+#define GPIOINVR	(EXSI_REG_BASE_ADR + 0x48)
+
+#define GPIODATAOR	(EXSI_REG_BASE_ADR + 0x4C)
+
+#define GPIODATAIR	(EXSI_REG_BASE_ADR + 0x50)
+
+#define GPIOCNFGR	(EXSI_REG_BASE_ADR + 0x54)
+
+#define		GPIO_EXTSRC		0x00000001
+
+#define SCNTRLR		(EXSI_REG_BASE_ADR + 0xA0)
+
+#define 	SXFERDONE		0x00000100
+#define 	SXFERCNT_MASK		0x000000E0
+#define 	SCMDTYP_MASK		0x0000001C
+#define 	SXFERSTART		0x00000002
+#define 	SXFEREN			0x00000001
+
+#define	SRATER		(EXSI_REG_BASE_ADR + 0xA4)
+
+#define	SADDRR		(EXSI_REG_BASE_ADR + 0xA8)
+
+#define 	SADDR_MASK		0x0000FFFF
+
+#define SDATAOR		(EXSI_REG_BASE_ADR + 0xAC)
+
+#define	SDATAOR0	(EXSI_REG_BASE_ADR + 0xAC)
+#define SDATAOR1	(EXSI_REG_BASE_ADR + 0xAD)
+#define SDATAOR2	(EXSI_REG_BASE_ADR + 0xAE)
+#define SDATAOR3	(EXSI_REG_BASE_ADR + 0xAF)
+
+#define SDATAIR		(EXSI_REG_BASE_ADR + 0xB0)
+
+#define SDATAIR0	(EXSI_REG_BASE_ADR + 0xB0)
+#define SDATAIR1	(EXSI_REG_BASE_ADR + 0xB1)
+#define SDATAIR2	(EXSI_REG_BASE_ADR + 0xB2)
+#define SDATAIR3	(EXSI_REG_BASE_ADR + 0xB3)
+
+#define ASISTAT0R	(EXSI_REG_BASE_ADR + 0xD0)
+#define 	ASIFMTERR		0x00000400
+#define 	ASISEECHKERR		0x00000200
+#define 	ASIERR			0x00000100
+
+#define ASISTAT1R	(EXSI_REG_BASE_ADR + 0xD4)
+#define 	CHECKSUM_MASK		0x0000FFFF
+
+#define ASIERRADDR	(EXSI_REG_BASE_ADR + 0xD8)
+#define ASIERRDATAR	(EXSI_REG_BASE_ADR + 0xDC)
+#define ASIERRSTATR	(EXSI_REG_BASE_ADR + 0xE0)
+#define 	CPI2ASIBYTECNT_MASK	0x00070000
+#define 	CPI2ASIBYTEEN_MASK      0x0000F000
+#define 	CPI2ASITARGERR_MASK	0x00000F00
+#define 	CPI2ASITARGMID_MASK	0x000000F0
+#define 	CPI2ASIMSTERR_MASK	0x0000000F
+
+/*
+ * XSRAM, External SRAM (DWord and any BE pattern accessible)
+ */
+#define XSRAM_REG_BASE_ADDR             0xB8100000
+#define XSRAM_SIZE                        0x100000
+
+/*
+ * NVRAM Registers, Address Range: (0x00000 - 0x3FFFF).
+ */
+#define		NVRAM_REG_BASE_ADR	0xBF800000
+#define		NVRAM_MAX_BASE_ADR	0x003FFFFF	/* NOTE(review): range comment above says 0x3FFFF -- confirm which is right */
+
+/* OCM base address */
+#define		OCM_BASE_ADDR		0xA0000000
+#define		OCM_MAX_SIZE		0x20000
+
+/*
+ * Sequencers (Central and Link) Scratch RAM page definitions.
+ */
+
+/*
+ * The Central Management Sequencer (CSEQ) Scratch Memory is a 1024
+ * byte memory.  It is dword accessible and has byte parity
+ * protection. The CSEQ accesses it in 32 byte windows, either as mode
+ * dependent or mode independent memory. Each mode has 96 bytes,
+ * (three 32 byte pages 0-2, not contiguous), leaving 128 bytes of
+ * Mode Independent memory (four 32 byte pages 4-7). Note that mode
+ * dependent scratch memory, Mode 8, page 0-3 overlaps mode
+ * independent scratch memory, pages 0-3.
+ * - 896 bytes of mode dependent scratch, 96 bytes per Modes 0-7, and
+ * 128 bytes in mode 8,
+ * 256 bytes of mode independent scratch, common to modes 0-15.
+ *
+ * Sequencer scratch RAM is 1024 bytes.  This scratch memory is
+ * divided into mode dependent and mode independent scratch with this
+ * memory further subdivided into pages of size 32 bytes. There are 5
+ * pages (160 bytes) of mode independent scratch and 3 pages of
+ * dependent scratch memory for modes 0-7 (768 bytes). Mode 8 pages
+ * 0-2 dependent scratch overlap with pages 0-2 of mode independent
+ * scratch memory.
+ *
+ * The host accesses this scratch in a different manner from the
+ * central sequencer. The sequencer has to use CSEQ registers CSCRPAGE
+ * and CMnSCRPAGE to access the scratch memory. A flat mapping of the
+ * scratch memory is available for software convenience and to prevent
+ * corruption while the sequencer is running. This memory is mapped
+ * onto addresses 800h - BFFh, total of 400h bytes.
+ *
+ * These addresses are mapped as follows:
+ *
+ *        800h-83Fh   Mode Dependent Scratch Mode 0 Pages 0-1
+ *        840h-87Fh   Mode Dependent Scratch Mode 1 Pages 0-1
+ *        880h-8BFh   Mode Dependent Scratch Mode 2 Pages 0-1
+ *        8C0h-8FFh   Mode Dependent Scratch Mode 3 Pages 0-1
+ *        900h-93Fh   Mode Dependent Scratch Mode 4 Pages 0-1
+ *        940h-97Fh   Mode Dependent Scratch Mode 5 Pages 0-1
+ *        980h-9BFh   Mode Dependent Scratch Mode 6 Pages 0-1
+ *        9C0h-9FFh   Mode Dependent Scratch Mode 7 Pages 0-1
+ *        A00h-A5Fh   Mode Dependent Scratch Mode 8 Pages 0-2
+ *                    Mode Independent Scratch Pages 0-2
+ *        A60h-A7Fh   Mode Dependent Scratch Mode 8 Page 3
+ *                    Mode Independent Scratch Page 3
+ *        A80h-AFFh   Mode Independent Scratch Pages 4-7
+ *        B00h-B1Fh   Mode Dependent Scratch Mode 0 Page 2
+ *        B20h-B3Fh   Mode Dependent Scratch Mode 1 Page 2
+ *        B40h-B5Fh   Mode Dependent Scratch Mode 2 Page 2
+ *        B60h-B7Fh   Mode Dependent Scratch Mode 3 Page 2
+ *        B80h-B9Fh   Mode Dependent Scratch Mode 4 Page 2
+ *        BA0h-BBFh   Mode Dependent Scratch Mode 5 Page 2
+ *        BC0h-BDFh   Mode Dependent Scratch Mode 6 Page 2
+ *        BE0h-BFFh   Mode Dependent Scratch Mode 7 Page 2
+ */
+
+/* General macros */
+#define CSEQ_PAGE_SIZE			32  /* Scratch page size (in bytes) */
+
+/* All macros start with offsets from base + 0x800 (CMAPPEDSCR).
+ * Mode dependent scratch page 0, mode 0.
+ * For modes 1-7 you have to do arithmetic. */
+#define CSEQ_LRM_SAVE_SINDEX		(CMAPPEDSCR + 0x0000)
+#define CSEQ_LRM_SAVE_SCBPTR		(CMAPPEDSCR + 0x0002)
+#define CSEQ_Q_LINK_HEAD		(CMAPPEDSCR + 0x0004)
+#define CSEQ_Q_LINK_TAIL		(CMAPPEDSCR + 0x0006)
+#define CSEQ_LRM_SAVE_SCRPAGE		(CMAPPEDSCR + 0x0008)
+
+/* Mode dependent scratch page 0 mode 8 macros. */
+#define CSEQ_RET_ADDR			(CMAPPEDSCR + 0x0200)
+#define CSEQ_RET_SCBPTR			(CMAPPEDSCR + 0x0202)
+#define CSEQ_SAVE_SCBPTR		(CMAPPEDSCR + 0x0204)
+#define CSEQ_EMPTY_TRANS_CTX		(CMAPPEDSCR + 0x0206)
+#define CSEQ_RESP_LEN			(CMAPPEDSCR + 0x0208)
+#define CSEQ_TMF_SCBPTR			(CMAPPEDSCR + 0x020A)
+#define CSEQ_GLOBAL_PREV_SCB		(CMAPPEDSCR + 0x020C)
+#define CSEQ_GLOBAL_HEAD		(CMAPPEDSCR + 0x020E)
+#define CSEQ_CLEAR_LU_HEAD		(CMAPPEDSCR + 0x0210)
+#define CSEQ_TMF_OPCODE			(CMAPPEDSCR + 0x0212)
+#define CSEQ_SCRATCH_FLAGS		(CMAPPEDSCR + 0x0213)
+#define CSEQ_HSB_SITE                   (CMAPPEDSCR + 0x021A)
+#define CSEQ_FIRST_INV_SCB_SITE		(CMAPPEDSCR + 0x021C)
+#define CSEQ_FIRST_INV_DDB_SITE		(CMAPPEDSCR + 0x021E)
+
+/* Mode dependent scratch page 1 mode 8 macros. */
+#define CSEQ_LUN_TO_CLEAR		(CMAPPEDSCR + 0x0220)
+#define CSEQ_LUN_TO_CHECK		(CMAPPEDSCR + 0x0228)
+
+/* Mode dependent scratch page 2 mode 8 macros */
+#define CSEQ_HQ_NEW_POINTER		(CMAPPEDSCR + 0x0240)
+#define CSEQ_HQ_DONE_BASE		(CMAPPEDSCR + 0x0248)
+#define CSEQ_HQ_DONE_POINTER		(CMAPPEDSCR + 0x0250)
+#define CSEQ_HQ_DONE_PASS		(CMAPPEDSCR + 0x0254)
+
+/* Mode independent scratch page 4 macros. */
+#define CSEQ_Q_EXE_HEAD			(CMAPPEDSCR + 0x0280)
+#define CSEQ_Q_EXE_TAIL			(CMAPPEDSCR + 0x0282)
+#define CSEQ_Q_DONE_HEAD                (CMAPPEDSCR + 0x0284)
+#define CSEQ_Q_DONE_TAIL                (CMAPPEDSCR + 0x0286)
+#define CSEQ_Q_SEND_HEAD		(CMAPPEDSCR + 0x0288)
+#define CSEQ_Q_SEND_TAIL		(CMAPPEDSCR + 0x028A)
+#define CSEQ_Q_DMA2CHIM_HEAD		(CMAPPEDSCR + 0x028C)
+#define CSEQ_Q_DMA2CHIM_TAIL		(CMAPPEDSCR + 0x028E)
+#define CSEQ_Q_COPY_HEAD		(CMAPPEDSCR + 0x0290)
+#define CSEQ_Q_COPY_TAIL		(CMAPPEDSCR + 0x0292)
+#define CSEQ_REG0			(CMAPPEDSCR + 0x0294)
+#define CSEQ_REG1			(CMAPPEDSCR + 0x0296)
+#define CSEQ_REG2			(CMAPPEDSCR + 0x0298)
+#define CSEQ_LINK_CTL_Q_MAP		(CMAPPEDSCR + 0x029C)
+#define CSEQ_MAX_CSEQ_MODE		(CMAPPEDSCR + 0x029D)
+#define CSEQ_FREE_LIST_HACK_COUNT	(CMAPPEDSCR + 0x029E)
+
+/* Mode independent scratch page 5 macros. */
+#define CSEQ_EST_NEXUS_REQ_QUEUE	(CMAPPEDSCR + 0x02A0)
+#define CSEQ_EST_NEXUS_REQ_COUNT	(CMAPPEDSCR + 0x02A8)
+#define CSEQ_Q_EST_NEXUS_HEAD		(CMAPPEDSCR + 0x02B0)
+#define CSEQ_Q_EST_NEXUS_TAIL		(CMAPPEDSCR + 0x02B2)
+#define CSEQ_NEED_EST_NEXUS_SCB		(CMAPPEDSCR + 0x02B4)
+#define CSEQ_EST_NEXUS_REQ_HEAD		(CMAPPEDSCR + 0x02B6)
+#define CSEQ_EST_NEXUS_REQ_TAIL		(CMAPPEDSCR + 0x02B7)
+#define CSEQ_EST_NEXUS_SCB_OFFSET	(CMAPPEDSCR + 0x02B8)
+
+/* Mode independent scratch page 6 macros. */
+#define CSEQ_INT_ROUT_RET_ADDR0		(CMAPPEDSCR + 0x02C0)
+#define CSEQ_INT_ROUT_RET_ADDR1		(CMAPPEDSCR + 0x02C2)
+#define CSEQ_INT_ROUT_SCBPTR		(CMAPPEDSCR + 0x02C4)
+#define CSEQ_INT_ROUT_MODE		(CMAPPEDSCR + 0x02C6)
+#define CSEQ_ISR_SCRATCH_FLAGS		(CMAPPEDSCR + 0x02C7)
+#define CSEQ_ISR_SAVE_SINDEX		(CMAPPEDSCR + 0x02C8)
+#define CSEQ_ISR_SAVE_DINDEX		(CMAPPEDSCR + 0x02CA)
+#define CSEQ_Q_MONIRTT_HEAD		(CMAPPEDSCR + 0x02D0)
+#define CSEQ_Q_MONIRTT_TAIL		(CMAPPEDSCR + 0x02D2)
+#define CSEQ_FREE_SCB_MASK		(CMAPPEDSCR + 0x02D5)
+#define CSEQ_BUILTIN_FREE_SCB_HEAD	(CMAPPEDSCR + 0x02D6)
+#define CSEQ_BUILTIN_FREE_SCB_TAIL	(CMAPPEDSCR + 0x02D8)
+#define CSEQ_EXTENDED_FREE_SCB_HEAD	(CMAPPEDSCR + 0x02DA)
+#define CSEQ_EXTENDED_FREE_SCB_TAIL	(CMAPPEDSCR + 0x02DC)
+
+/* Mode independent scratch page 7 macros. */
+#define CSEQ_EMPTY_REQ_QUEUE		(CMAPPEDSCR + 0x02E0)
+#define CSEQ_EMPTY_REQ_COUNT		(CMAPPEDSCR + 0x02E8)
+#define CSEQ_Q_EMPTY_HEAD		(CMAPPEDSCR + 0x02F0)
+#define CSEQ_Q_EMPTY_TAIL		(CMAPPEDSCR + 0x02F2)
+#define CSEQ_NEED_EMPTY_SCB		(CMAPPEDSCR + 0x02F4)
+#define CSEQ_EMPTY_REQ_HEAD		(CMAPPEDSCR + 0x02F6)
+#define CSEQ_EMPTY_REQ_TAIL		(CMAPPEDSCR + 0x02F7)
+#define CSEQ_EMPTY_SCB_OFFSET		(CMAPPEDSCR + 0x02F8)
+#define CSEQ_PRIMITIVE_DATA		(CMAPPEDSCR + 0x02FA)
+#define CSEQ_TIMEOUT_CONST		(CMAPPEDSCR + 0x02FC)
+
+/***************************************************************************
+* Link m Sequencer scratch RAM is 512 bytes.
+* This scratch memory is divided into mode dependent and mode
+* independent scratch with this memory further subdivided into
+* pages of size 32 bytes. There are 4 pages (128 bytes) of
+* mode independent scratch and 4 pages of dependent scratch
+* memory for modes 0-2 (384 bytes).
+*
+* The host accesses this scratch in a different manner from the
+* link sequencer. The sequencer has to use LSEQ registers
+* LmSCRPAGE and LmMnSCRPAGE to access the scratch memory. A flat
+* mapping of the scratch memory is available for software
+* convenience and to prevent corruption while the sequencer is
+* running. This memory is mapped onto addresses 800h - 9FFh.
+*
+* These addresses are mapped as follows:
+*
+*        800h-85Fh   Mode Dependent Scratch Mode 0 Pages 0-2
+*        860h-87Fh   Mode Dependent Scratch Mode 0 Page 3
+*                    Mode Dependent Scratch Mode 5 Page 0
+*        880h-8DFh   Mode Dependent Scratch Mode 1 Pages 0-2
+*        8E0h-8FFh   Mode Dependent Scratch Mode 1 Page 3
+*                    Mode Dependent Scratch Mode 5 Page 1
+*        900h-95Fh   Mode Dependent Scratch Mode 2 Pages 0-2
+*        960h-97Fh   Mode Dependent Scratch Mode 2 Page 3
+*                    Mode Dependent Scratch Mode 5 Page 2
+*        980h-9DFh   Mode Independent Scratch Pages 0-2
+*        9E0h-9FFh   Mode Independent Scratch Page 3
+*                    Mode Dependent Scratch Mode 5 Page 3
+*
+****************************************************************************/
+/* General macros */
+#define LSEQ_MODE_SCRATCH_SIZE		0x80 /* Size of scratch RAM per mode */
+#define LSEQ_PAGE_SIZE			0x20 /* Scratch page size (in bytes) */
+#define LSEQ_MODE5_PAGE0_OFFSET 	0x60
+
+/* Common mode dependent scratch page 0 macros for modes 0,1,2, and 5 */
+/* Indexed using LSEQ_MODE_SCRATCH_SIZE * mode, for modes 0,1,2. */
+#define LmSEQ_RET_ADDR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0000)
+#define LmSEQ_REG0_MODE(LinkNum)	(LmSCRATCH(LinkNum) + 0x0002)
+#define LmSEQ_MODE_FLAGS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0004)
+
+/* Mode flag macros (byte 0) */
+#define		SAS_SAVECTX_OCCURRED		0x80
+#define		SAS_OOBSVC_OCCURRED		0x40
+#define		SAS_OOB_DEVICE_PRESENT		0x20
+#define		SAS_CFGHDR_OCCURRED		0x10
+#define		SAS_RCV_INTS_ARE_DISABLED	0x08
+#define		SAS_OOB_HOT_PLUG_CNCT		0x04
+#define		SAS_AWAIT_OPEN_CONNECTION	0x02
+#define		SAS_CFGCMPLT_OCCURRED		0x01
+
+/* Mode flag macros (byte 1) */
+#define		SAS_RLSSCB_OCCURRED		0x80
+#define		SAS_FORCED_HEADER_MISS		0x40
+
+#define LmSEQ_RET_ADDR2(LinkNum)	(LmSCRATCH(LinkNum) + 0x0006)
+#define LmSEQ_RET_ADDR1(LinkNum)	(LmSCRATCH(LinkNum) + 0x0008)
+#define LmSEQ_OPCODE_TO_CSEQ(LinkNum)	(LmSCRATCH(LinkNum) + 0x000B)
+#define LmSEQ_DATA_TO_CSEQ(LinkNum)	(LmSCRATCH(LinkNum) + 0x000C)
+
+/* Mode dependent scratch page 0 macros for mode 0 (non-common) */
+/* Absolute offsets */
+#define LmSEQ_FIRST_INV_DDB_SITE(LinkNum)	(LmSCRATCH(LinkNum) + 0x000E)
+#define LmSEQ_EMPTY_TRANS_CTX(LinkNum)		(LmSCRATCH(LinkNum) + 0x0010)
+#define LmSEQ_RESP_LEN(LinkNum)			(LmSCRATCH(LinkNum) + 0x0012)
+#define LmSEQ_FIRST_INV_SCB_SITE(LinkNum)	(LmSCRATCH(LinkNum) + 0x0014)
+#define LmSEQ_INTEN_SAVE(LinkNum)		(LmSCRATCH(LinkNum) + 0x0016)
+#define LmSEQ_LINK_RST_FRM_LEN(LinkNum)		(LmSCRATCH(LinkNum) + 0x001A)
+#define LmSEQ_LINK_RST_PROTOCOL(LinkNum)	(LmSCRATCH(LinkNum) + 0x001B)
+#define LmSEQ_RESP_STATUS(LinkNum)		(LmSCRATCH(LinkNum) + 0x001C)
+#define LmSEQ_LAST_LOADED_SGE(LinkNum)		(LmSCRATCH(LinkNum) + 0x001D)
+#define LmSEQ_SAVE_SCBPTR(LinkNum)		(LmSCRATCH(LinkNum) + 0x001E)
+
+/* Mode dependent scratch page 0 macros for mode 1 (non-common) */
+/* Absolute offsets */
+#define LmSEQ_Q_XMIT_HEAD(LinkNum)		(LmSCRATCH(LinkNum) + 0x008E)
+#define LmSEQ_M1_EMPTY_TRANS_CTX(LinkNum)	(LmSCRATCH(LinkNum) + 0x0090)
+#define LmSEQ_INI_CONN_TAG(LinkNum)		(LmSCRATCH(LinkNum) + 0x0092)
+#define LmSEQ_FAILED_OPEN_STATUS(LinkNum)	(LmSCRATCH(LinkNum) + 0x009A)
+#define LmSEQ_XMIT_REQUEST_TYPE(LinkNum)	(LmSCRATCH(LinkNum) + 0x009B)
+#define LmSEQ_M1_RESP_STATUS(LinkNum)		(LmSCRATCH(LinkNum) + 0x009C)
+#define LmSEQ_M1_LAST_LOADED_SGE(LinkNum)	(LmSCRATCH(LinkNum) + 0x009D)
+#define LmSEQ_M1_SAVE_SCBPTR(LinkNum)		(LmSCRATCH(LinkNum) + 0x009E)
+
+/* Mode dependent scratch page 0 macros for mode 2 (non-common) */
+#define LmSEQ_PORT_COUNTER(LinkNum)		(LmSCRATCH(LinkNum) + 0x010E)
+#define LmSEQ_PM_TABLE_PTR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0110)
+#define LmSEQ_SATA_INTERLOCK_TMR_SAVE(LinkNum)	(LmSCRATCH(LinkNum) + 0x0112)
+#define LmSEQ_IP_BITL(LinkNum)			(LmSCRATCH(LinkNum) + 0x0114)
+#define LmSEQ_COPY_SMP_CONN_TAG(LinkNum)	(LmSCRATCH(LinkNum) + 0x0116)
+#define LmSEQ_P0M2_OFFS1AH(LinkNum)		(LmSCRATCH(LinkNum) + 0x011A)
+
+/* Mode dependent scratch page 0 macros for modes 4/5 (non-common) */
+/* Absolute offsets */
+#define LmSEQ_SAVED_OOB_STATUS(LinkNum)		(LmSCRATCH(LinkNum) + 0x006E)
+#define LmSEQ_SAVED_OOB_MODE(LinkNum)		(LmSCRATCH(LinkNum) + 0x006F)
+#define LmSEQ_Q_LINK_HEAD(LinkNum)		(LmSCRATCH(LinkNum) + 0x0070)
+#define LmSEQ_LINK_RST_ERR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0072)
+#define LmSEQ_SAVED_OOB_SIGNALS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0073)
+#define LmSEQ_SAS_RESET_MODE(LinkNum)		(LmSCRATCH(LinkNum) + 0x0074)
+#define LmSEQ_LINK_RESET_RETRY_COUNT(LinkNum)	(LmSCRATCH(LinkNum) + 0x0075)
+#define LmSEQ_NUM_LINK_RESET_RETRIES(LinkNum)	(LmSCRATCH(LinkNum) + 0x0076)
+#define LmSEQ_OOB_INT_ENABLES(LinkNum)		(LmSCRATCH(LinkNum) + 0x007A)
+#define LmSEQ_NOTIFY_TIMER_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x007C)
+#define LmSEQ_NOTIFY_TIMER_DOWN_COUNT(LinkNum)	(LmSCRATCH(LinkNum) + 0x007E)
+
+/* Mode dependent scratch page 1, mode 0 and mode 1 */
+#define LmSEQ_SG_LIST_PTR_ADDR0(LinkNum)        (LmSCRATCH(LinkNum) + 0x0020)
+#define LmSEQ_SG_LIST_PTR_ADDR1(LinkNum)        (LmSCRATCH(LinkNum) + 0x0030)
+#define LmSEQ_M1_SG_LIST_PTR_ADDR0(LinkNum)     (LmSCRATCH(LinkNum) + 0x00A0)
+#define LmSEQ_M1_SG_LIST_PTR_ADDR1(LinkNum)     (LmSCRATCH(LinkNum) + 0x00B0)
+
+/* Mode dependent scratch page 1 macros for mode 2 */
+/* Absolute offsets */
+#define LmSEQ_INVALID_DWORD_COUNT(LinkNum)	(LmSCRATCH(LinkNum) + 0x0120)
+#define LmSEQ_DISPARITY_ERROR_COUNT(LinkNum) 	(LmSCRATCH(LinkNum) + 0x0124)
+#define LmSEQ_LOSS_OF_SYNC_COUNT(LinkNum)	(LmSCRATCH(LinkNum) + 0x0128)
+
+/* Mode dependent scratch page 1 macros for mode 4/5 */
+#define LmSEQ_FRAME_TYPE_MASK(LinkNum)	      (LmSCRATCH(LinkNum) + 0x00E0)
+#define LmSEQ_HASHED_DEST_ADDR_MASK(LinkNum)  (LmSCRATCH(LinkNum) + 0x00E1)
+#define LmSEQ_HASHED_SRC_ADDR_MASK_PRINT(LinkNum) (LmSCRATCH(LinkNum) + 0x00E4)
+#define LmSEQ_HASHED_SRC_ADDR_MASK(LinkNum)   (LmSCRATCH(LinkNum) + 0x00E5)
+#define LmSEQ_NUM_FILL_BYTES_MASK(LinkNum)    (LmSCRATCH(LinkNum) + 0x00EB)
+#define LmSEQ_TAG_MASK(LinkNum)		      (LmSCRATCH(LinkNum) + 0x00F0)
+#define LmSEQ_TARGET_PORT_XFER_TAG(LinkNum)   (LmSCRATCH(LinkNum) + 0x00F2)
+#define LmSEQ_DATA_OFFSET(LinkNum)	      (LmSCRATCH(LinkNum) + 0x00F4)
+
+/* Mode dependent scratch page 2 macros for mode 0 */
+/* Absolute offsets */
+#define LmSEQ_SMP_RCV_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0040)
+#define LmSEQ_DEVICE_BITS(LinkNum)		(LmSCRATCH(LinkNum) + 0x005B)
+#define LmSEQ_SDB_DDB(LinkNum)			(LmSCRATCH(LinkNum) + 0x005C)
+#define LmSEQ_SDB_NUM_TAGS(LinkNum)		(LmSCRATCH(LinkNum) + 0x005E)
+#define LmSEQ_SDB_CURR_TAG(LinkNum)		(LmSCRATCH(LinkNum) + 0x005F)
+
+/* Mode dependent scratch page 2 macros for mode 1 */
+/* Absolute offsets */
+/* byte 0 bits 1-0 are domain select. */
+#define LmSEQ_TX_ID_ADDR_FRAME(LinkNum)		(LmSCRATCH(LinkNum) + 0x00C0)
+#define LmSEQ_OPEN_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x00C8)
+#define LmSEQ_SRST_AS_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x00CC)
+#define LmSEQ_LAST_LOADED_SG_EL(LinkNum)	(LmSCRATCH(LinkNum) + 0x00D4)
+
+/* Mode dependent scratch page 2 macros for mode 2 */
+/* Absolute offsets */
+#define LmSEQ_STP_SHUTDOWN_TIMER_TERM_TS(LinkNum) (LmSCRATCH(LinkNum) + 0x0140)
+#define LmSEQ_CLOSE_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0144)
+#define LmSEQ_BREAK_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0148)
+#define LmSEQ_DWS_RESET_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x014C)
+#define LmSEQ_SATA_INTERLOCK_TIMER_TERM_TS(LinkNum) \
+						(LmSCRATCH(LinkNum) + 0x0150)
+#define LmSEQ_MCTL_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0154)
+
+/* Mode dependent scratch page 2 macros for mode 5 */
+#define LmSEQ_COMINIT_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0160)
+#define LmSEQ_RCV_ID_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0164)
+#define LmSEQ_RCV_FIS_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0168)
+#define LmSEQ_DEV_PRES_TIMER_TERM_TS(LinkNum)	(LmSCRATCH(LinkNum) + 0x016C)
+
+/* Mode dependent scratch page 3 macros for modes 0 and 1 */
+/* None defined */
+
+/* Mode dependent scratch page 3 macros for modes 2 and 5 */
+/* None defined */
+
+/* Mode Independent Scratch page 0 macros. */
+#define LmSEQ_Q_TGTXFR_HEAD(LinkNum)	(LmSCRATCH(LinkNum) + 0x0180)
+#define LmSEQ_Q_TGTXFR_TAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x0182)
+#define LmSEQ_LINK_NUMBER(LinkNum)	(LmSCRATCH(LinkNum) + 0x0186)
+#define LmSEQ_SCRATCH_FLAGS(LinkNum)	(LmSCRATCH(LinkNum) + 0x0187)
+/*
+ * Currently only bit 0, SAS_DWSAQD, is used.
+ */
+#define		SAS_DWSAQD			0x01  /*
+						       * DWSSTATUS: DWSAQD
+						       * bit last read in ISR.
+						       */
+#define  LmSEQ_CONNECTION_STATE(LinkNum) (LmSCRATCH(LinkNum) + 0x0188)
+/* Connection states (byte 0) */
+#define		SAS_WE_OPENED_CS		0x01
+#define		SAS_DEVICE_OPENED_CS		0x02
+#define		SAS_WE_SENT_DONE_CS		0x04
+#define		SAS_DEVICE_SENT_DONE_CS		0x08
+#define		SAS_WE_SENT_CLOSE_CS		0x10
+#define		SAS_DEVICE_SENT_CLOSE_CS	0x20
+#define		SAS_WE_SENT_BREAK_CS		0x40
+#define		SAS_DEVICE_SENT_BREAK_CS	0x80
+/* Connection states (byte 1) */
+#define		SAS_OPN_TIMEOUT_OR_OPN_RJCT_CS	0x01
+#define		SAS_AIP_RECEIVED_CS		0x02
+#define		SAS_CREDIT_TIMEOUT_OCCURRED_CS	0x04
+#define		SAS_ACKNAK_TIMEOUT_OCCURRED_CS	0x08
+#define		SAS_SMPRSP_TIMEOUT_OCCURRED_CS	0x10
+#define		SAS_DONE_TIMEOUT_OCCURRED_CS	0x20
+/* Connection states (byte 2) */
+#define		SAS_SMP_RESPONSE_RECEIVED_CS	0x01
+#define		SAS_INTLK_TIMEOUT_OCCURRED_CS	0x02
+#define		SAS_DEVICE_SENT_DMAT_CS		0x04
+#define		SAS_DEVICE_SENT_SYNCSRST_CS	0x08
+#define		SAS_CLEARING_AFFILIATION_CS	0x20
+#define		SAS_RXTASK_ACTIVE_CS		0x40
+#define		SAS_TXTASK_ACTIVE_CS		0x80
+/* Connection states (byte 3) */
+#define		SAS_PHY_LOSS_OF_SIGNAL_CS	0x01
+#define		SAS_DWS_TIMER_EXPIRED_CS	0x02
+#define		SAS_LINK_RESET_NOT_COMPLETE_CS	0x04
+#define		SAS_PHY_DISABLED_CS		0x08
+#define		SAS_LINK_CTL_TASK_ACTIVE_CS	0x10
+#define		SAS_PHY_EVENT_TASK_ACTIVE_CS	0x20
+#define		SAS_DEVICE_SENT_ID_FRAME_CS	0x40
+#define		SAS_DEVICE_SENT_REG_FIS_CS	0x40	/* NOTE(review): same bit as ..._ID_FRAME_CS; presumably an intentional alias -- confirm */
+#define		SAS_DEVICE_SENT_HARD_RESET_CS	0x80
+#define  	SAS_PHY_IS_DOWN_FLAGS	(SAS_PHY_LOSS_OF_SIGNAL_CS|\
+					 SAS_DWS_TIMER_EXPIRED_CS |\
+					 SAS_LINK_RESET_NOT_COMPLETE_CS|\
+					 SAS_PHY_DISABLED_CS)
+
+#define		SAS_LINK_CTL_PHY_EVENT_FLAGS   (SAS_LINK_CTL_TASK_ACTIVE_CS |\
+						SAS_PHY_EVENT_TASK_ACTIVE_CS |\
+						SAS_DEVICE_SENT_ID_FRAME_CS  |\
+						SAS_DEVICE_SENT_HARD_RESET_CS)
+
+#define LmSEQ_CONCTL(LinkNum)		(LmSCRATCH(LinkNum) + 0x018C)
+#define LmSEQ_CONSTAT(LinkNum)		(LmSCRATCH(LinkNum) + 0x018E)
+#define LmSEQ_CONNECTION_MODES(LinkNum)	(LmSCRATCH(LinkNum) + 0x018F)
+#define LmSEQ_REG1_ISR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0192)
+#define LmSEQ_REG2_ISR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0194)
+#define LmSEQ_REG3_ISR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0196)
+#define LmSEQ_REG0_ISR(LinkNum)		(LmSCRATCH(LinkNum) + 0x0198)
+
+/* Mode independent scratch page 1 macros. */
+#define LmSEQ_EST_NEXUS_SCBPTR0(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A0)
+#define LmSEQ_EST_NEXUS_SCBPTR1(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A2)
+#define LmSEQ_EST_NEXUS_SCBPTR2(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A4)
+#define LmSEQ_EST_NEXUS_SCBPTR3(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A6)
+#define LmSEQ_EST_NEXUS_SCB_OPCODE0(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A8)
+#define LmSEQ_EST_NEXUS_SCB_OPCODE1(LinkNum)	(LmSCRATCH(LinkNum) + 0x01A9)
+#define LmSEQ_EST_NEXUS_SCB_OPCODE2(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AA)
+#define LmSEQ_EST_NEXUS_SCB_OPCODE3(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AB)
+#define LmSEQ_EST_NEXUS_SCB_HEAD(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AC)
+#define LmSEQ_EST_NEXUS_SCB_TAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AD)
+#define LmSEQ_EST_NEXUS_BUF_AVAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x01AE)
+#define LmSEQ_TIMEOUT_CONST(LinkNum)		(LmSCRATCH(LinkNum) + 0x01B8)
+#define LmSEQ_ISR_SAVE_SINDEX(LinkNum)	        (LmSCRATCH(LinkNum) + 0x01BC)
+#define LmSEQ_ISR_SAVE_DINDEX(LinkNum)	        (LmSCRATCH(LinkNum) + 0x01BE)
+
+/* Mode independent scratch page 2 macros. */
+#define LmSEQ_EMPTY_SCB_PTR0(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C0)
+#define LmSEQ_EMPTY_SCB_PTR1(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C2)
+#define LmSEQ_EMPTY_SCB_PTR2(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C4)
+#define LmSEQ_EMPTY_SCB_PTR3(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C6)
+#define LmSEQ_EMPTY_SCB_OPCD0(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C8)
+#define LmSEQ_EMPTY_SCB_OPCD1(LinkNum)	(LmSCRATCH(LinkNum) + 0x01C9)
+#define LmSEQ_EMPTY_SCB_OPCD2(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CA)
+#define LmSEQ_EMPTY_SCB_OPCD3(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CB)
+#define LmSEQ_EMPTY_SCB_HEAD(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CC)
+#define LmSEQ_EMPTY_SCB_TAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CD)
+#define LmSEQ_EMPTY_BUFS_AVAIL(LinkNum)	(LmSCRATCH(LinkNum) + 0x01CE)
+#define LmSEQ_ATA_SCR_REGS(LinkNum)	(LmSCRATCH(LinkNum) + 0x01D4)
+
+/* Mode independent scratch page 3 macros. */
+#define LmSEQ_DEV_PRES_TMR_TOUT_CONST(LinkNum)	(LmSCRATCH(LinkNum) + 0x01E0)
+#define LmSEQ_SATA_INTERLOCK_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01E4)
+#define LmSEQ_STP_SHUTDOWN_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01E8)
+#define LmSEQ_SRST_ASSERT_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01EC)
+#define LmSEQ_RCV_FIS_TIMEOUT(LinkNum)		(LmSCRATCH(LinkNum) + 0x01F0)
+#define LmSEQ_ONE_MILLISEC_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01F4)
+#define LmSEQ_TEN_MS_COMINIT_TIMEOUT(LinkNum)	(LmSCRATCH(LinkNum) + 0x01F8)
+#define LmSEQ_SMP_RCV_TIMEOUT(LinkNum)		(LmSCRATCH(LinkNum) + 0x01FC)
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_sas.h b/drivers/scsi/aic94xx/aic94xx_sas.h
new file mode 100644
index 0000000..64d2317
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_sas.h
@@ -0,0 +1,785 @@
+/*
+ * Aic94xx SAS/SATA driver SAS definitions and hardware interface header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_SAS_H_
+#define _AIC94XX_SAS_H_
+
+#include <scsi/libsas.h>
+
+/* ---------- DDBs ---------- */
+/* DDBs are device descriptor blocks which describe a device in the
+ * domain that this sequencer can maintain low-level connections for
+ * us.  They are 64 bytes.
+ */
+
+struct asd_ddb_ssp_smp_target_port {
+	u8     conn_type;	  /* byte 0 */
+#define DDB_TP_CONN_TYPE 0x81	  /* Initiator port and addr frame type 0x01 */
+
+	u8     conn_rate;
+	__be16 init_conn_tag;
+	u8     dest_sas_addr[8];  /* bytes 4-11 */
+
+	__le16 send_queue_head;
+	u8     sq_suspended;
+	u8     ddb_type;	  /* DDB_TYPE_TARGET */
+#define DDB_TYPE_UNUSED    0xFF
+#define DDB_TYPE_TARGET    0xFE
+#define DDB_TYPE_INITIATOR 0xFD
+#define DDB_TYPE_PM_PORT   0xFC
+
+	__le16 _r_a;
+	__be16 awt_def;
+
+	u8     compat_features;	  /* byte 20 */
+	u8     pathway_blocked_count;
+	__be16 arb_wait_time;
+	__be32 more_compat_features; /* byte 24 */
+
+	u8     conn_mask;
+	u8     flags;	  /* concurrent conn:2,2 and open:0(1) */
+#define CONCURRENT_CONN_SUPP 0x04
+#define OPEN_REQUIRED        0x01
+
+	u16    _r_b;
+	__le16 exec_queue_tail;
+	__le16 send_queue_tail;
+	__le16 sister_ddb;
+
+	__le16 _r_c;
+
+	u8     max_concurrent_conn;
+	u8     num_concurrent_conn;
+	u8     num_contexts;
+
+	u8     _r_d;
+
+	__le16 active_task_count;
+
+	u8     _r_e[9];
+
+	u8     itnl_reason;	  /* I_T nexus loss reason */
+
+	__le16 _r_f;
+
+	__le16 itnl_timeout;
+#define ITNL_TIMEOUT_CONST 0x7D0 /* 2 seconds */
+
+	__le32 itnl_timestamp;
+} __attribute__ ((packed));
+
+struct asd_ddb_stp_sata_target_port {
+	u8     conn_type;	  /* byte 0 */
+	u8     conn_rate;
+	__be16 init_conn_tag;
+	u8     dest_sas_addr[8];  /* bytes 4-11 */
+
+	__le16 send_queue_head;
+	u8     sq_suspended;
+	u8     ddb_type;	  /* DDB_TYPE_TARGET */
+
+	__le16 _r_a;
+
+	__be16 awt_def;
+	u8     compat_features;	  /* byte 20 */
+	u8     pathway_blocked_count;
+	__be16 arb_wait_time;
+	__be32 more_compat_features; /* byte 24 */
+
+	u8     conn_mask;
+	u8     flags;	  /* concurrent conn:2,2 and open:0(1) */
+#define SATA_MULTIPORT     0x80
+#define SUPPORTS_AFFIL     0x40
+#define STP_AFFIL_POL      0x20
+
+	u8     _r_b;
+	u8     flags2;		  /* STP close policy:0 */
+#define STP_CL_POL_NO_TX    0x00
+#define STP_CL_POL_BTW_CMDS 0x01
+
+	__le16 exec_queue_tail;
+	__le16 send_queue_tail;
+	__le16 sister_ddb;
+	__le16 ata_cmd_scbptr;
+	__le32 sata_tag_alloc_mask;
+	__le16 active_task_count;
+	__le16 _r_c;
+	__le32 sata_sactive;
+	u8     num_sata_tags;
+	u8     sata_status;
+	u8     sata_ending_status;
+	u8     itnl_reason;	  /* I_T nexus loss reason */
+	__le16 ncq_data_scb_ptr;
+	__le16 itnl_timeout;
+	__le32 itnl_timestamp;
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_init_port, describes the device descriptor block
+ * of an initiator port (when the sequencer is operating in target mode).
+ * Bytes [0,11] and [20,27] are from the OPEN address frame.
+ * The sequencer allocates an initiator port DDB entry.
+ */
+struct asd_ddb_init_port {
+	u8     conn_type;	  /* byte 0 */
+	u8     conn_rate;
+	__be16 init_conn_tag;     /* BE */
+	u8     dest_sas_addr[8];
+	__le16 send_queue_head;   /* LE, byte 12 */
+	u8     sq_suspended;
+	u8     ddb_type;	  /* DDB_TYPE_INITIATOR */
+	__le16 _r_a;
+	__be16 awt_def;		  /* BE */
+	u8     compat_features;
+	u8     pathway_blocked_count;
+	__be16 arb_wait_time;	  /* BE */
+	__be32 more_compat_features; /* BE */
+	u8     conn_mask;
+	u8     flags;		  /* == 5 */
+	u16    _r_b;
+	__le16 exec_queue_tail;	  /* execution queue tail */
+	__le16 send_queue_tail;
+	__le16 sister_ddb;
+	__le16 init_resp_timeout; /* initiator response timeout */
+	__le32 _r_c;
+	__le16 active_tasks;	  /* active task count */
+	__le16 init_list;	  /* initiator list link pointer */
+	__le32 _r_d;
+	u8     max_conn_to[3]; /* from Conn-Disc mode page, in us, LE */
+	u8     itnl_reason;	  /* I_T nexus loss reason */
+	__le16 bus_inact_to; /* from Conn-Disc mode page, in 100 us, LE */
+	__le16 itnl_to;		  /* from the Protocol Specific Port Ctrl MP */
+	__le32 itnl_timestamp;
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_sata_tag, describes a look-up table to be used
+ * by the sequencers.  SATA II, IDENTIFY DEVICE data, word 76, bit 8:
+ * NCQ support.  This table is used by the sequencers to find the
+ * corresponding SCB, given a SATA II tag value.
+ */
+struct asd_ddb_sata_tag {
+	__le16 scb_pointer[32];
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_sata_pm_table, describes a port number to
+ * connection handle look-up table.  SATA targets attached to a port
+ * multiplier require a 4-bit port number value.  There is one DDB
+ * entry of this type for each SATA port multiplier (sister DDB).
+ * Given a SATA PM port number, this table gives us the SATA PM Port
+ * DDB of the SATA port multiplier port (i.e. the SATA target
+ * discovered on the port).
+ */
+struct asd_ddb_sata_pm_table {
+	__le16 ddb_pointer[16];
+	__le16 _r_a[16];
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_sata_pm_port, describes the SATA port multiplier
+ * port format DDB.
+ */
+struct asd_ddb_sata_pm_port {
+	u8     _r_a[15];
+	u8     ddb_type;
+	u8     _r_b[13];
+	u8     pm_port_flags;
+#define PM_PORT_MASK  0xF0
+#define PM_PORT_SET   0x02
+	u8     _r_c[6];
+	__le16 sister_ddb;
+	__le16 ata_cmd_scbptr;
+	__le32 sata_tag_alloc_mask;
+	__le16 active_task_count;
+	__le16 parent_ddb;
+	__le32 sata_sactive;
+	u8     num_sata_tags;
+	u8     sata_status;
+	u8     sata_ending_status;
+	u8     _r_d[9];
+} __attribute__ ((packed));
+
+/* This struct asd_ddb_seq_shared, describes a DDB shared by the
+ * central and link sequencers.  port_map_by_links is indexed phy
+ * number [0,7]; each byte is a bit mask of all the phys that are in
+ * the same port as the indexed phy.
+ */
+struct asd_ddb_seq_shared {
+	__le16 q_free_ddb_head;
+	__le16 q_free_ddb_tail;
+	__le16 q_free_ddb_cnt;
+	__le16 q_used_ddb_head;
+	__le16 q_used_ddb_tail;
+	__le16 shared_mem_lock;
+	__le16 smp_conn_tag;
+	__le16 est_nexus_buf_cnt;
+	__le16 est_nexus_buf_thresh;
+	u32    _r_a;
+	u8     settable_max_contexts;
+	u8     _r_b[23];
+	u8     conn_not_active;
+	u8     phy_is_up;
+	u8     _r_c[8];
+	u8     port_map_by_links[8];
+} __attribute__ ((packed));
+
+/* ---------- SG Element ---------- */
+
+/* This struct sg_el, describes the hardware scatter gather buffer
+ * element.  All entries are little endian.  In an SCB, there are 2 of
+ * these, plus one more, called a link element, which indicates a
+ * sublist if needed.
+ *
+ * A link element has only the bus address set and the flags (DS) bit
+ * valid.  The bus address points to the start of the sublist.
+ *
+ * If a sublist is needed, then that sublist should also include the 2
+ * sg_el embedded in the SCB, in which case next_sg_offset is 32,
+ * since sizeof(sg_el) = 16; EOS should be 1 and EOL 0 in this case.
+ */
+struct sg_el {
+	__le64 bus_addr;
+	__le32 size;
+	__le16 _r;
+	u8     next_sg_offs;
+	u8     flags;
+#define ASD_SG_EL_DS_MASK   0x30
+#define ASD_SG_EL_DS_OCM    0x10
+#define ASD_SG_EL_DS_HM     0x00
+#define ASD_SG_EL_LIST_MASK 0xC0
+#define ASD_SG_EL_LIST_EOL  0x40
+#define ASD_SG_EL_LIST_EOS  0x80
+} __attribute__ ((packed));
+
+/* ---------- SCBs ---------- */
+
+/* An SCB (sequencer control block) is comprised of a common header
+ * and a task part, for a total of 128 bytes.  All fields are in LE
+ * order, unless otherwise noted.
+ */
+
+/* This struct scb_header, defines the SCB header format.
+ */
+struct scb_header {
+	__le64 next_scb;
+	__le16 index;		  /* transaction context */
+	u8     opcode;
+} __attribute__ ((packed));
+
+/* SCB opcodes: Execution queue
+ */
+#define INITIATE_SSP_TASK       0x00
+#define INITIATE_LONG_SSP_TASK  0x01
+#define INITIATE_BIDIR_SSP_TASK 0x02
+#define ABORT_TASK              0x03
+#define INITIATE_SSP_TMF        0x04
+#define SSP_TARG_GET_DATA       0x05
+#define SSP_TARG_GET_DATA_GOOD  0x06
+#define SSP_TARG_SEND_RESP      0x07
+#define QUERY_SSP_TASK          0x08
+#define INITIATE_ATA_TASK       0x09
+#define INITIATE_ATAPI_TASK     0x0a
+#define CONTROL_ATA_DEV         0x0b
+#define INITIATE_SMP_TASK       0x0c
+#define SMP_TARG_SEND_RESP      0x0f
+
+/* SCB opcodes: Send Queue
+ */
+#define SSP_TARG_SEND_DATA      0x40
+#define SSP_TARG_SEND_DATA_GOOD 0x41
+
+/* SCB opcodes: Link Queue
+ */
+#define CONTROL_PHY             0x80
+#define SEND_PRIMITIVE          0x81
+#define INITIATE_LINK_ADM_TASK  0x82
+
+/* SCB opcodes: other
+ */
+#define EMPTY_SCB               0xc0
+#define INITIATE_SEQ_ADM_TASK   0xc1
+#define EST_ICL_TARG_WINDOW     0xc2
+#define COPY_MEM                0xc3
+#define CLEAR_NEXUS             0xc4
+#define INITIATE_DDB_ADM_TASK   0xc6
+#define ESTABLISH_NEXUS_ESCB    0xd0
+
+#define LUN_SIZE                8
+
+/* See SAS spec, task IU
+ */
+struct ssp_task_iu {
+	u8     lun[LUN_SIZE];	  /* BE */
+	u16    _r_a;
+	u8     tmf;
+	u8     _r_b;
+	__be16 tag;		  /* BE */
+	u8     _r_c[14];
+} __attribute__ ((packed));
+
+/* See SAS spec, command IU
+ */
+struct ssp_command_iu {
+	u8     lun[LUN_SIZE];
+	u8     _r_a;
+	u8     efb_prio_attr;	  /* enable first burst, task prio & attr */
+#define EFB_MASK        0x80
+#define TASK_PRIO_MASK	0x78
+#define TASK_ATTR_MASK  0x07
+
+	u8    _r_b;
+	u8     add_cdb_len;	  /* in dwords, since bit 0,1 are reserved */
+	union {
+		u8     cdb[16];
+		struct {
+			__le64 long_cdb_addr;	  /* bus address, LE */
+			__le32 long_cdb_size;	  /* LE */
+			u8     _r_c[3];
+			u8     eol_ds;		  /* eol:6,6, ds:5,4 */
+		} long_cdb;	  /* sequencer extension */
+	};
+} __attribute__ ((packed));
+
+struct xfer_rdy_iu {
+	__be32 requested_offset;  /* BE */
+	__be32 write_data_len;	  /* BE */
+	__be32 _r_a;
+} __attribute__ ((packed));
+
+/* ---------- SCB tasks ---------- */
+
+/* This is both ssp_task and long_ssp_task
+ */
+struct initiate_ssp_task {
+	u8     proto_conn_rate;	  /* proto:6,4, conn_rate:3,0 */
+	__le32 total_xfer_len;
+	struct ssp_frame_hdr  ssp_frame;
+	struct ssp_command_iu ssp_cmd;
+	__le16 sister_scb;	  /* 0xFFFF */
+	__le16 conn_handle;	  /* index to DDB for the intended target */
+	u8     data_dir;	  /* :1,0 */
+#define DATA_DIR_NONE   0x00
+#define DATA_DIR_IN     0x01
+#define DATA_DIR_OUT    0x02
+#define DATA_DIR_BYRECIPIENT 0x03
+
+	u8     _r_a;
+	u8     retry_count;
+	u8     _r_b[5];
+	struct sg_el sg_element[3]; /* 2 real and 1 link */
+} __attribute__ ((packed));
+
+/* This defines both ata_task and atapi_task.
+ * ata: C bit of FIS should be 1,
+ * atapi: C bit of FIS should be 1, and command register should be 0xA0,
+ * to indicate a packet command.
+ */
+struct initiate_ata_task {
+	u8     proto_conn_rate;
+	__le32 total_xfer_len;
+	struct host_to_dev_fis fis;
+	__le32 data_offs;
+	u8     atapi_packet[16];
+	u8     _r_a[12];
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     ata_flags;	  /* CSMI:6,6, DTM:4,4, QT:3,3, data dir:1,0 */
+#define CSMI_TASK           0x40
+#define DATA_XFER_MODE_DMA  0x10
+#define ATA_Q_TYPE_MASK     0x08
+#define	ATA_Q_TYPE_UNTAGGED 0x00
+#define ATA_Q_TYPE_NCQ      0x08
+
+	u8     _r_b;
+	u8     retry_count;
+	u8     _r_c;
+	u8     flags;
+#define STP_AFFIL_POLICY   0x20
+#define SET_AFFIL_POLICY   0x10
+#define RET_PARTIAL_SGLIST 0x02
+
+	u8     _r_d[3];
+	struct sg_el sg_element[3];
+} __attribute__ ((packed));
+
+struct initiate_smp_task {
+	u8     proto_conn_rate;
+	u8     _r_a[40];
+	struct sg_el smp_req;
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     _r_c[8];
+	struct sg_el smp_resp;
+	u8     _r_d[32];
+} __attribute__ ((packed));
+
+struct control_phy {
+	u8     phy_id;
+	u8     sub_func;
+#define DISABLE_PHY            0x00
+#define ENABLE_PHY             0x01
+#define RELEASE_SPINUP_HOLD    0x02
+#define ENABLE_PHY_NO_SAS_OOB  0x03
+#define ENABLE_PHY_NO_SATA_OOB 0x04
+#define PHY_NO_OP              0x05
+#define EXECUTE_HARD_RESET     0x81
+
+	u8     func_mask;
+	u8     speed_mask;
+	u8     hot_plug_delay;
+	u8     port_type;
+	u8     flags;
+#define DEV_PRES_TIMER_OVERRIDE_ENABLE 0x01
+#define DISABLE_PHY_IF_OOB_FAILS       0x02
+
+	__le32 timeout_override;
+	u8     link_reset_retries;
+	u8     _r_a[47];
+	__le16 conn_handle;
+	u8     _r_b[56];
+} __attribute__ ((packed));
+
+struct control_ata_dev {
+	u8     proto_conn_rate;
+	__le32 _r_a;
+	struct host_to_dev_fis fis;
+	u8     _r_b[32];
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     ata_flags;	  /* 0 */
+	u8     _r_c[55];
+} __attribute__ ((packed));
+
+struct empty_scb {
+	u8     num_valid;
+	__le32 _r_a;
+#define ASD_EDBS_PER_SCB 7
+/* header+data+CRC+DMA suffix data */
+#define ASD_EDB_SIZE (24+1024+4+16)
+	struct sg_el eb[ASD_EDBS_PER_SCB];
+#define ELEMENT_NOT_VALID  0xC0
+} __attribute__ ((packed));
+
+struct initiate_link_adm {
+	u8     phy_id;
+	u8     sub_func;
+#define GET_LINK_ERROR_COUNT      0x00
+#define RESET_LINK_ERROR_COUNT    0x01
+#define ENABLE_NOTIFY_SPINUP_INTS 0x02
+
+	u8     _r_a[57];
+	__le16 conn_handle;
+	u8     _r_b[56];
+} __attribute__ ((packed));
+
+struct copy_memory {
+	u8     _r_a;
+	__le16 xfer_len;
+	__le16 _r_b;
+	__le64 src_busaddr;
+	u8     src_ds;		  /* See definition of sg_el */
+	u8     _r_c[45];
+	__le16 conn_handle;
+	__le64 _r_d;
+	__le64 dest_busaddr;
+	u8     dest_ds;		  /* See definition of sg_el */
+	u8     _r_e[39];
+} __attribute__ ((packed));
+
+struct abort_task {
+	u8     proto_conn_rate;
+	__le32 _r_a;
+	struct ssp_frame_hdr ssp_frame;
+	struct ssp_task_iu ssp_task;
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     flags;	  /* ovrd_itnl_timer:3,3, suspend_data_trans:2,2 */
+#define SUSPEND_DATA_TRANS 0x04
+
+	u8     _r_b;
+	u8     retry_count;
+	u8     _r_c[5];
+	__le16 index;  /* Transaction context of task to be queried */
+	__le16 itnl_to;
+	u8     _r_d[44];
+} __attribute__ ((packed));
+
+struct clear_nexus {
+	u8     nexus;
+#define NEXUS_ADAPTER  0x00
+#define NEXUS_PORT     0x01
+#define NEXUS_I_T      0x02
+#define NEXUS_I_T_L    0x03
+#define NEXUS_TAG      0x04
+#define NEXUS_TRANS_CX 0x05
+#define NEXUS_SATA_TAG 0x06
+#define NEXUS_T_L      0x07
+#define NEXUS_L        0x08
+#define NEXUS_T_TAG    0x09
+
+	__le32 _r_a;
+	u8     flags;
+#define SUSPEND_TX     0x80
+#define RESUME_TX      0x40
+#define SEND_Q         0x04
+#define EXEC_Q         0x02
+#define NOTINQ         0x01
+
+	u8     _r_b[3];
+	u8     conn_mask;
+	u8     _r_c[19];
+	struct ssp_task_iu ssp_task; /* LUN and TAG */
+	__le16 _r_d;
+	__le16 conn_handle;
+	__le64 _r_e;
+	__le16 index;  /* Transaction context of task to be cleared */
+	__le16 context;		  /* Clear nexus context */
+	u8     _r_f[44];
+} __attribute__ ((packed));
+
+struct initiate_ssp_tmf {
+	u8     proto_conn_rate;
+	__le32 _r_a;
+	struct ssp_frame_hdr ssp_frame;
+	struct ssp_task_iu ssp_task;
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     flags;	  /* itnl override and suspend data tx */
+#define OVERRIDE_ITNL_TIMER  8
+
+	u8     _r_b;
+	u8     retry_count;
+	u8     _r_c[5];
+	__le16 index;  /* Transaction context of task to be queried */
+	__le16 itnl_to;
+	u8     _r_d[44];
+} __attribute__ ((packed));
+
+/* Transmits an arbitrary primitive on the link.
+ * Used for NOTIFY and BROADCAST.
+ */
+struct send_prim {
+	u8     phy_id;
+	u8     wait_transmit; 	  /* :0,0 */
+	u8     xmit_flags;
+#define XMTPSIZE_MASK      0xF0
+#define XMTPSIZE_SINGLE    0x10
+#define XMTPSIZE_REPEATED  0x20
+#define XMTPSIZE_CONT      0x20	  /* NOTE(review): same as _REPEATED; confirm */
+#define XMTPSIZE_TRIPLE    0x30
+#define XMTPSIZE_REDUNDANT 0x60
+#define XMTPSIZE_INF       0
+
+#define XMTCONTEN          0x04
+#define XMTPFRM            0x02	  /* Transmit at the next frame boundary */
+#define XMTPIMM            0x01	  /* Transmit immediately */
+
+	__le16 _r_a;
+	u8     prim[4];		  /* K, D0, D1, D2 */
+	u8     _r_b[50];
+	__le16 conn_handle;
+	u8     _r_c[56];
+} __attribute__ ((packed));
+
+/* This describes both SSP Target Get Data and SSP Target Get Data And
+ * Send Good Response SCBs.  Used when the sequencer is operating in
+ * target mode...
+ */
+struct ssp_targ_get_data {
+	u8     proto_conn_rate;
+	__le32 total_xfer_len;
+	struct ssp_frame_hdr ssp_frame;
+	struct xfer_rdy_iu  xfer_rdy;
+	u8     lun[LUN_SIZE];
+	__le64 _r_a;
+	__le16 sister_scb;
+	__le16 conn_handle;
+	u8     data_dir;	  /* 01b */
+	u8     _r_b;
+	u8     retry_count;
+	u8     _r_c[5];
+	struct sg_el sg_element[3];
+} __attribute__ ((packed));
+
+/* ---------- The actual SCB struct ---------- */
+
+struct scb {
+	struct scb_header header;
+	union {
+		struct initiate_ssp_task ssp_task;
+		struct initiate_ata_task ata_task;
+		struct initiate_smp_task smp_task;
+		struct control_phy       control_phy;
+		struct control_ata_dev   control_ata_dev;
+		struct empty_scb         escb;
+		struct initiate_link_adm link_adm;
+		struct copy_memory       cp_mem;
+		struct abort_task        abort_task;
+		struct clear_nexus       clear_nexus;
+		struct initiate_ssp_tmf  ssp_tmf;
+	};
+} __attribute__ ((packed));
+
+/* ---------- Done List ---------- */
+/* The done list entry opcode field is defined below.
+ * The mnemonic encoding and meaning is as follows:
+ * TC - Task Complete, status was received and acknowledged
+ * TF - Task Failed, indicates an error prior to receiving acknowledgment
+ *   for the command:
+ *   - no conn,
+ *   - NACK or R_ERR received in response to this command,
+ *   - credit blocked or not available, or in the case of SMP request,
+ *   - no SMP response was received.
+ *   In these four cases it is known that the target didn't receive the
+ *   command.
+ * TI - Task Interrupted, error after the command was acknowledged.  It is
+ *   known that the command was received by the target.
+ * TU - Task Unacked, command was transmitted but neither ACK (R_OK) nor NAK
+ *   (R_ERR) was received due to loss of signal, broken connection, loss of
+ *   dword sync or other reason.  The application client should send the
+ *   appropriate task query.
+ * TA - Task Aborted, see TF.
+ * _RESP - The completion includes an empty buffer containing status.
+ * TO - Timeout.
+ */
+#define TC_NO_ERROR             0x00
+#define TC_UNDERRUN             0x01
+#define TC_OVERRUN              0x02
+#define TF_OPEN_TO              0x03
+#define TF_OPEN_REJECT          0x04
+#define TI_BREAK                0x05
+#define TI_PROTO_ERR            0x06
+#define TC_SSP_RESP             0x07
+#define TI_PHY_DOWN             0x08
+#define TF_PHY_DOWN             0x09
+#define TC_LINK_ADM_RESP        0x0a
+#define TC_CSMI                 0x0b
+#define TC_ATA_RESP             0x0c
+#define TU_PHY_DOWN             0x0d
+#define TU_BREAK                0x0e
+#define TI_SATA_TO              0x0f
+#define TI_NAK                  0x10
+#define TC_CONTROL_PHY          0x11
+#define TF_BREAK                0x12
+#define TC_RESUME               0x13
+#define TI_ACK_NAK_TO           0x14
+#define TF_SMPRSP_TO            0x15
+#define TF_SMP_XMIT_RCV_ERR     0x16
+#define TC_PARTIAL_SG_LIST      0x17
+#define TU_ACK_NAK_TO           0x18
+#define TU_SATA_TO              0x19
+#define TF_NAK_RECV             0x1a
+#define TA_I_T_NEXUS_LOSS       0x1b
+#define TC_ATA_R_ERR_RECV       0x1c
+#define TF_TMF_NO_CTX           0x1d
+#define TA_ON_REQ               0x1e
+#define TF_TMF_NO_TAG           0x1f
+#define TF_TMF_TAG_FREE         0x20
+#define TF_TMF_TASK_DONE        0x21
+#define TF_TMF_NO_CONN_HANDLE   0x22
+#define TC_TASK_CLEARED         0x23
+#define TI_SYNCS_RECV           0x24
+#define TU_SYNCS_RECV           0x25
+#define TF_IRTT_TO              0x26
+#define TF_NO_SMP_CONN          0x27
+#define TF_IU_SHORT             0x28
+#define TF_DATA_OFFS_ERR        0x29
+#define TF_INV_CONN_HANDLE      0x2a
+#define TF_REQUESTED_N_PENDING  0x2b
+
+/* 0xc1 - 0xc7: empty buffer received,
+   0xd1 - 0xd7: establish nexus empty buffer received
+*/
+/* This is the ESCB mask */
+#define ESCB_RECVD              0xC0
+
+
+/* This struct done_list_struct defines the done list entry.
+ * All fields are LE.
+ */
+struct done_list_struct {
+	__le16 index;		  /* aka transaction context */
+	u8     opcode;
+	u8     status_block[4];
+	u8     toggle;		  /* bit 0 */
+#define DL_TOGGLE_MASK     0x01
+} __attribute__ ((packed));
+
+/* ---------- PHYS ---------- */
+
+struct asd_phy {
+	struct asd_sas_phy        sas_phy;
+	struct asd_phy_desc   *phy_desc; /* hw profile */
+
+	struct sas_identify_frame *identify_frame;
+	struct asd_dma_tok  *id_frm_tok;
+
+	u8         frame_rcvd[ASD_EDB_SIZE];
+};
+
+
+#define ASD_SCB_SIZE sizeof(struct scb)
+#define ASD_DDB_SIZE sizeof(struct asd_ddb_ssp_smp_target_port)
+
+/* Define this to 0 if you do not want NOTIFY (ENABLE SPINUP) sent.
+ * Default: 0x10 (it's a mask)
+ */
+#define ASD_NOTIFY_ENABLE_SPINUP  0x10
+
+/* If enabled, set this to the interval between transmission
+ * of NOTIFY (ENABLE SPINUP). In units of 200 us.
+ */
+#define ASD_NOTIFY_TIMEOUT        2500
+
+/* Initial delay after OOB, before we transmit NOTIFY (ENABLE SPINUP).
+ * If 0, transmit immediately. In milliseconds.
+ */
+#define ASD_NOTIFY_DOWN_COUNT     0
+
+/* Device present timer timeout constant, 10 ms. */
+#define ASD_DEV_PRESENT_TIMEOUT   0x2710
+
+#define ASD_SATA_INTERLOCK_TIMEOUT 0
+
+/* How long to wait before shutting down an STP connection, unless
+ * an STP target sent frame(s). 50 usec.
+ * IGNORED by the sequencer (i.e. value 0 always).
+ */
+#define ASD_STP_SHUTDOWN_TIMEOUT  0x0
+
+/* ATA soft reset timer timeout. 5 usec. */
+#define ASD_SRST_ASSERT_TIMEOUT   0x05
+
+/* 31 sec */
+#define ASD_RCV_FIS_TIMEOUT       0x01D905C0
+
+#define ASD_ONE_MILLISEC_TIMEOUT  0x03e8
+
+/* COMINIT timer */
+#define ASD_TEN_MILLISEC_TIMEOUT  0x2710
+#define ASD_COMINIT_TIMEOUT ASD_TEN_MILLISEC_TIMEOUT
+
+/* 1 sec */
+#define ASD_SMP_RCV_TIMEOUT       0x000F4240
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c
new file mode 100644
index 0000000..7ee49b5
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_scb.c
@@ -0,0 +1,758 @@
+/*
+ * Aic94xx SAS/SATA driver SCB management.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+#include "aic94xx_hwi.h"
+#include "aic94xx_seq.h"
+
+#include "aic94xx_dump.h"
+
+/* ---------- EMPTY SCB ---------- */
+
+#define DL_PHY_MASK      7	/* low 3 bits mask (phy id, EDB number) */
+#define BYTES_DMAED      0
+#define PRIMITIVE_RECVD  0x08
+#define PHY_EVENT        0x10
+#define LINK_RESET_ERROR 0x18
+#define TIMER_EVENT      0x20
+#define REQ_TASK_ABORT   0xF0
+#define REQ_DEVICE_RESET 0xF1
+#define SIGNAL_NCQ_ERROR 0xF2
+#define CLEAR_NCQ_ERROR  0xF3
+/* Status bits asd_phy_event_tasklet() keeps from status_block[1] */
+#define PHY_EVENTS_STATUS (CURRENT_LOSS_OF_SIGNAL | CURRENT_OOB_DONE   \
+			   | CURRENT_SPINUP_HOLD | CURRENT_GTO_TIMEOUT \
+			   | CURRENT_OOB_ERROR)
+
+/* Record the link rate (low 3 bits of oob_mode; an unmatched speed keeps
+ * the old linkrate) and the OOB mode that oob_mode reports for this phy. */
+static inline void get_lrate_mode(struct asd_phy *phy, u8 oob_mode)
+{
+	struct asd_sas_phy *sphy = &phy->sas_phy;
+	struct sas_phy *tphy = sphy->phy;
+
+	switch (oob_mode & 7) {
+	case PHY_SPEED_60:
+		/* FIXME: sas transport class doesn't have this */
+		sphy->linkrate = SAS_LINK_RATE_6_0_GBPS;
+		break;
+	case PHY_SPEED_30:
+		sphy->linkrate = SAS_LINK_RATE_3_0_GBPS;
+		break;
+	case PHY_SPEED_15:
+		sphy->linkrate = SAS_LINK_RATE_1_5_GBPS;
+		break;
+	}
+	tphy->negotiated_linkrate = sphy->linkrate;
+	tphy->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
+	tphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+	tphy->maximum_linkrate = phy->phy_desc->max_sas_lrate;
+	tphy->minimum_linkrate = phy->phy_desc->min_sas_lrate;
+
+	if (oob_mode & SAS_MODE)
+		sphy->oob_mode = SAS_OOB_MODE;
+	else if (oob_mode & SATA_MODE)
+		sphy->oob_mode = SATA_OOB_MODE;
+}
+
+static inline void asd_phy_event_tasklet(struct asd_ascb *ascb,
+					 struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_ha_struct *sas_ha = &asd_ha->sas_ha;
+	int phy_id = dl->status_block[0] & DL_PHY_MASK;
+	struct asd_phy *phy = &asd_ha->phys[phy_id];
+	/* Phy event entry: status_block[1] = OOB status, [2] = OOB mode */
+	u8 oob_status = dl->status_block[1] & PHY_EVENTS_STATUS;
+	u8 oob_mode   = dl->status_block[2];
+
+	switch (oob_status) {
+	case CURRENT_LOSS_OF_SIGNAL:
+		/* directly attached device was removed */
+		ASD_DPRINTK("phy%d: device unplugged\n", phy_id);
+		asd_turn_led(asd_ha, phy_id, 0);
+		sas_phy_disconnected(&phy->sas_phy);
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL);
+		break;
+	case CURRENT_OOB_DONE:
+		/* hot plugged device */
+		asd_turn_led(asd_ha, phy_id, 1);
+		get_lrate_mode(phy, oob_mode);
+		ASD_DPRINTK("phy%d device plugged: lrate:0x%x, proto:0x%x\n",
+			    phy_id, phy->sas_phy.linkrate, phy->sas_phy.iproto);
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE);
+		break;
+	case CURRENT_SPINUP_HOLD:
+		/* hot plug SATA, no COMWAKE sent */
+		asd_turn_led(asd_ha, phy_id, 1);
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD);
+		break;
+	case CURRENT_GTO_TIMEOUT:
+	case CURRENT_OOB_ERROR:
+		ASD_DPRINTK("phy%d error while OOB: oob status:0x%x\n", phy_id,
+			    dl->status_block[1]);
+		asd_turn_led(asd_ha, phy_id, 0);
+		sas_phy_disconnected(&phy->sas_phy);
+		sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR);
+		break;
+	}
+}
+
+/* Ordinal of phy among enabled phys, even if sparse; 0 when not found. */
+static inline unsigned ord_phy(struct asd_ha_struct *asd_ha,
+			       struct asd_phy *phy)
+{
+	u8 enabled_mask = asd_ha->hw_prof.enabled_phys;
+	int i, k = 0;
+
+	for_each_phy(enabled_mask, enabled_mask, i) {
+		if (&asd_ha->phys[i] == phy)
+			return k;
+		k++;
+	}
+	return 0;	/* NOTE: indistinguishable from the first visited phy */
+}
+
+/**
+ * asd_get_attached_sas_addr -- extract/generate attached SAS address
+ * @phy: pointer to asd_phy
+ * @sas_addr: pointer to buffer where the SAS address is to be written
+ *
+ * This function extracts the SAS address from an IDENTIFY frame
+ * received.  If OOB is SATA and the frame starts with 0x34, a SAS address
+ * is generated from the phy's sas_addr, sata_name_base and ord_phy().
+ *
+ * LOCKING: the frame_rcvd_lock needs to be held since this parses the frame
+ * buffer.
+ */
+static inline void asd_get_attached_sas_addr(struct asd_phy *phy, u8 *sas_addr)
+{
+	if (phy->sas_phy.frame_rcvd[0] == 0x34
+	    && phy->sas_phy.oob_mode == SATA_OOB_MODE) {
+		struct asd_ha_struct *asd_ha = phy->sas_phy.ha->lldd_ha;
+		/* FIS device-to-host */
+		u64 addr = be64_to_cpu(*(__be64 *)phy->phy_desc->sas_addr);
+
+		addr += asd_ha->hw_prof.sata_name_base + ord_phy(asd_ha, phy);
+		*(__be64 *)sas_addr = cpu_to_be64(addr);
+	} else {
+		struct sas_identify_frame *idframe =
+			(void *) phy->sas_phy.frame_rcvd;
+		memcpy(sas_addr, idframe->sas_addr, SAS_ADDR_SIZE);
+	}
+}
+
+static inline void asd_bytes_dmaed_tasklet(struct asd_ascb *ascb,
+					   struct done_list_struct *dl,
+					   int edb_id, int phy_id)
+{
+	unsigned long flags;
+	int edb_el = edb_id + ascb->edb_index;
+	struct asd_dma_tok *edb = ascb->ha->seq.edb_arr[edb_el];
+	struct asd_phy *phy = &ascb->ha->phys[phy_id];
+	struct sas_ha_struct *sas_ha = phy->sas_phy.ha;
+	u16 size = ((dl->status_block[3] & 7) << 8) | dl->status_block[2];
+	/* size is the 11-bit length from the entry; clamp it to our buffer */
+	size = min(size, (u16) sizeof(phy->frame_rcvd));
+	/* copy the frame and derive the attached address under the lock */
+	spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags);
+	memcpy(phy->sas_phy.frame_rcvd, edb->vaddr, size);
+	phy->sas_phy.frame_rcvd_size = size;
+	asd_get_attached_sas_addr(phy, phy->sas_phy.attached_sas_addr);
+	spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags);
+	asd_dump_frame_rcvd(phy, dl);
+	sas_ha->notify_port_event(&phy->sas_phy, PORTE_BYTES_DMAED);
+}
+
+static inline void asd_link_reset_err_tasklet(struct asd_ascb *ascb,
+					      struct done_list_struct *dl,
+					      int phy_id)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_ha_struct *sas_ha = &asd_ha->sas_ha;
+	struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id];
+	u8 lr_error = dl->status_block[1];
+	u8 retries_left = dl->status_block[2];
+	struct asd_ascb *cp;
+	int num = 1;
+
+	switch (lr_error) {
+	case 0:
+		ASD_DPRINTK("phy%d: Receive ID timer expired\n", phy_id);
+		break;
+	case 1:
+		ASD_DPRINTK("phy%d: Loss of signal\n", phy_id);
+		break;
+	case 2:
+		ASD_DPRINTK("phy%d: Loss of dword sync\n", phy_id);
+		break;
+	case 3:
+		ASD_DPRINTK("phy%d: Receive FIS timeout\n", phy_id);
+		break;
+	default:
+		ASD_DPRINTK("phy%d: unknown link reset error code: 0x%x\n",
+			    phy_id, lr_error);
+		break;
+	}
+
+	asd_turn_led(asd_ha, phy_id, 0);
+	sas_phy_disconnected(sas_phy);
+	sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR);
+
+	/* With no retries left, build and post an ENABLE_PHY control-phy
+	 * SCB; while retries remain there is nothing more to do here. */
+	if (retries_left != 0)
+		return;
+
+	cp = asd_ascb_alloc_list(asd_ha, &num, GFP_ATOMIC);
+	if (!cp) {
+		asd_printk("%s: out of memory\n", __FUNCTION__);
+		return;
+	}
+	ASD_DPRINTK("phy%d: retries:0 performing link reset seq\n", phy_id);
+	asd_build_control_phy(cp, phy_id, ENABLE_PHY);
+	if (asd_post_ascb_list(asd_ha, cp, 1) != 0)
+		asd_ascb_free(cp);
+}
+
+/* Decode a primitive-received done list entry and notify sas_ha of it. */
+static inline void asd_primitive_rcvd_tasklet(struct asd_ascb *ascb,
+				struct done_list_struct *dl, int phy_id)
+{
+	unsigned long flags;
+	struct sas_ha_struct *sas_ha = &ascb->ha->sas_ha;
+	struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id];
+	u8  reg  = dl->status_block[1];	/* low 2 bits: byte position of cont */
+	u32 cont = (u32) dl->status_block[2] << ((reg & 3)*8);
+	/* u32 cast above: a promoted int must not be shifted into bit 31 */
+	reg &= ~3;
+	switch (reg) {
+	case LmPRMSTAT0BYTE0:
+		switch (cont) {
+		case LmBROADCH:
+		case LmBROADRVCH0:
+		case LmBROADRVCH1:
+		case LmBROADSES:
+			ASD_DPRINTK("phy%d: BROADCAST change received:%d\n",
+				    phy_id, cont);
+			spin_lock_irqsave(&sas_phy->sas_prim_lock, flags);
+			sas_phy->sas_prim = ffs(cont);
+			spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags);
+			sas_ha->notify_port_event(sas_phy,PORTE_BROADCAST_RCVD);
+			break;
+
+		case LmUNKNOWNP:
+			ASD_DPRINTK("phy%d: unknown BREAK\n", phy_id);
+			break;
+
+		default:
+			ASD_DPRINTK("phy%d: primitive reg:0x%x, cont:0x%04x\n",
+				    phy_id, reg, cont);
+			break;
+		}
+		break;
+	case LmPRMSTAT1BYTE0:
+		switch (cont) {
+		case LmHARDRST:
+			ASD_DPRINTK("phy%d: HARD_RESET primitive rcvd\n",
+				    phy_id);
+			/* The sequencer disables all phys on that port.
+			 * We have to re-enable the phys ourselves. */
+			sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET);
+			break;
+
+		default:
+			ASD_DPRINTK("phy%d: primitive reg:0x%x, cont:0x%04x\n",
+				    phy_id, reg, cont);
+			break;
+		}
+		break;
+	default:
+		ASD_DPRINTK("unknown primitive register:0x%x\n",
+			    dl->status_block[1]);
+		break;
+	}
+}
+
+/**
+ * asd_invalidate_edb -- invalidate an EDB and if necessary post the ESCB
+ * @ascb: pointer to Empty SCB
+ * @edb_id: index [0,6] to the empty data buffer which is to be invalidated
+ *
+ * After an EDB has been invalidated, if all EDBs in this ESCB have been
+ * invalidated, the ESCB is posted back to the sequencer.
+ * Context is tasklet/IRQ.
+ */
+void asd_invalidate_edb(struct asd_ascb *ascb, int edb_id)
+{
+	struct asd_seq_data *seq = &ascb->ha->seq;
+	struct empty_scb *escb = &ascb->scb->escb;
+	struct sg_el     *eb   = &escb->eb[edb_id];
+	struct asd_dma_tok *edb = seq->edb_arr[ascb->edb_index + edb_id];
+
+	memset(edb->vaddr, 0, ASD_EDB_SIZE);
+	eb->flags |= ELEMENT_NOT_VALID;
+	escb->num_valid--;
+
+	if (escb->num_valid == 0) {
+		int i;
+		/* ASD_DPRINTK("reposting escb: vaddr: 0x%p, "
+			    "dma_handle: 0x%08llx, next: 0x%08llx, "
+			    "index:%d, opcode:0x%02x\n",
+			    ascb->dma_scb.vaddr,
+			    (u64)ascb->dma_scb.dma_handle,
+			    le64_to_cpu(ascb->scb->header.next_scb),
+			    le16_to_cpu(ascb->scb->header.index),
+			    ascb->scb->header.opcode);
+		*/
+		escb->num_valid = ASD_EDBS_PER_SCB;
+		for (i = 0; i < ASD_EDBS_PER_SCB; i++)
+			escb->eb[i].flags = 0;
+		if (!list_empty(&ascb->list))
+			list_del_init(&ascb->list);
+		i = asd_post_escb_list(ascb->ha, ascb, 1);
+		if (i)
+			asd_printk("couldn't post escb, err:%d\n", i);
+	}
+}
+
+static void escb_tasklet_complete(struct asd_ascb *ascb,
+				  struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_ha_struct *sas_ha = &asd_ha->sas_ha;
+	int edb = (dl->opcode & DL_PHY_MASK) - 1; /* [0xc1,0xc7] -> [0,6] */
+	u8  sb_opcode = dl->status_block[0];
+	int phy_id = sb_opcode & DL_PHY_MASK;	/* phy number shares the byte with the event code */
+	struct asd_sas_phy *sas_phy = sas_ha->sas_phy[phy_id];	/* NOTE(review): phy_id is not range-checked */
+
+	if (edb > 6 || edb < 0) {	/* NOTE(review): only logged; edb is still used below */
+		ASD_DPRINTK("edb is 0x%x! dl->opcode is 0x%x\n",
+			    edb, dl->opcode);
+		ASD_DPRINTK("sb_opcode : 0x%x, phy_id: 0x%x\n",
+			    sb_opcode, phy_id);
+		ASD_DPRINTK("escb: vaddr: 0x%p, "
+			    "dma_handle: 0x%llx, next: 0x%llx, "
+			    "index:%d, opcode:0x%02x\n",
+			    ascb->dma_scb.vaddr,
+			    (unsigned long long)ascb->dma_scb.dma_handle,
+			    (unsigned long long)
+			    le64_to_cpu(ascb->scb->header.next_scb),
+			    le16_to_cpu(ascb->scb->header.index),
+			    ascb->scb->header.opcode);
+	}
+
+	sb_opcode &= ~DL_PHY_MASK;	/* what remains is the event code dispatched on below */
+
+	switch (sb_opcode) {
+	case BYTES_DMAED:
+		ASD_DPRINTK("%s: phy%d: BYTES_DMAED\n", __FUNCTION__, phy_id);
+		asd_bytes_dmaed_tasklet(ascb, dl, edb, phy_id);
+		break;
+	case PRIMITIVE_RECVD:
+		ASD_DPRINTK("%s: phy%d: PRIMITIVE_RECVD\n", __FUNCTION__,
+			    phy_id);
+		asd_primitive_rcvd_tasklet(ascb, dl, phy_id);
+		break;
+	case PHY_EVENT:
+		ASD_DPRINTK("%s: phy%d: PHY_EVENT\n", __FUNCTION__, phy_id);
+		asd_phy_event_tasklet(ascb, dl);
+		break;
+	case LINK_RESET_ERROR:
+		ASD_DPRINTK("%s: phy%d: LINK_RESET_ERROR\n", __FUNCTION__,
+			    phy_id);
+		asd_link_reset_err_tasklet(ascb, dl, phy_id);
+		break;
+	case TIMER_EVENT:
+		ASD_DPRINTK("%s: phy%d: TIMER_EVENT, lost dw sync\n",
+			    __FUNCTION__, phy_id);
+		asd_turn_led(asd_ha, phy_id, 0);
+		/* the device is gone */
+		sas_phy_disconnected(sas_phy);
+		sas_ha->notify_port_event(sas_phy, PORTE_TIMER_EVENT);
+		break;
+	case REQ_TASK_ABORT:	/* this and the next three events are logged only */
+		ASD_DPRINTK("%s: phy%d: REQ_TASK_ABORT\n", __FUNCTION__,
+			    phy_id);
+		break;
+	case REQ_DEVICE_RESET:
+		ASD_DPRINTK("%s: phy%d: REQ_DEVICE_RESET\n", __FUNCTION__,
+			    phy_id);
+		break;
+	case SIGNAL_NCQ_ERROR:
+		ASD_DPRINTK("%s: phy%d: SIGNAL_NCQ_ERROR\n", __FUNCTION__,
+			    phy_id);
+		break;
+	case CLEAR_NCQ_ERROR:
+		ASD_DPRINTK("%s: phy%d: CLEAR_NCQ_ERROR\n", __FUNCTION__,
+			    phy_id);
+		break;
+	default:
+		ASD_DPRINTK("%s: phy%d: unknown event:0x%x\n", __FUNCTION__,
+			    phy_id, sb_opcode);
+		ASD_DPRINTK("edb is 0x%x! dl->opcode is 0x%x\n",
+			    edb, dl->opcode);
+		ASD_DPRINTK("sb_opcode : 0x%x, phy_id: 0x%x\n",
+			    sb_opcode, phy_id);
+		ASD_DPRINTK("escb: vaddr: 0x%p, "
+			    "dma_handle: 0x%llx, next: 0x%llx, "
+			    "index:%d, opcode:0x%02x\n",
+			    ascb->dma_scb.vaddr,
+			    (unsigned long long)ascb->dma_scb.dma_handle,
+			    (unsigned long long)
+			    le64_to_cpu(ascb->scb->header.next_scb),
+			    le16_to_cpu(ascb->scb->header.index),
+			    ascb->scb->header.opcode);
+
+		break;
+	}
+
+	asd_invalidate_edb(ascb, edb);	/* runs for every event, even if edb failed the range check */
+}
+
+int asd_init_post_escbs(struct asd_ha_struct *asd_ha)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	int i;
+
+	for (i = 0; i < seq->num_escbs; i++)
+		seq->escb_arr[i]->tasklet_complete = escb_tasklet_complete;
+
+	ASD_DPRINTK("posting %d escbs\n", i);
+	return asd_post_escb_list(asd_ha, seq->escb_arr[0], seq->num_escbs);
+}
+
+/* ---------- CONTROL PHY ---------- */
+
+#define CONTROL_PHY_STATUS (CURRENT_DEVICE_PRESENT | CURRENT_OOB_DONE   \
+			    | CURRENT_SPINUP_HOLD | CURRENT_GTO_TIMEOUT \
+			    | CURRENT_OOB_ERROR)
+
+/**
+ * control_phy_tasklet_complete -- tasklet complete for CONTROL PHY ascb
+ * @ascb: pointer to an ascb
+ * @dl: pointer to the done list entry
+ *
+ * This function completes a CONTROL PHY scb and frees the ascb.
+ * A note on LEDs:
+ *  - an LED blinks if there is IO through it,
+ *  - if a device is connected to the LED, it is lit,
+ *  - if no device is connected to the LED, it is dimmed (off).
+ */
+static void control_phy_tasklet_complete(struct asd_ascb *ascb,
+					 struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct scb *scb = ascb->scb;
+	struct control_phy *control_phy = &scb->control_phy;
+	u8 phy_id = control_phy->phy_id;
+	struct asd_phy *phy = &ascb->ha->phys[phy_id];
+
+	u8 status     = dl->status_block[0];
+	u8 oob_status = dl->status_block[1];
+	u8 oob_mode   = dl->status_block[2];
+	/* u8 oob_signals= dl->status_block[3]; */
+
+	if (status != 0) {
+		ASD_DPRINTK("%s: phy%d status block opcode:0x%x\n",
+			    __FUNCTION__, phy_id, status);
+		goto out;
+	}
+
+	switch (control_phy->sub_func) {
+	case DISABLE_PHY:
+		asd_ha->hw_prof.enabled_phys &= ~(1 << phy_id);
+		asd_turn_led(asd_ha, phy_id, 0);
+		asd_control_led(asd_ha, phy_id, 0);
+		ASD_DPRINTK("%s: disable phy%d\n", __FUNCTION__, phy_id);
+		break;
+
+	case ENABLE_PHY:
+		asd_control_led(asd_ha, phy_id, 1);
+		if (oob_status & CURRENT_OOB_DONE) {
+			asd_ha->hw_prof.enabled_phys |= (1 << phy_id);
+			get_lrate_mode(phy, oob_mode);
+			asd_turn_led(asd_ha, phy_id, 1);
+			ASD_DPRINTK("%s: phy%d, lrate:0x%x, proto:0x%x\n",
+				    __FUNCTION__, phy_id,phy->sas_phy.linkrate,
+				    phy->sas_phy.iproto);
+		} else if (oob_status & CURRENT_SPINUP_HOLD) {
+			asd_ha->hw_prof.enabled_phys |= (1 << phy_id);
+			asd_turn_led(asd_ha, phy_id, 1);
+			ASD_DPRINTK("%s: phy%d, spinup hold\n", __FUNCTION__,
+				    phy_id);
+		} else if (oob_status & CURRENT_ERR_MASK) {
+			asd_turn_led(asd_ha, phy_id, 0);
+			ASD_DPRINTK("%s: phy%d: error: oob status:0x%02x\n",
+				    __FUNCTION__, phy_id, oob_status);
+		} else if (oob_status & (CURRENT_HOT_PLUG_CNCT
+					 | CURRENT_DEVICE_PRESENT))  {
+			asd_ha->hw_prof.enabled_phys |= (1 << phy_id);
+			asd_turn_led(asd_ha, phy_id, 1);
+			ASD_DPRINTK("%s: phy%d: hot plug or device present\n",
+				    __FUNCTION__, phy_id);
+		} else {
+			asd_ha->hw_prof.enabled_phys |= (1 << phy_id);
+			asd_turn_led(asd_ha, phy_id, 0);
+			ASD_DPRINTK("%s: phy%d: no device present: "
+				    "oob_status:0x%x\n",
+				    __FUNCTION__, phy_id, oob_status);
+		}
+		break;
+	case RELEASE_SPINUP_HOLD:
+	case PHY_NO_OP:
+	case EXECUTE_HARD_RESET:
+		ASD_DPRINTK("%s: phy%d: sub_func:0x%x\n", __FUNCTION__,
+			    phy_id, control_phy->sub_func);
+		/* XXX finish */
+		break;
+	default:
+		ASD_DPRINTK("%s: phy%d: sub_func:0x%x?\n", __FUNCTION__,
+			    phy_id, control_phy->sub_func);
+		break;
+	}
+out:
+	asd_ascb_free(ascb);
+}
+
+static inline void set_speed_mask(u8 *speed_mask, struct asd_phy_desc *pd)
+{
+	/* disable all speeds, then enable from max downward (cases cascade) */
+	*speed_mask = SAS_SPEED_60_DIS | SAS_SPEED_30_DIS | SAS_SPEED_15_DIS
+		| SATA_SPEED_30_DIS | SATA_SPEED_15_DIS;
+
+	switch (pd->max_sas_lrate) {
+	case SAS_LINK_RATE_6_0_GBPS:
+		*speed_mask &= ~SAS_SPEED_60_DIS;	/* fall through */
+	default:	/* any other max value is handled like 3.0 Gbps */
+	case SAS_LINK_RATE_3_0_GBPS:
+		*speed_mask &= ~SAS_SPEED_30_DIS;	/* fall through */
+	case SAS_LINK_RATE_1_5_GBPS:
+		*speed_mask &= ~SAS_SPEED_15_DIS;
+	}
+
+	switch (pd->min_sas_lrate) {	/* re-disable the rates below the minimum */
+	case SAS_LINK_RATE_6_0_GBPS:
+		*speed_mask |= SAS_SPEED_30_DIS;	/* fall through */
+	case SAS_LINK_RATE_3_0_GBPS:
+		*speed_mask |= SAS_SPEED_15_DIS;	/* fall through */
+	default:
+	case SAS_LINK_RATE_1_5_GBPS:
+		/* nothing to do */
+		;
+	}
+
+	switch (pd->max_sata_lrate) {
+	case SAS_LINK_RATE_3_0_GBPS:
+		*speed_mask &= ~SATA_SPEED_30_DIS;	/* fall through */
+	default:	/* any other max value is handled like 1.5 Gbps */
+	case SAS_LINK_RATE_1_5_GBPS:
+		*speed_mask &= ~SATA_SPEED_15_DIS;
+	}
+
+	switch (pd->min_sata_lrate) {	/* re-disable the rate below the minimum */
+	case SAS_LINK_RATE_3_0_GBPS:
+		*speed_mask |= SATA_SPEED_15_DIS;	/* fall through */
+	default:
+	case SAS_LINK_RATE_1_5_GBPS:
+		/* nothing to do */
+		;
+	}
+}
+
+/**
+ * asd_build_control_phy -- build a CONTROL PHY SCB
+ * @ascb: pointer to an ascb
+ * @phy_id: phy id to control, integer
+ * @subfunc: subfunction, what to actually do to the phy
+ *
+ * This function builds a CONTROL PHY scb.  No allocation of any kind
+ * is performed. @ascb is allocated with the list function.
+ * The caller can override the ascb->tasklet_complete to point
+ * to its own callback function.  It must call asd_ascb_free()
+ * at its tasklet complete function.
+ * See the default implementation.
+ */
+void asd_build_control_phy(struct asd_ascb *ascb, int phy_id, u8 subfunc)
+{
+	struct asd_phy *phy = &ascb->ha->phys[phy_id];
+	struct scb *scb = ascb->scb;
+	struct control_phy *control_phy = &scb->control_phy;
+
+	scb->header.opcode = CONTROL_PHY;
+	control_phy->phy_id = (u8) phy_id;
+	control_phy->sub_func = subfunc;
+
+	switch (subfunc) {
+	case EXECUTE_HARD_RESET:  /* 0x81 */
+	case ENABLE_PHY:          /* 0x01 */
+		/* decide hot plug delay */
+		control_phy->hot_plug_delay = HOTPLUG_DELAY_TIMEOUT;
+
+		/* decide speed mask */
+		set_speed_mask(&control_phy->speed_mask, phy->phy_desc);
+
+		/* initiator port settings are in the hi nibble */
+		if (phy->sas_phy.role == PHY_ROLE_INITIATOR)
+			control_phy->port_type = SAS_PROTO_ALL << 4;
+		else if (phy->sas_phy.role == PHY_ROLE_TARGET)
+			control_phy->port_type = SAS_PROTO_ALL;
+		else	/* any other role: set both nibbles */
+			control_phy->port_type =
+				(SAS_PROTO_ALL << 4) | SAS_PROTO_ALL;
+
+		/* link reset retries, this should be nominal */
+		control_phy->link_reset_retries = 10;	/* fall through */
+
+	case RELEASE_SPINUP_HOLD: /* 0x02 */
+		/* decide the func_mask */
+		control_phy->func_mask = FUNCTION_MASK_DEFAULT;
+		if (phy->phy_desc->flags & ASD_SATA_SPINUP_HOLD)
+			control_phy->func_mask &= ~SPINUP_HOLD_DIS;
+		else
+			control_phy->func_mask |= SPINUP_HOLD_DIS;
+	}	/* no default: other sub-functions set no extra fields here */
+
+	control_phy->conn_handle = cpu_to_le16(0xFFFF);
+
+	ascb->tasklet_complete = control_phy_tasklet_complete;	/* default completion; it frees the ascb */
+}
+
+/* ---------- INITIATE LINK ADM TASK ---------- */
+
+static void link_adm_tasklet_complete(struct asd_ascb *ascb,
+				      struct done_list_struct *dl)
+{
+	u8 opcode = dl->opcode;
+	struct initiate_link_adm *link_adm = &ascb->scb->link_adm;
+	u8 phy_id = link_adm->phy_id;
+
+	if (opcode != TC_NO_ERROR) {
+		asd_printk("phy%d: link adm task 0x%x completed with error "
+			   "0x%x\n", phy_id, link_adm->sub_func, opcode);
+	}
+	ASD_DPRINTK("phy%d: link adm task 0x%x: 0x%x\n",
+		    phy_id, link_adm->sub_func, opcode);
+
+	asd_ascb_free(ascb);
+}
+
+void asd_build_initiate_link_adm_task(struct asd_ascb *ascb, int phy_id,
+				      u8 subfunc)
+{
+	struct scb *scb = ascb->scb;
+	struct initiate_link_adm *link_adm = &scb->link_adm;
+
+	scb->header.opcode = INITIATE_LINK_ADM_TASK;
+
+	link_adm->phy_id = phy_id;
+	link_adm->sub_func = subfunc;
+	link_adm->conn_handle = cpu_to_le16(0xFFFF);
+
+	ascb->tasklet_complete = link_adm_tasklet_complete;
+}
+
+/* ---------- SCB timer ---------- */
+
+/**
+ * asd_ascb_timedout -- called when a pending SCB's timer has expired
+ * @data: unsigned long, a pointer to the ascb in question
+ *
+ * This is the default timeout function which does the most necessary.
+ * Upper layers can implement their own timeout function, say to free
+ * resources they have with this SCB, and then call this one at the
+ * end of their timeout function.  To do this, one should initialize
+ * the ascb->timer.{function, data, expires} prior to calling the post
+ * function.  The timer is started by the post function.
+ */
+void asd_ascb_timedout(unsigned long data)
+{
+	struct asd_ascb *ascb = (void *) data;
+	struct asd_seq_data *seq = &ascb->ha->seq;
+	unsigned long flags;
+
+	ASD_DPRINTK("scb:0x%x timed out\n", ascb->scb->header.opcode);
+
+	spin_lock_irqsave(&seq->pend_q_lock, flags);
+	seq->pending--;
+	list_del_init(&ascb->list);
+	spin_unlock_irqrestore(&seq->pend_q_lock, flags);
+
+	asd_ascb_free(ascb);
+}
+
+/* ---------- CONTROL PHY ---------- */
+
+/* Given the spec value, return a driver value. */
+static const int phy_func_table[] = {
+	[PHY_FUNC_NOP]        = PHY_NO_OP,
+	[PHY_FUNC_LINK_RESET] = ENABLE_PHY,
+	[PHY_FUNC_HARD_RESET] = EXECUTE_HARD_RESET,
+	[PHY_FUNC_DISABLE]    = DISABLE_PHY,
+	[PHY_FUNC_RELEASE_SPINUP_HOLD] = RELEASE_SPINUP_HOLD,
+};
+
+int asd_control_phy(struct asd_sas_phy *phy, enum phy_func func, void *arg)
+{
+	struct asd_ha_struct *asd_ha = phy->ha->lldd_ha;
+	struct asd_phy_desc *pd = asd_ha->phys[phy->id].phy_desc;
+	struct asd_ascb *ascb;
+	struct sas_phy_linkrates *rates;
+	int res = 1;	/* passed by address to the allocator; presumably the ascb count - confirm */
+
+	switch (func) {
+	case PHY_FUNC_CLEAR_ERROR_LOG:
+		return -ENOSYS;
+	case PHY_FUNC_SET_LINK_RATE:
+		rates = arg;	/* NOTE(review): arg is dereferenced without a NULL check */
+		if (rates->minimum_linkrate) {	/* zero leaves the stored minimum untouched */
+			pd->min_sas_lrate = rates->minimum_linkrate;
+			pd->min_sata_lrate = rates->minimum_linkrate;
+		}
+		if (rates->maximum_linkrate) {	/* zero leaves the stored maximum untouched */
+			pd->max_sas_lrate = rates->maximum_linkrate;
+			pd->max_sata_lrate = rates->maximum_linkrate;
+		}
+		func = PHY_FUNC_LINK_RESET;	/* maps to ENABLE_PHY, which rebuilds the speed mask */
+		break;
+	default:
+		break;
+	}
+
+	ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL);
+	if (!ascb)
+		return -ENOMEM;
+
+	asd_build_control_phy(ascb, phy->id, phy_func_table[func]);	/* NOTE(review): func is not range-checked */
+	res = asd_post_ascb_list(asd_ha, ascb , 1);
+	if (res)
+		asd_ascb_free(ascb);	/* posting failed, so no completion callback will free it */
+
+	return res;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c
new file mode 100644
index 0000000..83574b5
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_sds.c
@@ -0,0 +1,1089 @@
+/*
+ * Aic94xx SAS/SATA driver access to shared data structures and memory
+ * maps.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "aic94xx.h"
+#include "aic94xx_reg.h"
+
+/* ---------- OCM stuff ---------- */
+
+struct asd_ocm_dir_ent {
+	u8 type;
+	u8 offs[3];
+	u8 _r1;
+	u8 size[3];
+} __attribute__ ((packed));
+
+struct asd_ocm_dir {
+	char sig[2];
+	u8   _r1[2];
+	u8   major;          /* 0 */
+	u8   minor;          /* 0 */
+	u8   _r2;
+	u8   num_de;
+	struct asd_ocm_dir_ent entry[15];
+} __attribute__ ((packed));
+
+#define	OCM_DE_OCM_DIR			0x00
+#define	OCM_DE_WIN_DRVR			0x01
+#define	OCM_DE_BIOS_CHIM		0x02
+#define	OCM_DE_RAID_ENGN		0x03
+#define	OCM_DE_BIOS_INTL		0x04
+#define	OCM_DE_BIOS_CHIM_OSM		0x05
+#define	OCM_DE_BIOS_CHIM_DYNAMIC	0x06
+#define	OCM_DE_ADDC2C_RES0		0x07
+#define	OCM_DE_ADDC2C_RES1		0x08
+#define	OCM_DE_ADDC2C_RES2		0x09
+#define	OCM_DE_ADDC2C_RES3		0x0A
+
+#define OCM_INIT_DIR_ENTRIES	5
+/***************************************************************************
+*  OCM directory default
+***************************************************************************/
+static struct asd_ocm_dir OCMDirInit =
+{
+	.sig = {0x4D, 0x4F},	/* signature */
+	.num_de = OCM_INIT_DIR_ENTRIES,	/* no. of directory entries */
+};
+
+/***************************************************************************
+*  OCM directory entries default
+***************************************************************************/
+static struct asd_ocm_dir_ent OCMDirEntriesInit[OCM_INIT_DIR_ENTRIES] =
+{
+	{
+		.type = (OCM_DE_ADDC2C_RES0),	/* Entry type  */
+		.offs = {128},			/* Offset */
+		.size = {0, 4},			/* size */
+	},
+	{
+		.type = (OCM_DE_ADDC2C_RES1),	/* Entry type  */
+		.offs = {128, 4},		/* Offset */
+		.size = {0, 4},			/* size */
+	},
+	{
+		.type = (OCM_DE_ADDC2C_RES2),	/* Entry type  */
+		.offs = {128, 8},		/* Offset */
+		.size = {0, 4},			/* size */
+	},
+	{
+		.type = (OCM_DE_ADDC2C_RES3),	/* Entry type  */
+		.offs = {128, 12},		/* Offset */
+		.size = {0, 4},			/* size */
+	},
+	{
+		.type = (OCM_DE_WIN_DRVR),	/* Entry type  */
+		.offs = {128, 16},		/* Offset */
+		.size = {128, 235, 1},		/* size */
+	},
+};
+
+struct asd_bios_chim_struct {
+	char sig[4];
+	u8   major;          /* 1 */
+	u8   minor;          /* 0 */
+	u8   bios_major;
+	u8   bios_minor;
+	__le32  bios_build;
+	u8   flags;
+	u8   pci_slot;
+	__le16  ue_num;
+	__le16  ue_size;
+	u8  _r[14];
+	/* The unit element array is right here.
+	 */
+} __attribute__ ((packed));
+
+/**
+ * asd_read_ocm_seg - read an on chip memory (OCM) segment
+ * @asd_ha: pointer to the host adapter structure
+ * @buffer: where to write the read data
+ * @offs: offset into OCM where to read from
+ * @size: how many bytes to read
+ *
+ * Return the number of bytes not read. Return 0 on success.
+ */
+static int asd_read_ocm_seg(struct asd_ha_struct *asd_ha, void *buffer,
+			    u32 offs, int size)
+{
+	u8 *p = buffer;
+	if (unlikely(asd_ha->iospace)) {
+		asd_read_reg_string(asd_ha, buffer, offs+OCM_BASE_ADDR, size);
+		return 0;	/* was "return size", which callers read as failure */
+	}
+	for ( ; size > 0; size--, offs++, p++)
+		*p = asd_read_ocm_byte(asd_ha, offs);
+	return size;	/* 0 once the loop has copied every byte */
+}
+
+static int asd_read_ocm_dir(struct asd_ha_struct *asd_ha,
+			    struct asd_ocm_dir *dir, u32 offs)
+{
+	int err = asd_read_ocm_seg(asd_ha, dir, offs, sizeof(*dir));
+	if (err) {
+		ASD_DPRINTK("couldn't read ocm segment\n");
+		return err;
+	}
+
+	if (dir->sig[0] != 'M' || dir->sig[1] != 'O') {
+		ASD_DPRINTK("no valid dir signature(%c%c) at start of OCM\n",
+			    dir->sig[0], dir->sig[1]);
+		return -ENOENT;
+	}
+	if (dir->major != 0) {
+		asd_printk("unsupported major version of ocm dir:0x%x\n",
+			   dir->major);
+		return -ENOENT;
+	}
+	dir->num_de &= 0xf;
+	return 0;
+}
+
+/**
+ * asd_write_ocm_seg - write an on chip memory (OCM) segment
+ * @asd_ha: pointer to the host adapter structure
+ * @buffer: where to read the write data
+ * @offs: offset into OCM to write to
+ * @size: how many bytes to write
+ *
+ * Nothing is returned; the write is not checked for success.
+ */
+static void asd_write_ocm_seg(struct asd_ha_struct *asd_ha, void *buffer,
+			    u32 offs, int size)
+{
+	u8 *p = buffer;
+	if (unlikely(asd_ha->iospace))
+		asd_write_reg_string(asd_ha, buffer, offs+OCM_BASE_ADDR, size);
+	else {
+		for ( ; size > 0; size--, offs++, p++)
+			asd_write_ocm_byte(asd_ha, offs, *p);
+	}
+	return;
+}
+
+#define THREE_TO_NUM(X) ((X)[0] | ((X)[1] << 8) | ((X)[2] << 16))
+
+static int asd_find_dir_entry(struct asd_ocm_dir *dir, u8 type,
+			      u32 *offs, u32 *size)
+{
+	int idx;
+
+	/* Return the offset and size of the first entry of the given type. */
+	for (idx = 0; idx < dir->num_de; idx++) {
+		struct asd_ocm_dir_ent *de = &dir->entry[idx];
+
+		if (de->type != type)
+			continue;
+		*offs = (u32) THREE_TO_NUM(de->offs);
+		*size = (u32) THREE_TO_NUM(de->size);
+		return 0;
+	}
+	return -ENOENT;
+}
+
+#define OCM_BIOS_CHIM_DE  2
+#define BC_BIOS_PRESENT   1
+
+static int asd_get_bios_chim(struct asd_ha_struct *asd_ha,
+			     struct asd_ocm_dir *dir)
+{
+	int err;
+	struct asd_bios_chim_struct *bc_struct;
+	u32 offs, size;
+
+	err = asd_find_dir_entry(dir, OCM_BIOS_CHIM_DE, &offs, &size);
+	if (err) {
+		ASD_DPRINTK("couldn't find BIOS_CHIM dir ent\n");
+		goto out;
+	}
+	err = -ENOMEM;
+	bc_struct = kmalloc(sizeof(*bc_struct), GFP_KERNEL);
+	if (!bc_struct) {
+		asd_printk("no memory for bios_chim struct\n");
+		goto out;
+	}
+	err = asd_read_ocm_seg(asd_ha, (void *)bc_struct, offs,
+			       sizeof(*bc_struct));
+	if (err) {
+		ASD_DPRINTK("couldn't read ocm segment\n");
+		goto out2;
+	}
+	if (strncmp(bc_struct->sig, "SOIB", 4)
+	    && strncmp(bc_struct->sig, "IPSA", 4)) {
+		ASD_DPRINTK("BIOS_CHIM entry has no valid sig(%c%c%c%c)\n",
+			    bc_struct->sig[0], bc_struct->sig[1],
+			    bc_struct->sig[2], bc_struct->sig[3]);
+		err = -ENOENT;
+		goto out2;
+	}
+	if (bc_struct->major != 1) {
+		asd_printk("BIOS_CHIM unsupported major version:0x%x\n",
+			   bc_struct->major);
+		err = -ENOENT;
+		goto out2;
+	}
+	if (bc_struct->flags & BC_BIOS_PRESENT) {
+		asd_ha->hw_prof.bios.present = 1;
+		asd_ha->hw_prof.bios.maj = bc_struct->bios_major;
+		asd_ha->hw_prof.bios.min = bc_struct->bios_minor;
+		asd_ha->hw_prof.bios.bld = le32_to_cpu(bc_struct->bios_build);
+		ASD_DPRINTK("BIOS present (%d,%d), %d\n",
+			    asd_ha->hw_prof.bios.maj,
+			    asd_ha->hw_prof.bios.min,
+			    asd_ha->hw_prof.bios.bld);
+	}
+	asd_ha->hw_prof.ue.num = le16_to_cpu(bc_struct->ue_num);
+	asd_ha->hw_prof.ue.size= le16_to_cpu(bc_struct->ue_size);
+	ASD_DPRINTK("ue num:%d, ue size:%d\n", asd_ha->hw_prof.ue.num,
+		    asd_ha->hw_prof.ue.size);
+	size = asd_ha->hw_prof.ue.num * asd_ha->hw_prof.ue.size;
+	if (size > 0) {
+		err = -ENOMEM;
+		asd_ha->hw_prof.ue.area = kmalloc(size, GFP_KERNEL);
+		if (!asd_ha->hw_prof.ue.area)
+			goto out2;
+		err = asd_read_ocm_seg(asd_ha, (void *)asd_ha->hw_prof.ue.area,
+				       offs + sizeof(*bc_struct), size);
+		if (err) {
+			kfree(asd_ha->hw_prof.ue.area);
+			asd_ha->hw_prof.ue.area = NULL;
+			asd_ha->hw_prof.ue.num  = 0;
+			asd_ha->hw_prof.ue.size = 0;
+			ASD_DPRINTK("couldn't read ue entries(%d)\n", err);
+		}
+	}
+out2:
+	kfree(bc_struct);
+out:
+	return err;
+}
+
+static void
+asd_hwi_initialize_ocm_dir (struct asd_ha_struct *asd_ha)
+{
+	int i;
+
+	/* Zero OCM */
+	for (i = 0; i < OCM_MAX_SIZE; i += 4)
+		asd_write_ocm_dword(asd_ha, i, 0);
+
+	/* Write Dir */
+	asd_write_ocm_seg(asd_ha, &OCMDirInit, 0,
+			  sizeof(struct asd_ocm_dir));
+
+	/* Write Dir Entries */
+	for (i = 0; i < OCM_INIT_DIR_ENTRIES; i++)
+		asd_write_ocm_seg(asd_ha, &OCMDirEntriesInit[i],
+				  sizeof(struct asd_ocm_dir) +
+				  (i * sizeof(struct asd_ocm_dir_ent))
+				  , sizeof(struct asd_ocm_dir_ent));
+
+}
+
+/* Set up the OCM directory ourselves when the BIOS has not initialized it. */
+static int
+asd_hwi_check_ocm_access (struct asd_ha_struct *asd_ha)
+{
+	struct pci_dev *pcidev = asd_ha->pcidev;
+	u32 reg;
+	int err = 0;
+	u32 v;
+
+	/* check if OCM has been initialized by BIOS */
+	reg = asd_read_reg_dword(asd_ha, EXSICNFGR);
+
+	if (!(reg & OCMINITIALIZED)) {
+		err = pci_read_config_dword(pcidev, PCIC_INTRPT_STAT, &v);
+		if (err) {
+			asd_printk("couldn't access PCIC_INTRPT_STAT of %s\n",
+					pci_name(pcidev));
+			goto out;
+		}
+
+		printk(KERN_INFO "OCM is not initialized by BIOS, "
+		       "reinitialize it and ignore it, current IntrptStatus "
+		       "is 0x%x\n", v);
+
+		if (v)	/* write the bits just read back to PCIC_INTRPT_STAT */
+			err = pci_write_config_dword(pcidev,
+						     PCIC_INTRPT_STAT, v);
+		if (err) {
+			asd_printk("couldn't write PCIC_INTRPT_STAT of %s\n",
+					pci_name(pcidev));
+			goto out;
+		}
+
+		asd_hwi_initialize_ocm_dir(asd_ha);
+	}
+out:
+	return err;	/* 0, or the failing PCI config access result */
+}
+
+/**
+ * asd_read_ocm - read on chip memory (OCM)
+ * @asd_ha: pointer to the host adapter structure
+ */
+int asd_read_ocm(struct asd_ha_struct *asd_ha)
+{
+	int err;
+	struct asd_ocm_dir *dir;
+
+	if (asd_hwi_check_ocm_access(asd_ha))
+		return -1;
+
+	dir = kmalloc(sizeof(*dir), GFP_KERNEL);
+	if (!dir) {
+		asd_printk("no memory for ocm dir\n");
+		return -ENOMEM;
+	}
+
+	err = asd_read_ocm_dir(asd_ha, dir, 0);
+	if (err)
+		goto out;
+
+	err = asd_get_bios_chim(asd_ha, dir);
+out:
+	kfree(dir);
+	return err;
+}
+
+/* ---------- FLASH stuff ---------- */
+
+#define FLASH_RESET			0xF0
+
+#define FLASH_SIZE                      0x200000
+#define FLASH_DIR_COOKIE                "*** ADAPTEC FLASH DIRECTORY *** "
+#define FLASH_NEXT_ENTRY_OFFS		0x2000
+#define FLASH_MAX_DIR_ENTRIES		32
+
+#define FLASH_DE_TYPE_MASK              0x3FFFFFFF
+#define FLASH_DE_MS                     0x120
+#define FLASH_DE_CTRL_A_USER            0xE0
+
+struct asd_flash_de {
+	__le32   type;
+	__le32   offs;
+	__le32   pad_size;
+	__le32   image_size;
+	__le32   chksum;
+	u8       _r[12];
+	u8       version[32];
+} __attribute__ ((packed));
+
+struct asd_flash_dir {
+	u8    cookie[32];
+	__le32   rev;		  /* 2 */
+	__le32   chksum;
+	__le32   chksum_antidote;
+	__le32   bld;
+	u8    bld_id[32];	  /* build id data */
+	u8    ver_data[32];	  /* date and time of build */
+	__le32   ae_mask;
+	__le32   v_mask;
+	__le32   oc_mask;
+	u8    _r[20];
+	struct asd_flash_de dir_entry[FLASH_MAX_DIR_ENTRIES];
+} __attribute__ ((packed));
+
+struct asd_manuf_sec {
+	char  sig[2];		  /* 'S', 'M' */
+	u16   offs_next;
+	u8    maj;           /* 0 */
+	u8    min;           /* 0 */
+	u16   chksum;
+	u16   size;
+	u8    _r[6];
+	u8    sas_addr[SAS_ADDR_SIZE];
+	u8    pcba_sn[ASD_PCBA_SN_SIZE];
+	/* Here start the other segments */
+	u8    linked_list[0];
+} __attribute__ ((packed));
+
+struct asd_manuf_phy_desc {
+	u8    state;         /* low 4 bits */
+#define MS_PHY_STATE_ENABLEABLE 0
+#define MS_PHY_STATE_REPORTED   1
+#define MS_PHY_STATE_HIDDEN     2
+	u8    phy_id;
+	u16   _r;
+	u8    phy_control_0; /* mode 5 reg 0x160 */
+	u8    phy_control_1; /* mode 5 reg 0x161 */
+	u8    phy_control_2; /* mode 5 reg 0x162 */
+	u8    phy_control_3; /* mode 5 reg 0x163 */
+} __attribute__ ((packed));
+
+struct asd_manuf_phy_param {
+	char  sig[2];		  /* 'P', 'M' */
+	u16   next;
+	u8    maj;           /* 0 */
+	u8    min;           /* 2 */
+	u8    num_phy_desc;  /* 8 */
+	u8    phy_desc_size; /* 8 */
+	u8    _r[3];
+	u8    usage_model_id;
+	u32   _r2;
+	struct asd_manuf_phy_desc phy_desc[ASD_MAX_PHYS];
+} __attribute__ ((packed));
+
+#if 0
+static const char *asd_sb_type[] = {
+	"unknown",
+	"SGPIO",
+	[2 ... 0x7F] = "unknown",
+	[0x80] = "ADPT_I2C",
+	[0x81 ... 0xFF] = "VENDOR_UNIQUExx"
+};
+#endif
+
+struct asd_ms_sb_desc {
+	u8    type;
+	u8    node_desc_index;
+	u8    conn_desc_index;
+	u8    _recvd[0];
+} __attribute__ ((packed));
+
+#if 0
+static const char *asd_conn_type[] = {
+	[0 ... 7] = "unknown",
+	"SFF8470",
+	"SFF8482",
+	"SFF8484",
+	[0x80] = "PCIX_DAUGHTER0",
+	[0x81] = "SAS_DAUGHTER0",
+	[0x82 ... 0xFF] = "VENDOR_UNIQUExx"
+};
+
+static const char *asd_conn_location[] = {
+	"unknown",
+	"internal",
+	"external",
+	"board_to_board",
+};
+#endif
+
+struct asd_ms_conn_desc {
+	u8    type;
+	u8    location;
+	u8    num_sideband_desc;
+	u8    size_sideband_desc;
+	u32   _resvd;
+	u8    name[16];
+	struct asd_ms_sb_desc sb_desc[0];
+} __attribute__ ((packed));
+
+struct asd_nd_phy_desc {
+	u8    vp_attch_type;
+	u8    attch_specific[0];
+} __attribute__ ((packed));
+
+#if 0
+static const char *asd_node_type[] = {
+	"IOP",
+	"IO_CONTROLLER",
+	"EXPANDER",
+	"PORT_MULTIPLIER",
+	"PORT_MULTIPLEXER",
+	"MULTI_DROP_I2C_BUS",
+};
+#endif
+
+struct asd_ms_node_desc {
+	u8    type;
+	u8    num_phy_desc;
+	u8    size_phy_desc;
+	u8    _resvd;
+	u8    name[16];
+	struct asd_nd_phy_desc phy_desc[0];
+} __attribute__ ((packed));
+
+struct asd_ms_conn_map {
+	char  sig[2];		  /* 'M', 'C' */
+	__le16 next;
+	u8    maj;		  /* 0 */
+	u8    min;		  /* 0 */
+	__le16 cm_size;		  /* size of this struct */
+	u8    num_conn;
+	u8    conn_size;
+	u8    num_nodes;
+	u8    usage_model_id;
+	u32   _resvd;
+	struct asd_ms_conn_desc conn_desc[0];
+	struct asd_ms_node_desc node_desc[0];
+} __attribute__ ((packed));
+
+struct asd_ctrla_phy_entry {
+	u8    sas_addr[SAS_ADDR_SIZE];
+	u8    sas_link_rates;  /* max in hi bits, min in low bits */
+	u8    flags;
+	u8    sata_link_rates;
+	u8    _r[5];
+} __attribute__ ((packed));
+
+struct asd_ctrla_phy_settings {
+	u8    id0;		  /* P'h'y */
+	u8    _r;
+	u16   next;
+	u8    num_phys;	      /* number of PHYs in the PCI function */
+	u8    _r2[3];
+	struct asd_ctrla_phy_entry phy_ent[ASD_MAX_PHYS];
+} __attribute__ ((packed));
+
+struct asd_ll_el {
+	u8   id0;
+	u8   id1;
+	__le16  next;
+	u8   something_here[0];
+} __attribute__ ((packed));
+
+static int asd_poll_flash(struct asd_ha_struct *asd_ha)
+{
+	int c;
+	u8 d;
+
+	for (c = 5000; c > 0; c--) {
+		d  = asd_read_reg_byte(asd_ha, asd_ha->hw_prof.flash.bar);
+		d ^= asd_read_reg_byte(asd_ha, asd_ha->hw_prof.flash.bar);
+		if (!d)
+			return 0;
+		udelay(5);
+	}
+	return -ENOENT;
+}
+
+static int asd_reset_flash(struct asd_ha_struct *asd_ha)
+{
+	/* Poll first; give up if the flash reads never settle. */
+	int rc = asd_poll_flash(asd_ha);
+
+	if (rc != 0)
+		return rc;
+
+	asd_write_reg_byte(asd_ha, asd_ha->hw_prof.flash.bar, FLASH_RESET);
+	/* The result of the post-reset poll is the function's result. */
+	return asd_poll_flash(asd_ha);
+}
+
+static inline int asd_read_flash_seg(struct asd_ha_struct *asd_ha,
+				     void *buffer, u32 offs, int size)
+{
+	asd_read_reg_string(asd_ha, buffer, asd_ha->hw_prof.flash.bar+offs,
+			    size);
+	return 0;
+}
+
+/**
+ * asd_find_flash_dir - finds and reads the flash directory
+ * @asd_ha: pointer to the host adapter structure
+ * @flash_dir: pointer to flash directory structure
+ *
+ * If found, the flash directory segment will be copied to
+ * @flash_dir.  Return 1 if found, 0 if not.
+ */
+static int asd_find_flash_dir(struct asd_ha_struct *asd_ha,
+			      struct asd_flash_dir *flash_dir)
+{
+	u32 v;
+	for (v = 0; v < FLASH_SIZE; v += FLASH_NEXT_ENTRY_OFFS) {
+		asd_read_flash_seg(asd_ha, flash_dir, v,
+				   sizeof(FLASH_DIR_COOKIE)-1);
+		if (memcmp(flash_dir->cookie, FLASH_DIR_COOKIE,
+			   sizeof(FLASH_DIR_COOKIE)-1) == 0) {
+			asd_ha->hw_prof.flash.dir_offs = v;
+			asd_read_flash_seg(asd_ha, flash_dir, v,
+					   sizeof(*flash_dir));
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int asd_flash_getid(struct asd_ha_struct *asd_ha)
+{
+	int err = 0;
+	u32 reg;
+
+	reg = asd_read_reg_dword(asd_ha, EXSICNFGR);
+
+	if (!(reg & FLASHEX)) {
+		ASD_DPRINTK("flash doesn't exist\n");
+		return -ENOENT;
+	}
+	if (pci_read_config_dword(asd_ha->pcidev, PCI_CONF_FLSH_BAR,
+				  &asd_ha->hw_prof.flash.bar)) {
+		asd_printk("couldn't read PCI_CONF_FLSH_BAR of %s\n",
+			   pci_name(asd_ha->pcidev));
+		return -ENOENT;
+	}
+	asd_ha->hw_prof.flash.present = 1;
+	asd_ha->hw_prof.flash.wide = reg & FLASHW ? 1 : 0;
+	err = asd_reset_flash(asd_ha);
+	if (err) {
+		ASD_DPRINTK("couldn't reset flash(%d)\n", err);
+		return err;
+	}
+	return 0;
+}
+
+static u16 asd_calc_flash_chksum(u16 *p, int size)
+{
+	u16 sum;
+
+	/* wrapping 16-bit sum of the first @size words at @p */
+	for (sum = 0; size > 0; size--)
+		sum += *p++;
+	return sum;
+}
+
+
+/* Look up the first directory entry whose type, masked with
+ * FLASH_DE_TYPE_MASK, equals @entry_type.  On success store the entry's
+ * offset and padded size in @offs and @size and return 0; return
+ * -ENOENT (leaving @offs and @size untouched) if there is no such entry.
+ */
+static int asd_find_flash_de(struct asd_flash_dir *flash_dir, u32 entry_type,
+			     u32 *offs, u32 *size)
+{
+	int idx;
+
+	for (idx = 0; idx < FLASH_MAX_DIR_ENTRIES; idx++) {
+		struct asd_flash_de *de = &flash_dir->dir_entry[idx];
+
+		if ((le32_to_cpu(de->type) & FLASH_DE_TYPE_MASK) != entry_type)
+			continue;
+
+		*offs = le32_to_cpu(de->offs);
+		*size = le32_to_cpu(de->pad_size);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/* Check the signature ('S','M') and major version (0) of the
+ * manufacturing sector and convert its offs_next, chksum and size
+ * fields from little endian to CPU order in place.
+ *
+ * A checksum mismatch is only logged; it does not fail validation.
+ *
+ * Return 0 if the sector is accepted, -ENOENT otherwise.
+ */
+static int asd_validate_ms(struct asd_manuf_sec *ms)
+{
+	if (!(ms->sig[0] == 'S' && ms->sig[1] == 'M')) {
+		ASD_DPRINTK("manuf sec: no valid sig(%c%c)\n",
+			    ms->sig[0], ms->sig[1]);
+		return -ENOENT;
+	}
+
+	if (ms->maj) {
+		asd_printk("unsupported manuf. sector. major version:%x\n",
+			   ms->maj);
+		return -ENOENT;
+	}
+
+	/* The fields are stored little endian on flash. */
+	ms->offs_next = le16_to_cpu((__force __le16) ms->offs_next);
+	ms->chksum = le16_to_cpu((__force __le16) ms->chksum);
+	ms->size = le16_to_cpu((__force __le16) ms->size);
+
+	/* ms->size is in bytes, the checksum runs over 16-bit words. */
+	if (asd_calc_flash_chksum((u16 *)ms, ms->size / 2) != 0) {
+		asd_printk("failed manuf sector checksum\n");
+	}
+
+	return 0;
+}
+
+/* Copy the SAS address (SAS_ADDR_SIZE bytes) from the manufacturing
+ * sector @ms into hw_prof.sas_addr.  Always returns 0.
+ */
+static int asd_ms_get_sas_addr(struct asd_ha_struct *asd_ha,
+			       struct asd_manuf_sec *ms)
+{
+	memcpy(asd_ha->hw_prof.sas_addr, ms->sas_addr, SAS_ADDR_SIZE);
+	return 0;
+}
+
+/* Copy the PCBA serial number (ASD_PCBA_SN_SIZE bytes) from the
+ * manufacturing sector @ms into hw_prof.pcba_sn and NUL-terminate it.
+ * Always returns 0.
+ */
+static int asd_ms_get_pcba_sn(struct asd_ha_struct *asd_ha,
+			      struct asd_manuf_sec *ms)
+{
+	/* The terminator lives one byte past the copied serial number. */
+	asd_ha->hw_prof.pcba_sn[ASD_PCBA_SN_SIZE] = '\0';
+	memcpy(asd_ha->hw_prof.pcba_sn, ms->pcba_sn, ASD_PCBA_SN_SIZE);
+
+	return 0;
+}
+
+/**
+ * asd_find_ll_by_id - find a linked list entry by its id
+ * @start: void pointer to the first element in the linked list
+ * @id0: the first byte of the id  (offs 0)
+ * @id1: the second byte of the id (offs 1), or 0xFF to match any
+ *
+ * @start has to be the _base_ element start, since each entry's
+ * "next" offset is relative to this pointer.  The walk ends when an
+ * entry's next offset leads back to @start.
+ * Some linked list entries use only the first id, in which case
+ * you can pass 0xFF for the second.
+ *
+ * Return a pointer to the matching entry, or NULL if there is none.
+ */
+static void *asd_find_ll_by_id(void * const start, const u8 id0, const u8 id1)
+{
+	const int any_id1 = (id1 == 0xFF);
+	struct asd_ll_el *cur = start;
+
+	for (;;) {
+		if (cur->id0 == id0 && (any_id1 || cur->id1 == id1))
+			return cur;
+
+		cur = start + le16_to_cpu(cur->next);
+		if (cur == start)
+			break;
+	}
+
+	return NULL;
+}
+
+/**
+ * asd_ms_get_phy_params - get phy parameters from the manufacturing sector
+ * @asd_ha: pointer to the host adapter structure
+ * @manuf_sec: pointer to the manufacturing sector
+ *
+ * The manufacturing sector also contains the linked list of sub-segments,
+ * since when it was read, its size was taken from the flash directory,
+ * not from the structure size.  If no 'P','M' phy parameter segment is
+ * found, built-in defaults are used for all ASD_MAX_PHYS phys.
+ *
+ * HIDDEN phys are neither counted nor enabled.  REPORTED phys cannot
+ * be enabled but are counted in hw_prof.max_phys.  ENABLEABLE phys are
+ * enabled by default (their bit is set in hw_prof.enabled_phys) and are
+ * counted in both hw_prof.max_phys and hw_prof.num_phys.  E.g., with
+ * phys 0, 1 and 2 HIDDEN, phys 3, 4 and 5 REPORTED and phys 6 and 7
+ * ENABLEABLE, max_phys is 5, num_phys is 2 and enabled_phys is 0xC0.
+ *
+ * The phy control values are copied to hw_prof.phy_desc[] only for phys
+ * which are neither HIDDEN nor REPORTED.
+ *
+ * At most ASD_MAX_PHYS descriptors are processed, even if the segment
+ * read from flash claims to hold more.
+ *
+ * Return 0 on success, -ENOENT on an unsupported major version.
+ */
+static int asd_ms_get_phy_params(struct asd_ha_struct *asd_ha,
+				 struct asd_manuf_sec *manuf_sec)
+{
+	int i;
+	int num_desc;
+	int en_phys = 0;
+	int rep_phys = 0;
+	struct asd_manuf_phy_param *phy_param;
+	struct asd_manuf_phy_param dflt_phy_param;
+
+	phy_param = asd_find_ll_by_id(manuf_sec, 'P', 'M');
+	if (!phy_param) {
+		ASD_DPRINTK("ms: no phy parameters found\n");
+		ASD_DPRINTK("ms: Creating default phy parameters\n");
+		dflt_phy_param.sig[0] = 'P';
+		dflt_phy_param.sig[1] = 'M';
+		dflt_phy_param.maj = 0;
+		dflt_phy_param.min = 2;
+		/* Advertise exactly as many descriptors as are set up below. */
+		dflt_phy_param.num_phy_desc = ASD_MAX_PHYS;
+		dflt_phy_param.phy_desc_size = sizeof(struct asd_manuf_phy_desc);
+		for (i = 0; i < ASD_MAX_PHYS; i++) {
+			dflt_phy_param.phy_desc[i].state = 0;
+			dflt_phy_param.phy_desc[i].phy_id = i;
+			dflt_phy_param.phy_desc[i].phy_control_0 = 0xf6;
+			dflt_phy_param.phy_desc[i].phy_control_1 = 0x10;
+			dflt_phy_param.phy_desc[i].phy_control_2 = 0x43;
+			dflt_phy_param.phy_desc[i].phy_control_3 = 0xeb;
+		}
+
+		phy_param = &dflt_phy_param;
+	}
+
+	if (phy_param->maj != 0) {
+		asd_printk("unsupported manuf. phy param major version:0x%x\n",
+			   phy_param->maj);
+		return -ENOENT;
+	}
+
+	ASD_DPRINTK("ms: num_phy_desc: %d\n", phy_param->num_phy_desc);
+
+	/* The count comes from flash; never index past ASD_MAX_PHYS phys. */
+	num_desc = phy_param->num_phy_desc;
+	if (num_desc > ASD_MAX_PHYS) {
+		asd_printk("manuf. phy param: %d phy descriptors, using %d\n",
+			   num_desc, ASD_MAX_PHYS);
+		num_desc = ASD_MAX_PHYS;
+	}
+
+	asd_ha->hw_prof.enabled_phys = 0;
+	for (i = 0; i < num_desc; i++) {
+		struct asd_manuf_phy_desc *pd = &phy_param->phy_desc[i];
+		switch (pd->state & 0xF) {
+		case MS_PHY_STATE_HIDDEN:
+			ASD_DPRINTK("ms: phy%d: HIDDEN\n", i);
+			continue;
+		case MS_PHY_STATE_REPORTED:
+			ASD_DPRINTK("ms: phy%d: REPORTED\n", i);
+			asd_ha->hw_prof.enabled_phys &= ~(1 << i);
+			rep_phys++;
+			continue;
+		case MS_PHY_STATE_ENABLEABLE:
+			ASD_DPRINTK("ms: phy%d: ENEBLEABLE\n", i);
+			asd_ha->hw_prof.enabled_phys |= (1 << i);
+			en_phys++;
+			break;
+		}
+		/* Reached for ENABLEABLE phys and for unrecognized states. */
+		asd_ha->hw_prof.phy_desc[i].phy_control_0 = pd->phy_control_0;
+		asd_ha->hw_prof.phy_desc[i].phy_control_1 = pd->phy_control_1;
+		asd_ha->hw_prof.phy_desc[i].phy_control_2 = pd->phy_control_2;
+		asd_ha->hw_prof.phy_desc[i].phy_control_3 = pd->phy_control_3;
+	}
+	asd_ha->hw_prof.max_phys = rep_phys + en_phys;
+	asd_ha->hw_prof.num_phys = en_phys;
+	ASD_DPRINTK("ms: max_phys:0x%x, num_phys:0x%x\n",
+		    asd_ha->hw_prof.max_phys, asd_ha->hw_prof.num_phys);
+	ASD_DPRINTK("ms: enabled_phys:0x%x\n", asd_ha->hw_prof.enabled_phys);
+	return 0;
+}
+
+/* Look up the 'M','C' connector map segment in the manufacturing sector.
+ * A missing map is not an error.  Only the major version is checked; the
+ * contents of the map are not parsed yet.
+ *
+ * Return 0 if the map is absent or has major version 0, -ENOENT for any
+ * other major version.
+ */
+static int asd_ms_get_connector_map(struct asd_ha_struct *asd_ha,
+				    struct asd_manuf_sec *manuf_sec)
+{
+	struct asd_ms_conn_map *conn_map;
+
+	conn_map = asd_find_ll_by_id(manuf_sec, 'M', 'C');
+	if (conn_map == NULL) {
+		ASD_DPRINTK("ms: no connector map found\n");
+		return 0;
+	}
+
+	if (conn_map->maj == 0) {
+		/* XXX */
+		return 0;
+	}
+
+	ASD_DPRINTK("ms: unsupported: connector map major version 0x%x"
+		    "\n", conn_map->maj);
+	return -ENOENT;
+}
+
+
+/**
+ * asd_process_ms - find and extract information from the manufacturing sector
+ * @asd_ha: pointer to the host adapter structure
+ * @flash_dir: pointer to the flash directory
+ *
+ * Looks up the FLASH_DE_MS directory entry, reads the whole segment into
+ * a temporary buffer, validates it and then extracts the SAS address, the
+ * PCBA serial number, the phy parameters and the connector map.
+ *
+ * Return 0 on success (a zero-sized segment counts as success), negative
+ * on error.
+ */
+static int asd_process_ms(struct asd_ha_struct *asd_ha,
+			  struct asd_flash_dir *flash_dir)
+{
+	struct asd_manuf_sec *ms;
+	u32 ms_offs, ms_size;
+	int err;
+
+	err = asd_find_flash_de(flash_dir, FLASH_DE_MS, &ms_offs, &ms_size);
+	if (err) {
+		ASD_DPRINTK("Couldn't find the manuf. sector\n");
+		return err;
+	}
+
+	/* Nothing to read. */
+	if (!ms_size)
+		return 0;
+
+	ms = kmalloc(ms_size, GFP_KERNEL);
+	if (ms == NULL) {
+		ASD_DPRINTK("no mem for manuf sector\n");
+		return -ENOMEM;
+	}
+
+	err = asd_read_flash_seg(asd_ha, ms, ms_offs, ms_size);
+	if (err) {
+		ASD_DPRINTK("couldn't read manuf sector at 0x%x, size 0x%x\n",
+			    ms_offs, ms_size);
+		goto free_ms;
+	}
+
+	err = asd_validate_ms(ms);
+	if (err) {
+		ASD_DPRINTK("couldn't validate manuf sector\n");
+		goto free_ms;
+	}
+
+	err = asd_ms_get_sas_addr(asd_ha, ms);
+	if (err) {
+		ASD_DPRINTK("couldn't read the SAS_ADDR\n");
+		goto free_ms;
+	}
+	ASD_DPRINTK("manuf sect SAS_ADDR %llx\n",
+		    SAS_ADDR(asd_ha->hw_prof.sas_addr));
+
+	err = asd_ms_get_pcba_sn(asd_ha, ms);
+	if (err) {
+		ASD_DPRINTK("couldn't read the PCBA SN\n");
+		goto free_ms;
+	}
+	ASD_DPRINTK("manuf sect PCBA SN %s\n", asd_ha->hw_prof.pcba_sn);
+
+	err = asd_ms_get_phy_params(asd_ha, ms);
+	if (err) {
+		ASD_DPRINTK("ms: couldn't get phy parameters\n");
+		goto free_ms;
+	}
+
+	err = asd_ms_get_connector_map(asd_ha, ms);
+	if (err) {
+		ASD_DPRINTK("ms: couldn't get connector map\n");
+	}
+
+free_ms:
+	kfree(ms);
+	return err;
+}
+
+/* Apply the CTRL-A per-phy settings in @ps to hw_prof.phy_desc[].
+ *
+ * Phys which are not enabled are skipped.  An enabled phy whose entry
+ * has an all-zero SAS address is removed from hw_prof.enabled_phys.
+ * For every remaining phy the SAS address, the min/max SAS and SATA
+ * link rates (low/high nibble of the respective rate byte) and the
+ * flags are copied.  Always returns 0.
+ */
+static int asd_process_ctrla_phy_settings(struct asd_ha_struct *asd_ha,
+					  struct asd_ctrla_phy_settings *ps)
+{
+	int phy = 0;
+
+	while (phy < ps->num_phys) {
+		struct asd_ctrla_phy_entry *ent = &ps->phy_ent[phy];
+		const int cur = phy++;
+
+		if (!PHY_ENABLED(asd_ha, cur))
+			continue;
+
+		if (!*(u64 *)ent->sas_addr) {
+			asd_ha->hw_prof.enabled_phys &= ~(1 << cur);
+			continue;
+		}
+
+		/* This is the SAS address which should be sent in IDENTIFY. */
+		memcpy(asd_ha->hw_prof.phy_desc[cur].sas_addr, ent->sas_addr,
+		       SAS_ADDR_SIZE);
+
+		/* High nibble is the maximum rate, low nibble the minimum. */
+		asd_ha->hw_prof.phy_desc[cur].max_sas_lrate =
+			(ent->sas_link_rates & 0xF0) >> 4;
+		asd_ha->hw_prof.phy_desc[cur].min_sas_lrate =
+			(ent->sas_link_rates & 0x0F);
+		asd_ha->hw_prof.phy_desc[cur].max_sata_lrate =
+			(ent->sata_link_rates & 0xF0) >> 4;
+		asd_ha->hw_prof.phy_desc[cur].min_sata_lrate =
+			(ent->sata_link_rates & 0x0F);
+
+		asd_ha->hw_prof.phy_desc[cur].flags = ent->flags;
+
+		ASD_DPRINTK("ctrla: phy%d: sas_addr: %llx, sas rate:0x%x-0x%x,"
+			    " sata rate:0x%x-0x%x, flags:0x%x\n",
+			    cur,
+			    SAS_ADDR(asd_ha->hw_prof.phy_desc[cur].sas_addr),
+			    asd_ha->hw_prof.phy_desc[cur].max_sas_lrate,
+			    asd_ha->hw_prof.phy_desc[cur].min_sas_lrate,
+			    asd_ha->hw_prof.phy_desc[cur].max_sata_lrate,
+			    asd_ha->hw_prof.phy_desc[cur].min_sata_lrate,
+			    asd_ha->hw_prof.phy_desc[cur].flags);
+	}
+
+	return 0;
+}
+
+/**
+ * asd_process_ctrl_a_user - process CTRL-A user settings
+ * @asd_ha: pointer to the host adapter structure
+ * @flash_dir: pointer to the flash directory
+ *
+ * If the flash directory has a FLASH_DE_CTRL_A_USER entry, the section
+ * is read from flash and its 'h' phy settings structure is applied.
+ * If there is no such entry, default settings are built (every phy uses
+ * the adapter SAS address from hw_prof.sas_addr) and applied instead;
+ * nothing is read from flash in that case.
+ *
+ * Return 0 on success (a zero-sized section counts as success), negative
+ * on error.
+ */
+static int asd_process_ctrl_a_user(struct asd_ha_struct *asd_ha,
+				   struct asd_flash_dir *flash_dir)
+{
+	int err, i;
+	u32 offs, size;
+	/* Stays NULL on the default path so that the kfree() is a no-op. */
+	struct asd_ll_el *el = NULL;
+	struct asd_ctrla_phy_settings *ps;
+	struct asd_ctrla_phy_settings dflt_ps;
+
+	err = asd_find_flash_de(flash_dir, FLASH_DE_CTRL_A_USER, &offs, &size);
+	if (err) {
+		ASD_DPRINTK("couldn't find CTRL-A user settings section\n");
+		ASD_DPRINTK("Creating default CTRL-A user settings section\n");
+
+		dflt_ps.id0 = 'h';
+		dflt_ps.num_phys = ASD_MAX_PHYS;
+		for (i = 0; i < ASD_MAX_PHYS; i++) {
+			memcpy(dflt_ps.phy_ent[i].sas_addr,
+			       asd_ha->hw_prof.sas_addr, SAS_ADDR_SIZE);
+			dflt_ps.phy_ent[i].sas_link_rates = 0x98;
+			dflt_ps.phy_ent[i].flags = 0x0;
+			dflt_ps.phy_ent[i].sata_link_rates = 0x0;
+		}
+
+		/* offs and size are not valid here: skip the flash read and
+		 * use the defaults directly.
+		 */
+		ps = &dflt_ps;
+		goto out_process;
+	}
+
+	if (size == 0)
+		goto out;
+
+	err = -ENOMEM;
+	el = kmalloc(size, GFP_KERNEL);
+	if (!el) {
+		ASD_DPRINTK("no mem for ctrla user settings section\n");
+		goto out;
+	}
+
+	err = asd_read_flash_seg(asd_ha, (void *)el, offs, size);
+	if (err) {
+		ASD_DPRINTK("couldn't read ctrla phy settings section\n");
+		goto out2;
+	}
+
+	err = -ENOENT;
+	ps = asd_find_ll_by_id(el, 'h', 0xFF);
+	if (!ps) {
+		ASD_DPRINTK("couldn't find ctrla phy settings struct\n");
+		goto out2;
+	}
+out_process:
+	err = asd_process_ctrla_phy_settings(asd_ha, ps);
+	if (err) {
+		ASD_DPRINTK("couldn't process ctrla phy settings\n");
+		goto out2;
+	}
+out2:
+	kfree(el);
+out:
+	return err;
+}
+
+/**
+ * asd_read_flash - read flash memory
+ * @asd_ha: pointer to the host adapter structure
+ *
+ * Identifies the flash, locates the flash directory (only revision 2 is
+ * supported) and processes the manufacturing sector followed by the
+ * CTRL-A user settings.
+ *
+ * Return 0 on success, negative on error.
+ */
+int asd_read_flash(struct asd_ha_struct *asd_ha)
+{
+	struct asd_flash_dir *dir;
+	int err = asd_flash_getid(asd_ha);
+
+	if (err)
+		return err;
+
+	dir = kmalloc(sizeof(*dir), GFP_KERNEL);
+	if (dir == NULL)
+		return -ENOMEM;
+
+	if (!asd_find_flash_dir(asd_ha, dir)) {
+		ASD_DPRINTK("couldn't find flash directory\n");
+		err = -ENOENT;
+		goto free_dir;
+	}
+
+	if (le32_to_cpu(dir->rev) != 2) {
+		asd_printk("unsupported flash dir version:0x%x\n",
+			   le32_to_cpu(dir->rev));
+		err = -ENOENT;
+		goto free_dir;
+	}
+
+	err = asd_process_ms(asd_ha, dir);
+	if (err) {
+		ASD_DPRINTK("couldn't process manuf sector settings\n");
+		goto free_dir;
+	}
+
+	err = asd_process_ctrl_a_user(asd_ha, dir);
+	if (err) {
+		ASD_DPRINTK("couldn't process CTRL-A user settings\n");
+	}
+
+free_dir:
+	kfree(dir);
+	return err;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_seq.c b/drivers/scsi/aic94xx/aic94xx_seq.c
new file mode 100644
index 0000000..d9b6da5
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_seq.c
@@ -0,0 +1,1404 @@
+/*
+ * Aic94xx SAS/SATA driver sequencer interface.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * Parts of this code adapted from David Chaw's adp94xx_seq.c.
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/firmware.h>
+#include "aic94xx_reg.h"
+#include "aic94xx_hwi.h"
+
+#include "aic94xx_seq.h"
+#include "aic94xx_dump.h"
+
+/* It takes no more than 0.05 us for an instruction
+ * to complete. So waiting for 1 us should be more than
+ * plenty.
+ *
+ * PAUSE_DELAY is the udelay() argument used between two polls of a
+ * sequencer's ARP2CTL register, PAUSE_TRIES the number of polls before
+ * a pause/unpause request is given up.
+ */
+#define PAUSE_DELAY 1
+#define PAUSE_TRIES 1000
+
+/* Sequencer firmware state.  NOTE(review): none of these are assigned
+ * in this part of the file -- presumably filled in when the firmware
+ * image is loaded; confirm against the loader code.
+ */
+static const struct firmware *sequencer_fw;
+static const char *sequencer_version;
+static u16 cseq_vecs[CSEQ_NUM_VECS], lseq_vecs[LSEQ_NUM_VECS], mode2_task,
+	cseq_idle_loop, lseq_idle_loop;
+/* Microcode images and their sizes, as handed to asd_download_seq(). */
+static u8 *cseq_code, *lseq_code;
+static u32 cseq_code_size, lseq_code_size;
+
+/* Written to CSEQ_BUILTIN_FREE_SCB_HEAD/TAIL by asd_init_cseq_mip(). */
+static u16 first_scb_site_no = 0xFFFF;
+static u16 last_scb_site_no;
+
+/* ---------- Pause/Unpause CSEQ/LSEQ ---------- */
+
+/**
+ * asd_pause_cseq - pause the central sequencer
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Sets EPAUSE in CARP2CTL, unless the sequencer is already paused, and
+ * polls up to PAUSE_TRIES times for PAUSED to show up.
+ *
+ * Return 0 on success, negative on failure.
+ */
+int asd_pause_cseq(struct asd_ha_struct *asd_ha)
+{
+	int tries;
+	u32 ctl = asd_read_reg_dword(asd_ha, CARP2CTL);
+
+	if (ctl & PAUSED)
+		return 0;
+
+	asd_write_reg_dword(asd_ha, CARP2CTL, ctl | EPAUSE);
+
+	for (tries = PAUSE_TRIES; tries > 0; tries--) {
+		ctl = asd_read_reg_dword(asd_ha, CARP2CTL);
+		if (ctl & PAUSED)
+			return 0;
+		udelay(PAUSE_DELAY);
+	}
+
+	ASD_DPRINTK("couldn't pause CSEQ\n");
+	return -1;
+}
+
+/**
+ * asd_unpause_cseq - unpause the central sequencer.
+ * @asd_ha: pointer to host adapter structure.
+ *
+ * Clears EPAUSE in CARP2CTL, unless the sequencer is already running,
+ * and polls up to PAUSE_TRIES times for PAUSED to go away.
+ *
+ * Return 0 on success, negative on error.
+ */
+int asd_unpause_cseq(struct asd_ha_struct *asd_ha)
+{
+	int tries;
+	u32 ctl = asd_read_reg_dword(asd_ha, CARP2CTL);
+
+	if ((ctl & PAUSED) == 0)
+		return 0;
+
+	asd_write_reg_dword(asd_ha, CARP2CTL, ctl & ~EPAUSE);
+
+	for (tries = PAUSE_TRIES; tries > 0; tries--) {
+		ctl = asd_read_reg_dword(asd_ha, CARP2CTL);
+		if ((ctl & PAUSED) == 0)
+			return 0;
+		udelay(PAUSE_DELAY);
+	}
+
+	ASD_DPRINTK("couldn't unpause the CSEQ\n");
+	return -1;
+}
+
+/**
+ * asd_seq_pause_lseq - pause a link sequencer
+ * @asd_ha: pointer to a host adapter structure
+ * @lseq: link sequencer of interest
+ *
+ * Sets EPAUSE in LmARP2CTL(@lseq), unless the sequencer is already
+ * paused, and polls up to PAUSE_TRIES times for PAUSED to show up.
+ *
+ * Return 0 on success, negative on error.
+ */
+static inline int asd_seq_pause_lseq(struct asd_ha_struct *asd_ha, int lseq)
+{
+	int tries;
+	u32 ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq));
+
+	if (ctl & PAUSED)
+		return 0;
+
+	asd_write_reg_dword(asd_ha, LmARP2CTL(lseq), ctl | EPAUSE);
+
+	for (tries = PAUSE_TRIES; tries > 0; tries--) {
+		ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq));
+		if (ctl & PAUSED)
+			return 0;
+		udelay(PAUSE_DELAY);
+	}
+
+	ASD_DPRINTK("couldn't pause LSEQ %d\n", lseq);
+	return -1;
+}
+
+/**
+ * asd_pause_lseq - pause the link sequencer(s)
+ * @asd_ha: pointer to host adapter structure
+ * @lseq_mask: mask of link sequencers of interest
+ *
+ * Stops at the first link sequencer which cannot be paused.
+ *
+ * Return 0 on success, negative on failure.
+ */
+int asd_pause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask)
+{
+	int lseq;
+
+	for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+		int err = asd_seq_pause_lseq(asd_ha, lseq);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * asd_seq_unpause_lseq - unpause a link sequencer
+ * @asd_ha: pointer to host adapter structure
+ * @lseq: link sequencer of interest
+ *
+ * Clears EPAUSE in LmARP2CTL(@lseq), unless the sequencer is already
+ * running, and polls up to PAUSE_TRIES times for PAUSED to go away.
+ *
+ * Return 0 on success, negative on error.
+ */
+static inline int asd_seq_unpause_lseq(struct asd_ha_struct *asd_ha, int lseq)
+{
+	u32 arp2ctl;
+	int count = PAUSE_TRIES;
+
+	arp2ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq));
+	if (!(arp2ctl & PAUSED))
+		return 0;
+
+	asd_write_reg_dword(asd_ha, LmARP2CTL(lseq), arp2ctl & ~EPAUSE);
+	do {
+		arp2ctl = asd_read_reg_dword(asd_ha, LmARP2CTL(lseq));
+		if (!(arp2ctl & PAUSED))
+			return 0;
+		udelay(PAUSE_DELAY);
+	} while (--count > 0);
+
+	ASD_DPRINTK("couldn't unpause LSEQ %d\n", lseq);
+	/* Report the timeout, like the other pause/unpause helpers do. */
+	return -1;
+}
+
+
+/**
+ * asd_unpause_lseq - unpause the link sequencer(s)
+ * @asd_ha: pointer to host adapter structure
+ * @lseq_mask: mask of link sequencers of interest
+ *
+ * Stops at the first link sequencer for which asd_seq_unpause_lseq()
+ * reports an error.
+ *
+ * Return 0 on success, negative on failure.
+ */
+int asd_unpause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask)
+{
+	int lseq;
+
+	for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+		int err = asd_seq_unpause_lseq(asd_ha, lseq);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* ---------- Downloading CSEQ/LSEQ microcode ---------- */
+
+/* Compare the CSEQ instruction RAM, read one dword at a time starting
+ * at CSEQ_RAM_REG_BASE_ADR, with the little-endian microcode image
+ * @_prog of @size bytes.
+ *
+ * Return 0 if they match, -1 at the first mismatch.
+ */
+static int asd_verify_cseq(struct asd_ha_struct *asd_ha, const u8 *_prog,
+			   u32 size)
+{
+	const u32 *image = (u32 *) _prog;
+	u32 offs;
+
+	for (offs = 0; offs < size; offs += 4) {
+		u32 got = asd_read_reg_dword(asd_ha,
+					     CSEQ_RAM_REG_BASE_ADR + offs);
+		u32 want = le32_to_cpu(image[offs / 4]);
+
+		if (got == want)
+			continue;
+
+		asd_printk("%s: cseq verify failed at %u "
+			   "read:0x%x, wanted:0x%x\n",
+			   pci_name(asd_ha->pcidev),
+			   offs, got, want);
+		return -1;
+	}
+
+	ASD_DPRINTK("verified %d bytes, passed\n", size);
+	return 0;
+}
+
+/**
+ * asd_verify_lseq - verify the microcode of a link sequencer
+ * @asd_ha: pointer to host adapter structure
+ * @_prog: pointer to the microcode
+ * @size: size of the microcode in bytes
+ * @lseq: link sequencer of interest
+ *
+ * The link sequencer code is accessed in 4 KB pages, which are selected
+ * by setting LmRAMPAGE (bits 8 and 9) of the LmBISTCTL1 register.
+ * The 10 KB LSEQm instruction code is mapped, page at a time, at
+ * LmSEQRAM address.
+ *
+ * Return 0 if the sequencer RAM matches the image, -1 at the first
+ * mismatch.
+ */
+static int asd_verify_lseq(struct asd_ha_struct *asd_ha, const u8 *_prog,
+			   u32 size, int lseq)
+{
+#define LSEQ_CODEPAGE_SIZE 4096
+	const u32 *cur = (u32 *) _prog;
+	int pages =  (size + LSEQ_CODEPAGE_SIZE - 1) / LSEQ_CODEPAGE_SIZE;
+	u32 page;
+
+	for (page = 0; page < pages; page++) {
+		u32 offs = 0;
+
+		/* Map the next code page at LmSEQRAM. */
+		asd_write_reg_dword(asd_ha, LmBISTCTL1(lseq),
+				    page << LmRAMPAGE_LSHIFT);
+
+		while (size > 0 && offs < LSEQ_CODEPAGE_SIZE) {
+			u32 val = asd_read_reg_dword(asd_ha,
+						     LmSEQRAM(lseq)+offs);
+
+			if (val != le32_to_cpu(*cur)) {
+				asd_printk("%s: LSEQ%d verify failed "
+					   "page:%d, offs:%d\n",
+					   pci_name(asd_ha->pcidev),
+					   lseq, page, offs);
+				return -1;
+			}
+
+			offs += 4;
+			cur++;
+			size -= 4;
+		}
+	}
+
+	ASD_DPRINTK("LSEQ%d verified %d bytes, passed\n", lseq,
+		    (int)((u8 *)cur-_prog));
+	return 0;
+}
+
+/**
+ * asd_verify_seq -- verify CSEQ/LSEQ microcode
+ * @asd_ha: pointer to host adapter structure
+ * @prog: pointer to microcode
+ * @size: size of the microcode
+ * @lseq_mask: if 0, verify CSEQ microcode, else mask of LSEQs of interest
+ *
+ * For link sequencers, verification stops at the first one that fails.
+ *
+ * Return 0 if microcode is correct, negative on mismatch.
+ */
+static int asd_verify_seq(struct asd_ha_struct *asd_ha, const u8 *prog,
+			      u32 size, u8 lseq_mask)
+{
+	int lseq;
+
+	if (!lseq_mask)
+		return asd_verify_cseq(asd_ha, prog, size);
+
+	for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+		int err = asd_verify_lseq(asd_ha, prog, size, lseq);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+#define ASD_DMA_MODE_DOWNLOAD
+#ifdef ASD_DMA_MODE_DOWNLOAD
+/* This is the size of the CSEQ Mapped instruction page */
+#define MAX_DMA_OVLY_COUNT ((1U << 14)-1)
+/* Download sequencer microcode using the overlay DMA engine.
+ *
+ * @prog/@size describe the microcode image; @size must be a multiple
+ * of 4.  @lseq_mask selects the link sequencers to load, 0 selects the
+ * central sequencer.  The image is copied, at most MAX_DMA_OVLY_COUNT
+ * bytes at a time, into a coherent DMA buffer and transferred from
+ * there.  After a successful transfer the sequencer RAM is verified
+ * against the image.
+ *
+ * Return 0 on success; -1 for a bad size, -ENOMEM, -ENODEV on a DMA
+ * error, or the result of asd_verify_seq().
+ */
+static int asd_download_seq(struct asd_ha_struct *asd_ha,
+			    const u8 * const prog, u32 size, u8 lseq_mask)
+{
+	u32 comstaten;
+	u32 reg;
+	int page;
+	const int pages = (size + MAX_DMA_OVLY_COUNT - 1) / MAX_DMA_OVLY_COUNT;
+	struct asd_dma_tok *token;
+	int err = 0;
+
+	if (size % 4) {
+		asd_printk("sequencer program not multiple of 4\n");
+		return -1;
+	}
+
+	/* All sequencers are paused for the download; the results of the
+	 * pause requests are not checked here.
+	 */
+	asd_pause_cseq(asd_ha);
+	asd_pause_lseq(asd_ha, 0xFF);
+
+	/* save, disable and clear interrupts */
+	comstaten = asd_read_reg_dword(asd_ha, COMSTATEN);
+	asd_write_reg_dword(asd_ha, COMSTATEN, 0);
+	asd_write_reg_dword(asd_ha, COMSTAT, COMSTAT_MASK);
+
+	asd_write_reg_dword(asd_ha, CHIMINTEN, RST_CHIMINTEN);
+	asd_write_reg_dword(asd_ha, CHIMINT, CHIMINT_MASK);
+
+	/* Bounce buffer for one DMA chunk. */
+	token = asd_alloc_coherent(asd_ha, MAX_DMA_OVLY_COUNT, GFP_KERNEL);
+	if (!token) {
+		asd_printk("out of memory for dma SEQ download\n");
+		err = -ENOMEM;
+		goto out;
+	}
+	ASD_DPRINTK("dma-ing %d bytes\n", size);
+
+	for (page = 0; page < pages; page++) {
+		int i;
+		/* Bytes in this chunk; only the last one may be short. */
+		u32 left = min(size-page*MAX_DMA_OVLY_COUNT,
+			       (u32)MAX_DMA_OVLY_COUNT);
+
+		memcpy(token->vaddr, prog + page*MAX_DMA_OVLY_COUNT, left);
+		asd_write_reg_addr(asd_ha, OVLYDMAADR, token->dma_handle);
+		asd_write_reg_dword(asd_ha, OVLYDMACNT, left);
+		/* RESETOVLYDMA is set for the first chunk only. */
+		reg = !page ? RESETOVLYDMA : 0;
+		reg |= (STARTOVLYDMA | OVLYHALTERR);
+		/* Target: the LSEQs in the mask, or the CSEQ if mask is 0. */
+		reg |= (lseq_mask ? (((u32)lseq_mask) << 8) : OVLYCSEQ);
+		/* Start DMA. */
+		asd_write_reg_dword(asd_ha, OVLYDMACTL, reg);
+
+		/* Poll until OVLYDMAACT clears.  A timeout is not reported
+		 * here; the status check after the loop decides.
+		 */
+		for (i = PAUSE_TRIES*100; i > 0; i--) {
+			u32 dmadone = asd_read_reg_dword(asd_ha, OVLYDMACTL);
+			if (!(dmadone & OVLYDMAACT))
+				break;
+			udelay(PAUSE_DELAY);
+		}
+	}
+
+	/* Overall result: DMA done, no overlay error, no device exception. */
+	reg = asd_read_reg_dword(asd_ha, COMSTAT);
+	if (!(reg & OVLYDMADONE) || (reg & OVLYERR)
+	    || (asd_read_reg_dword(asd_ha, CHIMINT) & DEVEXCEPT_MASK)){
+		asd_printk("%s: error DMA-ing sequencer code\n",
+			   pci_name(asd_ha->pcidev));
+		err = -ENODEV;
+	}
+
+	asd_free_coherent(asd_ha, token);
+ out:
+	/* Restore the saved COMSTATEN value. */
+	asd_write_reg_dword(asd_ha, COMSTATEN, comstaten);
+
+	/* GNU "?:": return err if it is set, else the verification result. */
+	return err ? : asd_verify_seq(asd_ha, prog, size, lseq_mask);
+}
+#else /* ASD_DMA_MODE_DOWNLOAD */
+/* Download sequencer microcode in PIO mode.  Built only when
+ * ASD_DMA_MODE_DOWNLOAD is not defined.
+ *
+ * @_prog/@size describe the little-endian microcode image; @size must
+ * be a multiple of 4.  @lseq_mask selects the link sequencers to load,
+ * 0 selects the central sequencer.  After the download the sequencer
+ * RAM is verified against the image.
+ *
+ * Return 0 on success, -1 for a bad size, or the result of
+ * asd_verify_seq().
+ */
+static int asd_download_seq(struct asd_ha_struct *asd_ha, const u8 *_prog,
+			    u32 size, u8 lseq_mask)
+{
+	u32 i;
+	u32 reg = 0;
+	const u32 *prog = (const u32 *) _prog;
+
+	if (size % 4) {
+		asd_printk("sequencer program not multiple of 4\n");
+		return -1;
+	}
+
+	asd_pause_cseq(asd_ha);
+	asd_pause_lseq(asd_ha, 0xFF);
+
+	/* Target: the LSEQs in the mask, or the CSEQ if the mask is 0. */
+	reg |= (lseq_mask ? (((u32)lseq_mask) << 8) : OVLYCSEQ);
+	reg |= PIOCMODE;
+
+	asd_write_reg_dword(asd_ha, OVLYDMACNT, size);
+	asd_write_reg_dword(asd_ha, OVLYDMACTL, reg);
+
+	ASD_DPRINTK("downloading %s sequencer%s in PIO mode...\n",
+		    lseq_mask ? "LSEQ" : "CSEQ", lseq_mask ? "s" : "");
+
+	/* The image is little endian (the verify routines compare against
+	 * le32_to_cpu(*prog)), so convert each dword before writing it.
+	 */
+	for (i = 0; i < size; i += 4, prog++)
+		asd_write_reg_dword(asd_ha, SPIODATA, le32_to_cpu(*prog));
+
+	reg = (reg & ~PIOCMODE) | OVLYHALTERR;
+	asd_write_reg_dword(asd_ha, OVLYDMACTL, reg);
+
+	return asd_verify_seq(asd_ha, _prog, size, lseq_mask);
+}
+#endif /* ASD_DMA_MODE_DOWNLOAD */
+
+/**
+ * asd_seq_download_seqs - download the sequencer microcode
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Download the central and link sequencer microcode.  The link
+ * sequencers of all enabled phys are loaded in one go first; if that
+ * fails, they are loaded one at a time.
+ *
+ * Return 0 on success, -ENODEV if no phy is enabled, or the error of
+ * the failed download.
+ */
+static int asd_seq_download_seqs(struct asd_ha_struct *asd_ha)
+{
+	u8 lseq, lseq_mask;
+	int err;
+
+	if (asd_ha->hw_prof.enabled_phys == 0) {
+		asd_printk("%s: no enabled phys!\n", pci_name(asd_ha->pcidev));
+		return -ENODEV;
+	}
+
+	/* Download the CSEQ */
+	ASD_DPRINTK("downloading CSEQ...\n");
+	err = asd_download_seq(asd_ha, cseq_code, cseq_code_size, 0);
+	if (err) {
+		asd_printk("CSEQ download failed:%d\n", err);
+		return err;
+	}
+
+	/* Download the Link Sequencers code. All of the Link Sequencers
+	 * microcode can be downloaded at the same time.
+	 */
+	ASD_DPRINTK("downloading LSEQs...\n");
+	err = asd_download_seq(asd_ha, lseq_code, lseq_code_size,
+			       asd_ha->hw_prof.enabled_phys);
+	if (!err)
+		return 0;
+
+	/* Try it one at a time */
+	lseq_mask = asd_ha->hw_prof.enabled_phys;
+	for_each_sequencer(lseq_mask, lseq_mask, lseq) {
+		err = asd_download_seq(asd_ha, lseq_code,
+				       lseq_code_size, 1<<lseq);
+		if (err)
+			break;
+	}
+
+	if (err)
+		asd_printk("LSEQs download failed:%d\n", err);
+
+	return err;
+}
+
+/* ---------- Initializing the chip, chip memory, etc. ---------- */
+
+/**
+ * asd_init_cseq_mip - initialize CSEQ mode independent pages 4-7
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Queue head/tail registers are set to 0xFFFF, most other scratch
+ * registers to 0.  The built-in free SCB list is seeded with
+ * first_scb_site_no and last_scb_site_no.
+ */
+static void asd_init_cseq_mip(struct asd_ha_struct *asd_ha)
+{
+	/* CSEQ Mode Independent, page 4 setup. */
+	asd_write_reg_word(asd_ha, CSEQ_Q_EXE_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EXE_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_DONE_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_DONE_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_SEND_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_SEND_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_DMA2CHIM_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_DMA2CHIM_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_COPY_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_COPY_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_REG0, 0);
+	asd_write_reg_word(asd_ha, CSEQ_REG1, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_REG2, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_LINK_CTL_Q_MAP, 0);
+	{
+		/* Both nibbles of CSEQ_MAX_CSEQ_MODE get the number of
+		 * bits set in CCONEXIST.
+		 */
+		u8 con = asd_read_reg_byte(asd_ha, CCONEXIST);
+		u8 val = hweight8(con);
+		asd_write_reg_byte(asd_ha, CSEQ_MAX_CSEQ_MODE, (val<<4)|val);
+	}
+	asd_write_reg_word(asd_ha, CSEQ_FREE_LIST_HACK_COUNT, 0);
+
+	/* CSEQ Mode independent, page 5 setup. */
+	asd_write_reg_dword(asd_ha, CSEQ_EST_NEXUS_REQ_QUEUE, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EST_NEXUS_REQ_QUEUE+4, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EST_NEXUS_REQ_COUNT, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EST_NEXUS_REQ_COUNT+4, 0);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EST_NEXUS_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EST_NEXUS_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_NEED_EST_NEXUS_SCB, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EST_NEXUS_REQ_HEAD, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EST_NEXUS_REQ_TAIL, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EST_NEXUS_SCB_OFFSET, 0);
+
+	/* CSEQ Mode independent, page 6 setup. */
+	asd_write_reg_word(asd_ha, CSEQ_INT_ROUT_RET_ADDR0, 0);
+	asd_write_reg_word(asd_ha, CSEQ_INT_ROUT_RET_ADDR1, 0);
+	asd_write_reg_word(asd_ha, CSEQ_INT_ROUT_SCBPTR, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_INT_ROUT_MODE, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_ISR_SCRATCH_FLAGS, 0);
+	asd_write_reg_word(asd_ha, CSEQ_ISR_SAVE_SINDEX, 0);
+	asd_write_reg_word(asd_ha, CSEQ_ISR_SAVE_DINDEX, 0);
+	asd_write_reg_word(asd_ha, CSEQ_Q_MONIRTT_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_MONIRTT_TAIL, 0xFFFF);
+	/* Calculate the free scb mask. */
+	{
+		/* The mask is bits 15:8 of ~((cmdctx / 128) - 1).
+		 * NOTE(review): this only yields a clean mask if the
+		 * command context size is a power of two -- confirm.
+		 */
+		u16 cmdctx = asd_get_cmdctx_size(asd_ha);
+		cmdctx = (~((cmdctx/128)-1)) >> 8;
+		asd_write_reg_byte(asd_ha, CSEQ_FREE_SCB_MASK, (u8)cmdctx);
+	}
+	asd_write_reg_word(asd_ha, CSEQ_BUILTIN_FREE_SCB_HEAD,
+			   first_scb_site_no);
+	asd_write_reg_word(asd_ha, CSEQ_BUILTIN_FREE_SCB_TAIL,
+			   last_scb_site_no);
+	asd_write_reg_word(asd_ha, CSEQ_EXTENDED_FREE_SCB_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_EXTENDED_FREE_SCB_TAIL, 0xFFFF);
+
+	/* CSEQ Mode independent, page 7 setup. */
+	asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_QUEUE, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_QUEUE+4, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_COUNT, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_EMPTY_REQ_COUNT+4, 0);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EMPTY_HEAD, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_Q_EMPTY_TAIL, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_NEED_EMPTY_SCB, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EMPTY_REQ_HEAD, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EMPTY_REQ_TAIL, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_EMPTY_SCB_OFFSET, 0);
+	asd_write_reg_word(asd_ha, CSEQ_PRIMITIVE_DATA, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_TIMEOUT_CONST, 0);
+}
+
+/**
+ * asd_init_cseq_mdp - initialize CSEQ Mode dependent pages
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Sets up page 0 of modes 0-7 and pages 0-2 of mode 8.  The mode 8
+ * setup also hands the sequencer the bus addresses of the first SCB
+ * and of the done list.
+ */
+static void asd_init_cseq_mdp(struct asd_ha_struct *asd_ha)
+{
+	int	i;
+	int	moffs;
+
+	/* Register offset between two consecutive modes. */
+	moffs = CSEQ_PAGE_SIZE * 2;
+
+	/* CSEQ Mode dependent, modes 0-7, page 0 setup. */
+	for (i = 0; i < 8; i++) {
+		asd_write_reg_word(asd_ha, i*moffs+CSEQ_LRM_SAVE_SINDEX, 0);
+		asd_write_reg_word(asd_ha, i*moffs+CSEQ_LRM_SAVE_SCBPTR, 0);
+		asd_write_reg_word(asd_ha, i*moffs+CSEQ_Q_LINK_HEAD, 0xFFFF);
+		asd_write_reg_word(asd_ha, i*moffs+CSEQ_Q_LINK_TAIL, 0xFFFF);
+		asd_write_reg_byte(asd_ha, i*moffs+CSEQ_LRM_SAVE_SCRPAGE, 0);
+	}
+
+	/* CSEQ Mode dependent, mode 0-7, page 1 and 2 shall be ignored. */
+
+	/* CSEQ Mode dependent, mode 8, page 0 setup. */
+	asd_write_reg_word(asd_ha, CSEQ_RET_ADDR, 0xFFFF);
+	asd_write_reg_word(asd_ha, CSEQ_RET_SCBPTR, 0);
+	asd_write_reg_word(asd_ha, CSEQ_SAVE_SCBPTR, 0);
+	asd_write_reg_word(asd_ha, CSEQ_EMPTY_TRANS_CTX, 0);
+	asd_write_reg_word(asd_ha, CSEQ_RESP_LEN, 0);
+	asd_write_reg_word(asd_ha, CSEQ_TMF_SCBPTR, 0);
+	asd_write_reg_word(asd_ha, CSEQ_GLOBAL_PREV_SCB, 0);
+	asd_write_reg_word(asd_ha, CSEQ_GLOBAL_HEAD, 0);
+	asd_write_reg_word(asd_ha, CSEQ_CLEAR_LU_HEAD, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_TMF_OPCODE, 0);
+	asd_write_reg_byte(asd_ha, CSEQ_SCRATCH_FLAGS, 0);
+	asd_write_reg_word(asd_ha, CSEQ_HSB_SITE, 0);
+	/* First invalid SCB site: one past the last SCB site. */
+	asd_write_reg_word(asd_ha, CSEQ_FIRST_INV_SCB_SITE,
+			   (u16)last_scb_site_no+1);
+	asd_write_reg_word(asd_ha, CSEQ_FIRST_INV_DDB_SITE,
+			   (u16)asd_ha->hw_prof.max_ddbs);
+
+	/* CSEQ Mode dependent, mode 8, page 1 setup. */
+	asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CLEAR, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CLEAR + 4, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CHECK, 0);
+	asd_write_reg_dword(asd_ha, CSEQ_LUN_TO_CHECK + 4, 0);
+
+	/* CSEQ Mode dependent, mode 8, page 2 setup. */
+	/* Tell the sequencer the bus address of the first SCB. */
+	asd_write_reg_addr(asd_ha, CSEQ_HQ_NEW_POINTER,
+			   asd_ha->seq.next_scb.dma_handle);
+	ASD_DPRINTK("First SCB dma_handle: 0x%llx\n",
+		    (unsigned long long)asd_ha->seq.next_scb.dma_handle);
+
+	/* Tell the sequencer the first Done List entry address. */
+	asd_write_reg_addr(asd_ha, CSEQ_HQ_DONE_BASE,
+			   asd_ha->seq.actual_dl->dma_handle);
+
+	/* Initialize the Q_DONE_POINTER with the least significant
+	 * 4 bytes of the first Done List address. */
+	asd_write_reg_dword(asd_ha, CSEQ_HQ_DONE_POINTER,
+			    ASD_BUSADDR_LO(asd_ha->seq.actual_dl->dma_handle));
+
+	asd_write_reg_byte(asd_ha, CSEQ_HQ_DONE_PASS, ASD_DEF_DL_TOGGLE);
+
+	/* CSEQ Mode dependent, mode 8, page 3 shall be ignored. */
+}
+
+/**
+ * asd_init_cseq_scratch -- setup and init CSEQ
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Set up and initialize the central sequencer.  Initialize the mode
+ * independent and then the mode dependent scratch pages to their
+ * default settings.
+ */
+static void asd_init_cseq_scratch(struct asd_ha_struct *asd_ha)
+{
+	asd_init_cseq_mip(asd_ha);
+	asd_init_cseq_mdp(asd_ha);
+}
+
+/**
+ * asd_init_lseq_mip -- initialize LSEQ Mode independent pages 0-3
+ * @asd_ha: pointer to host adapter structure
+ * @lseq: index of the link sequencer whose scratch pages are written
+ *
+ * Writes default values into the mode independent scratch registers of
+ * link sequencer @lseq.  The target transfer queue head/tail and the
+ * SCB pointer fields are set to 0xFFFF, the link number is set to
+ * @lseq, most remaining fields are cleared to 0, and the page 3
+ * timeout constants are loaded from the ASD_*_TIMEOUT defaults.
+ */
+static void asd_init_lseq_mip(struct asd_ha_struct *asd_ha, u8 lseq)
+{
+	int i;
+
+	/* LSEQ Mode independent page 0 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_Q_TGTXFR_HEAD(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_Q_TGTXFR_TAIL(lseq), 0xFFFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_NUMBER(lseq), lseq);
+	asd_write_reg_byte(asd_ha, LmSEQ_SCRATCH_FLAGS(lseq),
+			   ASD_NOTIFY_ENABLE_SPINUP);
+	asd_write_reg_dword(asd_ha, LmSEQ_CONNECTION_STATE(lseq),0x08000000);
+	asd_write_reg_word(asd_ha, LmSEQ_CONCTL(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_CONSTAT(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_CONNECTION_MODES(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_REG1_ISR(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_REG2_ISR(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_REG3_ISR(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_REG0_ISR(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_REG0_ISR(lseq)+4, 0);
+
+	/* LSEQ Mode independent page 1 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR0(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR1(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR2(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EST_NEXUS_SCBPTR3(lseq), 0xFFFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE0(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE1(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE2(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_OPCODE3(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_HEAD(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_SCB_TAIL(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EST_NEXUS_BUF_AVAIL(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_TIMEOUT_CONST(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_ISR_SAVE_SINDEX(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_ISR_SAVE_DINDEX(lseq), 0);
+
+	/* LSEQ Mode Independent page 2 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR0(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR1(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR2(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_SCB_PTR3(lseq), 0xFFFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD0(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD1(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD2(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_OPCD3(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_HEAD(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_SCB_TAIL(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_EMPTY_BUFS_AVAIL(lseq), 0);
+	for (i = 0; i < 12; i += 4)	/* clear 12 bytes, one dword per write */
+		asd_write_reg_dword(asd_ha, LmSEQ_ATA_SCR_REGS(lseq) + i, 0);
+
+	/* LSEQ Mode Independent page 3 setup: timeout constants. */
+
+	/* Device present timer timeout */
+	asd_write_reg_dword(asd_ha, LmSEQ_DEV_PRES_TMR_TOUT_CONST(lseq),
+			    ASD_DEV_PRESENT_TIMEOUT);
+
+	/* SATA interlock timer disabled */
+	asd_write_reg_dword(asd_ha, LmSEQ_SATA_INTERLOCK_TIMEOUT(lseq),
+			    ASD_SATA_INTERLOCK_TIMEOUT);
+
+	/* STP shutdown timer timeout constant, IGNORED by the sequencer,
+	 * always 0. */
+	asd_write_reg_dword(asd_ha, LmSEQ_STP_SHUTDOWN_TIMEOUT(lseq),
+			    ASD_STP_SHUTDOWN_TIMEOUT);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_SRST_ASSERT_TIMEOUT(lseq),
+			    ASD_SRST_ASSERT_TIMEOUT);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_RCV_FIS_TIMEOUT(lseq),
+			    ASD_RCV_FIS_TIMEOUT);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_ONE_MILLISEC_TIMEOUT(lseq),
+			    ASD_ONE_MILLISEC_TIMEOUT);
+
+	/* COM_INIT timer */
+	asd_write_reg_dword(asd_ha, LmSEQ_TEN_MS_COMINIT_TIMEOUT(lseq),
+			    ASD_TEN_MILLISEC_TIMEOUT);
+
+	asd_write_reg_dword(asd_ha, LmSEQ_SMP_RCV_TIMEOUT(lseq),
+			    ASD_SMP_RCV_TIMEOUT);
+}
+
+/**
+ * asd_init_lseq_mdp -- initialize LSEQ mode dependent pages.
+ * @asd_ha: pointer to host adapter structure
+ * @lseq: index of the link sequencer whose scratch pages are written
+ *
+ * Writes default values into the mode dependent scratch pages (modes
+ * 0, 1, 2 and 4/5; pages 0, 1 and 2) of link sequencer @lseq.  Uses the
+ * file-level values mode2_task and last_scb_site_no, so the firmware
+ * header must have been parsed and the SCB sites initialized first.
+ */
+static void asd_init_lseq_mdp(struct asd_ha_struct *asd_ha,  int lseq)
+{
+	int    i;
+	u32    moffs;
+	/* Return address per mode; only entries 0-2 and 5 are used below. */
+	u16 ret_addr[] = {
+		0xFFFF,		  /* mode 0 */
+		0xFFFF,		  /* mode 1 */
+		mode2_task,	  /* mode 2 */
+		0,		  /* not written by this function */
+		0xFFFF,		  /* mode 4/5 */
+		0xFFFF,		  /* mode 4/5 */
+	};
+
+	/*
+	 * Mode 0,1,2 and 4/5 have common field on page 0 for the first
+	 * 14 bytes.  Modes 0-2 are handled by this loop; mode 5 is
+	 * written separately below because it lives at a different offset.
+	 */
+	for (i = 0; i < 3; i++) {
+		moffs = i * LSEQ_MODE_SCRATCH_SIZE;
+		asd_write_reg_word(asd_ha, LmSEQ_RET_ADDR(lseq)+moffs,
+				   ret_addr[i]);
+		asd_write_reg_word(asd_ha, LmSEQ_REG0_MODE(lseq)+moffs, 0);
+		asd_write_reg_word(asd_ha, LmSEQ_MODE_FLAGS(lseq)+moffs, 0);
+		asd_write_reg_word(asd_ha, LmSEQ_RET_ADDR2(lseq)+moffs,0xFFFF);
+		asd_write_reg_word(asd_ha, LmSEQ_RET_ADDR1(lseq)+moffs,0xFFFF);
+		asd_write_reg_byte(asd_ha, LmSEQ_OPCODE_TO_CSEQ(lseq)+moffs,0);
+		asd_write_reg_word(asd_ha, LmSEQ_DATA_TO_CSEQ(lseq)+moffs,0);
+	}
+	/*
+	 *  Mode 5 page 0 overlaps the same scratch page with Mode 0 page 3.
+	 */
+	asd_write_reg_word(asd_ha,
+			 LmSEQ_RET_ADDR(lseq)+LSEQ_MODE5_PAGE0_OFFSET,
+			   ret_addr[5]);
+	asd_write_reg_word(asd_ha,
+			 LmSEQ_REG0_MODE(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0);
+	asd_write_reg_word(asd_ha,
+			 LmSEQ_MODE_FLAGS(lseq)+LSEQ_MODE5_PAGE0_OFFSET, 0);
+	asd_write_reg_word(asd_ha,
+			 LmSEQ_RET_ADDR2(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0xFFFF);
+	asd_write_reg_word(asd_ha,
+			 LmSEQ_RET_ADDR1(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0xFFFF);
+	asd_write_reg_byte(asd_ha,
+			 LmSEQ_OPCODE_TO_CSEQ(lseq)+LSEQ_MODE5_PAGE0_OFFSET,0);
+	asd_write_reg_word(asd_ha,
+			 LmSEQ_DATA_TO_CSEQ(lseq)+LSEQ_MODE5_PAGE0_OFFSET, 0);
+
+	/* LSEQ Mode dependent 0, page 0 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_FIRST_INV_DDB_SITE(lseq),
+			   (u16)asd_ha->hw_prof.max_ddbs);
+	asd_write_reg_word(asd_ha, LmSEQ_EMPTY_TRANS_CTX(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_RESP_LEN(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_FIRST_INV_SCB_SITE(lseq),
+			   (u16)last_scb_site_no+1);
+	/*
+	 * Save the mode 0 interrupt enable mask as two 16-bit words: the
+	 * upper half first, then the lower half.  The parentheses are
+	 * required: a cast binds tighter than '>>', which binds tighter
+	 * than '&', so the unparenthesized form would store the lower
+	 * half in both words.
+	 */
+	asd_write_reg_word(asd_ha, LmSEQ_INTEN_SAVE(lseq),
+			    (u16) (((LmM0INTEN_MASK) & 0xFFFF0000) >> 16));
+	asd_write_reg_word(asd_ha, LmSEQ_INTEN_SAVE(lseq) + 2,
+			    (u16) ((LmM0INTEN_MASK) & 0xFFFF));
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_RST_FRM_LEN(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_RST_PROTOCOL(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_RESP_STATUS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_LAST_LOADED_SGE(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_SAVE_SCBPTR(lseq), 0);
+
+	/* LSEQ mode dependent, mode 1, page 0 setup. */
+	asd_write_reg_word(asd_ha, LmSEQ_Q_XMIT_HEAD(lseq), 0xFFFF);
+	asd_write_reg_word(asd_ha, LmSEQ_M1_EMPTY_TRANS_CTX(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_INI_CONN_TAG(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_FAILED_OPEN_STATUS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_XMIT_REQUEST_TYPE(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_M1_RESP_STATUS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_M1_LAST_LOADED_SGE(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_M1_SAVE_SCBPTR(lseq), 0);
+
+	/* LSEQ Mode dependent mode 2, page 0 setup */
+	asd_write_reg_word(asd_ha, LmSEQ_PORT_COUNTER(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_PM_TABLE_PTR(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_SATA_INTERLOCK_TMR_SAVE(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_IP_BITL(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_COPY_SMP_CONN_TAG(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_P0M2_OFFS1AH(lseq), 0);
+
+	/* LSEQ Mode dependent, mode 4/5, page 0 setup. */
+	asd_write_reg_byte(asd_ha, LmSEQ_SAVED_OOB_STATUS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SAVED_OOB_MODE(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_Q_LINK_HEAD(lseq), 0xFFFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_RST_ERR(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SAVED_OOB_SIGNALS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SAS_RESET_MODE(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_LINK_RESET_RETRY_COUNT(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_NUM_LINK_RESET_RETRIES(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_OOB_INT_ENABLES(lseq), 0);
+	/*
+	 * Set the desired interval between transmissions of the NOTIFY
+	 * (ENABLE SPINUP) primitive.  Must be initialized to val - 1.
+	 */
+	asd_write_reg_word(asd_ha, LmSEQ_NOTIFY_TIMER_TIMEOUT(lseq),
+			   ASD_NOTIFY_TIMEOUT - 1);
+	/* No delay for the first NOTIFY to be sent to the attached target. */
+	asd_write_reg_word(asd_ha, LmSEQ_NOTIFY_TIMER_DOWN_COUNT(lseq),
+			   ASD_NOTIFY_DOWN_COUNT);
+
+	/* LSEQ Mode dependent, mode 0 and 1, page 1 setup. */
+	for (i = 0; i < 2; i++)	{
+		int j;
+		/* Start from Page 1 of Mode 0 and 1. */
+		moffs = LSEQ_PAGE_SIZE + i*LSEQ_MODE_SCRATCH_SIZE;
+		/* All the fields of page 1 can be initialized to 0. */
+		for (j = 0; j < LSEQ_PAGE_SIZE; j += 4)
+			asd_write_reg_dword(asd_ha, LmSCRATCH(lseq)+moffs+j,0);
+	}
+
+	/* LSEQ Mode dependent, mode 2, page 1 setup. */
+	asd_write_reg_dword(asd_ha, LmSEQ_INVALID_DWORD_COUNT(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_DISPARITY_ERROR_COUNT(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_LOSS_OF_SYNC_COUNT(lseq), 0);
+
+	/* LSEQ Mode dependent, mode 4/5, page 1: clear the whole page,
+	 * then set the individual mask bytes and the data offset. */
+	for (i = 0; i < LSEQ_PAGE_SIZE; i+=4)
+		asd_write_reg_dword(asd_ha, LmSEQ_FRAME_TYPE_MASK(lseq)+i, 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_FRAME_TYPE_MASK(lseq), 0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_DEST_ADDR_MASK(lseq), 0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_DEST_ADDR_MASK(lseq)+1,0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_DEST_ADDR_MASK(lseq)+2,0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_SRC_ADDR_MASK(lseq), 0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_SRC_ADDR_MASK(lseq)+1, 0xFF);
+	asd_write_reg_byte(asd_ha, LmSEQ_HASHED_SRC_ADDR_MASK(lseq)+2, 0xFF);
+	asd_write_reg_dword(asd_ha, LmSEQ_DATA_OFFSET(lseq), 0xFFFFFFFF);
+
+	/* LSEQ Mode dependent, mode 0, page 2 setup. */
+	asd_write_reg_dword(asd_ha, LmSEQ_SMP_RCV_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_DEVICE_BITS(lseq), 0);
+	asd_write_reg_word(asd_ha, LmSEQ_SDB_DDB(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SDB_NUM_TAGS(lseq), 0);
+	asd_write_reg_byte(asd_ha, LmSEQ_SDB_CURR_TAG(lseq), 0);
+
+	/* LSEQ Mode Dependent 1, page 2 setup. */
+	asd_write_reg_dword(asd_ha, LmSEQ_TX_ID_ADDR_FRAME(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_TX_ID_ADDR_FRAME(lseq)+4, 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_OPEN_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_SRST_AS_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_LAST_LOADED_SG_EL(lseq), 0);
+
+	/* LSEQ Mode Dependent 2, page 2 setup. */
+	/* The LmSEQ_STP_SHUTDOWN_TIMER_TERM_TS is IGNORED by the sequencer,
+	 * i.e. always 0. */
+	asd_write_reg_dword(asd_ha, LmSEQ_STP_SHUTDOWN_TIMER_TERM_TS(lseq),0);
+	asd_write_reg_dword(asd_ha, LmSEQ_CLOSE_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_BREAK_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_DWS_RESET_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha,LmSEQ_SATA_INTERLOCK_TIMER_TERM_TS(lseq),0);
+	asd_write_reg_dword(asd_ha, LmSEQ_MCTL_TIMER_TERM_TS(lseq), 0);
+
+	/* LSEQ Mode Dependent 4/5, page 2 setup. */
+	asd_write_reg_dword(asd_ha, LmSEQ_COMINIT_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_RCV_ID_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_RCV_FIS_TIMER_TERM_TS(lseq), 0);
+	asd_write_reg_dword(asd_ha, LmSEQ_DEV_PRES_TIMER_TERM_TS(lseq),	0);
+}
+
+/**
+ * asd_init_lseq_scratch -- initialize scratch RAM of the link sequencers
+ * @asd_ha: pointer to host adapter struct
+ *
+ * Starting from the enabled-phys mask in the hardware profile, runs the
+ * mode independent and then the mode dependent scratch page setup for
+ * every sequencer the for_each_sequencer() iterator yields.
+ */
+static void asd_init_lseq_scratch(struct asd_ha_struct *asd_ha)
+{
+	u8 pending = asd_ha->hw_prof.enabled_phys;
+	u8 seq_no;
+
+	for_each_sequencer(pending, pending, seq_no) {
+		asd_init_lseq_mip(asd_ha, seq_no);
+		asd_init_lseq_mdp(asd_ha, seq_no);
+	}
+}
+
+/**
+ * asd_init_scb_sites -- initialize sequencer SCB sites (memory).
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Zeroes every SCB site and chains the valid ones together through
+ * their Q_NEXT field.  Must run before the common CSEQ and LSEQ scratch
+ * areas are initialized, because those use values computed here
+ * (first_scb_site_no, last_scb_site_no).  On return
+ * asd_ha->hw_prof.max_scbs holds the number of valid sites.
+ */
+static void asd_init_scb_sites(struct asd_ha_struct *asd_ha)
+{
+	u16 usable_sites = 0;
+	u16 site;
+
+	/* Walk from the highest site down to site 0; decrementing past 0
+	 * wraps the u16 to 0xFFFF, which ends the loop. */
+	for (site = asd_ha->hw_prof.max_scbs-1; site != (u16) -1; site--) {
+		u16 offs;
+
+		/* Clear the whole site, one dword at a time. */
+		for (offs = 0; offs < ASD_SCB_SIZE; offs += 4)
+			asd_scbsite_write_dword(asd_ha, site, offs, 0);
+
+		/* SEQ workaround for a SATA issue: some SCB sites are
+		 * kept off the free list. */
+		if (!SCB_SITE_VALID(site))
+			continue;
+
+		/* The first valid site seen is the highest-numbered one. */
+		if (last_scb_site_no == 0)
+			last_scb_site_no = site;
+
+		/* Fields set for each valid site: Q_NEXT, SCB_OPCODE,
+		 * SCB_FLAGS, and SG Element Flag. */
+
+		/* Q_NEXT points at the previously linked (next higher
+		 * valid) site; for the last SCB it is the invalid value. */
+		asd_scbsite_write_word(asd_ha, site, 0, first_scb_site_no);
+
+		/* Opcode field: invalid. */
+		asd_scbsite_write_byte(asd_ha, site,
+				       offsetof(struct scb_header, opcode),
+				       0xFF);
+
+		/* Flags field: marked as "response frame received", so
+		 * that inadvertently received frames are dropped. */
+		asd_scbsite_write_byte(asd_ha, site, 0x49, 0x01);
+
+		first_scb_site_no = site;
+		usable_sites++;
+	}
+
+	asd_ha->hw_prof.max_scbs = usable_sites;
+	ASD_DPRINTK("max_scbs:%d\n", asd_ha->hw_prof.max_scbs);
+	ASD_DPRINTK("first_scb_site_no:0x%x\n", first_scb_site_no);
+	ASD_DPRINTK("last_scb_site_no:0x%x\n", last_scb_site_no);
+}
+
+/**
+ * asd_init_cseq_cio - initialize CSEQ CIO registers
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Programs the central sequencer's CIO registers: disables the common
+ * interrupt enables, sets up the done-list control/offset, clears the
+ * SCB producer index (both the driver copy and the register), loads
+ * the mode 11 interrupt vectors and the program counter from the
+ * values read out of the firmware file, and sets up the per-mode
+ * scratch page, interrupt enable and request mailbox registers.
+ */
+static void asd_init_cseq_cio(struct asd_ha_struct *asd_ha)
+{
+	int i;
+
+	asd_write_reg_byte(asd_ha, CSEQCOMINTEN, 0);
+	asd_write_reg_byte(asd_ha, CSEQDLCTL, ASD_DL_SIZE_BITS);
+	asd_write_reg_byte(asd_ha, CSEQDLOFFS, 0);
+	asd_write_reg_byte(asd_ha, CSEQDLOFFS+1, 0);
+	asd_ha->seq.scbpro = 0;
+	asd_write_reg_dword(asd_ha, SCBPRO, 0);
+	asd_write_reg_dword(asd_ha, CSEQCON, 0);
+
+	/* Initialize CSEQ Mode 11 Interrupt Vectors.
+	 * The addresses are 16 bit wide and in dword units.
+	 * The values of their macros are in byte units.
+	 * Thus we have to divide by 4.
+	 * NOTE(review): no division is done here; cseq_vecs[] is written
+	 * exactly as loaded from the firmware file, so the table is
+	 * presumably already in dword units -- confirm. */
+	asd_write_reg_word(asd_ha, CM11INTVEC0, cseq_vecs[0]);
+	asd_write_reg_word(asd_ha, CM11INTVEC1, cseq_vecs[1]);
+	asd_write_reg_word(asd_ha, CM11INTVEC2, cseq_vecs[2]);
+
+	/* Enable ARP2HALTC (ARP2 Halted from Halt Code Write). */
+	asd_write_reg_byte(asd_ha, CARP2INTEN, EN_ARP2HALTC);
+
+	/* Initialize CSEQ Scratch Page to 0x04. */
+	asd_write_reg_byte(asd_ha, CSCRATCHPAGE, 0x04);
+
+	/* Initialize CSEQ Mode[0-8] Dependent registers. */
+	/* Initialize Scratch Page to 0. */
+	for (i = 0; i < 9; i++)
+		asd_write_reg_byte(asd_ha, CMnSCRATCHPAGE(i), 0);
+
+	/* Reset the ARP2 Program Count to the CSEQ idle loop address
+	 * taken from the firmware header. */
+	asd_write_reg_word(asd_ha, CPRGMCNT, cseq_idle_loop);
+
+	for (i = 0; i < 8; i++) {
+		/* Initialize Mode n Link m Interrupt Enable. */
+		asd_write_reg_dword(asd_ha, CMnINTEN(i), EN_CMnRSPMBXF);
+		/* Initialize Mode n Request Mailbox. */
+		asd_write_reg_dword(asd_ha, CMnREQMBX(i), 0);
+	}
+}
+
+/**
+ * asd_init_lseq_cio -- initialize LmSEQ CIO registers
+ * @asd_ha: pointer to host adapter structure
+ * @lseq: index of the link sequencer to program
+ *
+ * Programs the CIO registers of link sequencer @lseq: interrupt
+ * enables and status for modes 0, 1, 2 and 5, hardware timer,
+ * primitive and frame error status enables, transfer levels, the
+ * program counter (lseq_idle_loop from the firmware header), the WWN
+ * taken from the phy's SAS address, the mode 3 interrupt vectors
+ * (lseq_vecs[] from the firmware file), LED control and align rates.
+ */
+static void asd_init_lseq_cio(struct asd_ha_struct *asd_ha, int lseq)
+{
+	u8  *sas_addr;
+	int  i;
+
+	/* Enable ARP2HALTC (ARP2 Halted from Halt Code Write). */
+	asd_write_reg_dword(asd_ha, LmARP2INTEN(lseq), EN_ARP2HALTC);
+
+	asd_write_reg_byte(asd_ha, LmSCRATCHPAGE(lseq), 0);
+
+	/* Initialize Mode 0,1, and 2 SCRATCHPAGE to 0. */
+	for (i = 0; i < 3; i++)
+		asd_write_reg_byte(asd_ha, LmMnSCRATCHPAGE(lseq, i), 0);
+
+	/* Initialize Mode 5 SCRATCHPAGE to 0. */
+	asd_write_reg_byte(asd_ha, LmMnSCRATCHPAGE(lseq, 5), 0);
+
+	asd_write_reg_dword(asd_ha, LmRSPMBX(lseq), 0);
+	/* Initialize Mode 0,1,2 and 5 Interrupt Enable and
+	 * Interrupt registers.  Each mode gets its enable mask and then
+	 * all-ones written to its interrupt register. */
+	asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 0), LmM0INTEN_MASK);
+	asd_write_reg_dword(asd_ha, LmMnINT(lseq, 0), 0xFFFFFFFF);
+	/* Mode 1 */
+	asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 1), LmM1INTEN_MASK);
+	asd_write_reg_dword(asd_ha, LmMnINT(lseq, 1), 0xFFFFFFFF);
+	/* Mode 2 */
+	asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 2), LmM2INTEN_MASK);
+	asd_write_reg_dword(asd_ha, LmMnINT(lseq, 2), 0xFFFFFFFF);
+	/* Mode 5 */
+	asd_write_reg_dword(asd_ha, LmMnINTEN(lseq, 5), LmM5INTEN_MASK);
+	asd_write_reg_dword(asd_ha, LmMnINT(lseq, 5), 0xFFFFFFFF);
+
+	/* Enable HW Timer status. */
+	asd_write_reg_byte(asd_ha, LmHWTSTATEN(lseq), LmHWTSTATEN_MASK);
+
+	/* Enable Primitive Status 0 and 1. */
+	asd_write_reg_dword(asd_ha, LmPRIMSTAT0EN(lseq), LmPRIMSTAT0EN_MASK);
+	asd_write_reg_dword(asd_ha, LmPRIMSTAT1EN(lseq), LmPRIMSTAT1EN_MASK);
+
+	/* Enable Frame Error. */
+	asd_write_reg_dword(asd_ha, LmFRMERREN(lseq), LmFRMERREN_MASK);
+	asd_write_reg_byte(asd_ha, LmMnHOLDLVL(lseq, 0), 0x50);
+
+	/* Initialize Mode 0 Transfer Level to 512. */
+	asd_write_reg_byte(asd_ha,  LmMnXFRLVL(lseq, 0), LmMnXFRLVL_512);
+	/* Initialize Mode 1 Transfer Level to 256. */
+	asd_write_reg_byte(asd_ha, LmMnXFRLVL(lseq, 1), LmMnXFRLVL_256);
+
+	/* Initialize Program Count to the LSEQ idle loop address taken
+	 * from the firmware header. */
+	asd_write_reg_word(asd_ha, LmPRGMCNT(lseq), lseq_idle_loop);
+
+	/* Enable Blind SG Move. */
+	asd_write_reg_dword(asd_ha, LmMODECTL(lseq), LmBLIND48);
+	asd_write_reg_word(asd_ha, LmM3SATATIMER(lseq),
+			   ASD_SATA_INTERLOCK_TIMEOUT);
+
+	(void) asd_read_reg_dword(asd_ha, LmREQMBX(lseq)); /* value unused */
+
+	/* Clear Primitive Status 0 and 1. */
+	asd_write_reg_dword(asd_ha, LmPRMSTAT0(lseq), 0xFFFFFFFF);
+	asd_write_reg_dword(asd_ha, LmPRMSTAT1(lseq), 0xFFFFFFFF);
+
+	/* Clear HW Timer status. */
+	asd_write_reg_byte(asd_ha, LmHWTSTAT(lseq), 0xFF);
+
+	/* Clear DMA Errors for Mode 0 and 1. */
+	asd_write_reg_byte(asd_ha, LmMnDMAERRS(lseq, 0), 0xFF);
+	asd_write_reg_byte(asd_ha, LmMnDMAERRS(lseq, 1), 0xFF);
+
+	/* Clear SG DMA Errors for Mode 0 and 1. */
+	asd_write_reg_byte(asd_ha, LmMnSGDMAERRS(lseq, 0), 0xFF);
+	asd_write_reg_byte(asd_ha, LmMnSGDMAERRS(lseq, 1), 0xFF);
+
+	/* Clear Mode 0 Buffer Parity Error. */
+	asd_write_reg_byte(asd_ha, LmMnBUFSTAT(lseq, 0), LmMnBUFPERR);
+
+	/* Clear Mode 0 Frame Error register. */
+	asd_write_reg_dword(asd_ha, LmMnFRMERR(lseq, 0), 0xFFFFFFFF);
+
+	/* Reset LSEQ external interrupt arbiter. */
+	asd_write_reg_byte(asd_ha, LmARP2INTCTL(lseq), RSTINTCTL);
+
+	/* Set the Phy SAS for the LmSEQ WWN: copy the phy's SAS address
+	 * into the WWN register one byte at a time. */
+	sas_addr = asd_ha->phys[lseq].phy_desc->sas_addr;
+	for (i = 0; i < SAS_ADDR_SIZE; i++)
+		asd_write_reg_byte(asd_ha, LmWWN(lseq) + i, sas_addr[i]);
+
+	/* Set the Transmit Size to 1024 bytes, 0 = 256 Dwords. */
+	asd_write_reg_byte(asd_ha, LmMnXMTSIZE(lseq, 1), 0);
+
+	/* Set the Bus Inactivity Time Limit Timer. */
+	asd_write_reg_word(asd_ha, LmBITL_TIMER(lseq), 9);
+
+	/* Enable SATA Port Multiplier. */
+	asd_write_reg_byte(asd_ha, LmMnSATAFS(lseq, 1), 0x80);
+
+	/* Initialize Interrupt Vector[0-10] address in Mode 3.
+	 * See the comment on CSEQ_INT_* */
+	asd_write_reg_word(asd_ha, LmM3INTVEC0(lseq), lseq_vecs[0]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC1(lseq), lseq_vecs[1]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC2(lseq), lseq_vecs[2]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC3(lseq), lseq_vecs[3]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC4(lseq), lseq_vecs[4]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC5(lseq), lseq_vecs[5]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC6(lseq), lseq_vecs[6]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC7(lseq), lseq_vecs[7]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC8(lseq), lseq_vecs[8]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC9(lseq), lseq_vecs[9]);
+	asd_write_reg_word(asd_ha, LmM3INTVEC10(lseq), lseq_vecs[10]);
+	/*
+	 * Program the Link LED control, applicable only for
+	 * Chip Rev. B or later.
+	 */
+	asd_write_reg_dword(asd_ha, LmCONTROL(lseq),
+			    (LEDTIMER | LEDMODE_TXRX | LEDTIMERS_100ms));
+
+	/* Set the Align Rate for SAS and STP mode. */
+	asd_write_reg_byte(asd_ha, LmM1SASALIGN(lseq), SAS_ALIGN_DEFAULT);
+	asd_write_reg_byte(asd_ha, LmM1STPALIGN(lseq), STP_ALIGN_DEFAULT);
+}
+
+
+/**
+ * asd_post_init_cseq -- clear CSEQ Mode n Int. status and Response mailbox
+ * @asd_ha: pointer to host adapter struct
+ *
+ * For modes 0-7, writes all-ones to the mode interrupt register and
+ * then reads the mode response mailbox (discarding the value).
+ * Finishes by resetting the external interrupt arbiter.
+ */
+static void asd_post_init_cseq(struct asd_ha_struct *asd_ha)
+{
+	int mode;
+
+	/* All interrupt registers are written before any mailbox is read. */
+	for (mode = 0; mode < 8; mode++) {
+		asd_write_reg_dword(asd_ha, CMnINT(mode), 0xFFFFFFFF);
+	}
+
+	for (mode = 0; mode < 8; mode++) {
+		(void) asd_read_reg_dword(asd_ha, CMnRSPMBX(mode));
+	}
+
+	/* Reset the external interrupt arbiter. */
+	asd_write_reg_byte(asd_ha, CARP2INTCTL, RSTINTCTL);
+}
+
+/**
+ * asd_init_ddb_0 -- initialize DDB 0
+ * @asd_ha: pointer to host adapter structure
+ *
+ * DDB site 0 is used internally by the sequencer.  The site is zeroed,
+ * the fields of struct asd_ddb_seq_shared are given their initial
+ * values, and bit 0 of the DDB bitmap is set so the site is treated as
+ * reserved.
+ */
+static void asd_init_ddb_0(struct asd_ha_struct *asd_ha)
+{
+	int offs;
+
+	/* Explicitly clear the shared area, one dword per write. */
+	for (offs = 0; offs < sizeof(struct asd_ddb_seq_shared); offs += 4)
+		asd_ddbsite_write_dword(asd_ha, 0, offs, 0);
+
+	/* Free DDB queue: head 0, tail at the last DDB, count 0. */
+	asd_ddbsite_write_word(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, q_free_ddb_head),
+			0);
+	asd_ddbsite_write_word(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, q_free_ddb_tail),
+			asd_ha->hw_prof.max_ddbs-1);
+	asd_ddbsite_write_word(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, q_free_ddb_cnt),
+			0);
+
+	/* Used DDB queue: head and tail both 0xFFFF. */
+	asd_ddbsite_write_word(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, q_used_ddb_head),
+			0xFFFF);
+	asd_ddbsite_write_word(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, q_used_ddb_tail),
+			0xFFFF);
+
+	asd_ddbsite_write_word(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, shared_mem_lock),
+			0);
+	asd_ddbsite_write_word(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, smp_conn_tag),
+			0);
+	asd_ddbsite_write_word(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, est_nexus_buf_cnt),
+			0);
+	/* Threshold is two buffers per phy. */
+	asd_ddbsite_write_word(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared,
+				 est_nexus_buf_thresh),
+			asd_ha->hw_prof.num_phys * 2);
+
+	asd_ddbsite_write_byte(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared,
+				 settable_max_contexts),
+			0);
+	asd_ddbsite_write_byte(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, conn_not_active),
+			0xFF);
+	asd_ddbsite_write_byte(asd_ha, 0,
+			offsetof(struct asd_ddb_seq_shared, phy_is_up),
+			0x00);
+
+	/* DDB 0 is reserved */
+	set_bit(0, asd_ha->hw_prof.ddb_bitmap);
+}
+
+/**
+ * asd_seq_setup_seqs -- setup and initialize central and link sequencers
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Runs the initialization steps in a fixed order: SCB sites, CSEQ
+ * scratch, LSEQ scratch, CSEQ CIO registers, DDB 0, the CIO registers
+ * of each enabled link sequencer, and finally the CSEQ post-init step.
+ */
+static void asd_seq_setup_seqs(struct asd_ha_struct *asd_ha)
+{
+	u8  pending;
+	int seq_no;
+
+	/* SCB sites go first: they compute values that the rest of the
+	 * init code depends on. */
+	asd_init_scb_sites(asd_ha);
+
+	/* Scratch RAM registers: central sequencer, then link sequencers. */
+	asd_init_cseq_scratch(asd_ha);
+	asd_init_lseq_scratch(asd_ha);
+
+	/* CSEQ CIO registers. */
+	asd_init_cseq_cio(asd_ha);
+
+	asd_init_ddb_0(asd_ha);
+
+	/* LmSEQ CIO registers, for each enabled sequencer. */
+	pending = asd_ha->hw_prof.enabled_phys;
+	for_each_sequencer(pending, pending, seq_no) {
+		asd_init_lseq_cio(asd_ha, seq_no);
+	}
+
+	asd_post_init_cseq(asd_ha);
+}
+
+
+/**
+ * asd_seq_start_cseq -- start the central sequencer, CSEQ
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Returns the result of asd_unpause_cseq().
+ */
+static int asd_seq_start_cseq(struct asd_ha_struct *asd_ha)
+{
+	/* Reset the ARP2 program counter to the CSEQ idle loop address
+	 * (cseq_idle_loop, taken from the firmware header). */
+	asd_write_reg_word(asd_ha, CPRGMCNT, cseq_idle_loop);
+
+	/* Unpause the CSEQ  */
+	return asd_unpause_cseq(asd_ha);
+}
+
+/**
+ * asd_seq_start_lseq -- start a link sequencer
+ * @asd_ha: pointer to host adapter structure
+ * @lseq: the link sequencer of interest
+ *
+ * Returns the result of asd_seq_unpause_lseq().
+ */
+static int asd_seq_start_lseq(struct asd_ha_struct *asd_ha, int lseq)
+{
+	/* Reset the ARP2 program counter to the LSEQ idle loop address
+	 * (lseq_idle_loop, taken from the firmware header). */
+	asd_write_reg_word(asd_ha, LmPRGMCNT(lseq), lseq_idle_loop);
+
+	/* Unpause the LmSEQ  */
+	return asd_seq_unpause_lseq(asd_ha, lseq);
+}
+
+/**
+ * asd_request_firmware -- load and parse the sequencer firmware file
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Requests SAS_RAZOR_SEQUENCER_FW_FILE, validates it (minimum size,
+ * byte-sum checksum, vector table sizes, table/code offsets inside the
+ * file) and fills in the file-level state used by the rest of the
+ * sequencer code: sequencer_version, mode2_task, cseq_idle_loop,
+ * lseq_idle_loop, cseq_vecs[], lseq_vecs[] and the code pointers/sizes.
+ * The firmware image stays loaded on success, since several of those
+ * values point into it.
+ *
+ * Returns 0 on success or if the firmware was already loaded, the
+ * error from request_firmware(), or -EINVAL if the file is malformed.
+ * On a validation failure the image is released and sequencer_fw is
+ * reset to NULL, so a later call retries instead of reporting
+ * "already loaded" with nothing parsed.
+ */
+static int asd_request_firmware(struct asd_ha_struct *asd_ha)
+{
+	int err, i;
+	struct sequencer_file_header header, *hdr_ptr;
+	u32 csum = 0;
+	u16 *ptr_cseq_vecs, *ptr_lseq_vecs;
+	size_t fw_size;
+
+	if (sequencer_fw)
+		/* already loaded */
+		return 0;
+
+	err = request_firmware(&sequencer_fw,
+			       SAS_RAZOR_SEQUENCER_FW_FILE,
+			       &asd_ha->pcidev->dev);
+	if (err)
+		return err;
+
+	/* Every validation failure below reports -EINVAL. */
+	err = -EINVAL;
+	fw_size = sequencer_fw->size;
+
+	/* The header is read straight out of the image; make sure it is
+	 * all there before touching any field. */
+	if (fw_size < sizeof(header)) {
+		asd_printk("Firmware file too short\n");
+		goto out_release;
+	}
+
+	hdr_ptr = (struct sequencer_file_header *)sequencer_fw->data;
+
+	/* All header quantities are little endian in the file. */
+	header.csum = le32_to_cpu(hdr_ptr->csum);
+	header.major = le32_to_cpu(hdr_ptr->major);
+	header.minor = le32_to_cpu(hdr_ptr->minor);
+	header.cseq_table_offset = le32_to_cpu(hdr_ptr->cseq_table_offset);
+	header.cseq_table_size = le32_to_cpu(hdr_ptr->cseq_table_size);
+	header.lseq_table_offset = le32_to_cpu(hdr_ptr->lseq_table_offset);
+	header.lseq_table_size = le32_to_cpu(hdr_ptr->lseq_table_size);
+	header.cseq_code_offset = le32_to_cpu(hdr_ptr->cseq_code_offset);
+	header.cseq_code_size = le32_to_cpu(hdr_ptr->cseq_code_size);
+	header.lseq_code_offset = le32_to_cpu(hdr_ptr->lseq_code_offset);
+	header.lseq_code_size = le32_to_cpu(hdr_ptr->lseq_code_size);
+	header.mode2_task = le16_to_cpu(hdr_ptr->mode2_task);
+	header.cseq_idle_loop = le16_to_cpu(hdr_ptr->cseq_idle_loop);
+	header.lseq_idle_loop = le16_to_cpu(hdr_ptr->lseq_idle_loop);
+
+	/* The checksum is the byte-wise sum of everything after the
+	 * checksum field itself. */
+	for (i = sizeof(header.csum); i < sequencer_fw->size; i++)
+		csum += sequencer_fw->data[i];
+
+	if (csum != header.csum) {
+		asd_printk("Firmware file checksum mismatch\n");
+		goto out_release;
+	}
+
+	if (header.cseq_table_size != CSEQ_NUM_VECS ||
+	    header.lseq_table_size != LSEQ_NUM_VECS) {
+		asd_printk("Firmware file table size mismatch\n");
+		goto out_release;
+	}
+
+	/* Both vector tables are read as arrays of u16 below; they must
+	 * lie entirely inside the image.  The code regions must at least
+	 * start inside it. */
+	if (header.cseq_table_offset > fw_size ||
+	    fw_size - header.cseq_table_offset < CSEQ_NUM_VECS * sizeof(u16) ||
+	    header.lseq_table_offset > fw_size ||
+	    fw_size - header.lseq_table_offset < LSEQ_NUM_VECS * sizeof(u16) ||
+	    header.cseq_code_offset > fw_size ||
+	    header.lseq_code_offset > fw_size) {
+		asd_printk("Firmware file offsets out of range\n");
+		goto out_release;
+	}
+
+	/* Validation passed: publish the parsed values. */
+	sequencer_version = hdr_ptr->version;
+
+	ptr_cseq_vecs = (u16 *)&sequencer_fw->data[header.cseq_table_offset];
+	ptr_lseq_vecs = (u16 *)&sequencer_fw->data[header.lseq_table_offset];
+	mode2_task = header.mode2_task;
+	cseq_idle_loop = header.cseq_idle_loop;
+	lseq_idle_loop = header.lseq_idle_loop;
+
+	for (i = 0; i < CSEQ_NUM_VECS; i++)
+		cseq_vecs[i] = le16_to_cpu(ptr_cseq_vecs[i]);
+
+	for (i = 0; i < LSEQ_NUM_VECS; i++)
+		lseq_vecs[i] = le16_to_cpu(ptr_lseq_vecs[i]);
+
+	cseq_code = &sequencer_fw->data[header.cseq_code_offset];
+	cseq_code_size = header.cseq_code_size;
+	lseq_code = &sequencer_fw->data[header.lseq_code_offset];
+	lseq_code_size = header.lseq_code_size;
+
+	return 0;
+
+out_release:
+	/* Drop the rejected image so it is neither leaked nor mistaken
+	 * for a successfully loaded one on the next call. */
+	release_firmware(sequencer_fw);
+	sequencer_fw = NULL;
+	return err;
+}
+
+/**
+ * asd_init_seqs -- load, download and set up the sequencers
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Loads the sequencer firmware file, downloads the sequencer code to
+ * the adapter and then initializes the central and link sequencers.
+ * Returns 0 on success, or the error of the step that failed.
+ */
+int asd_init_seqs(struct asd_ha_struct *asd_ha)
+{
+	int rc = asd_request_firmware(asd_ha);
+
+	if (rc) {
+		asd_printk("Failed to load sequencer firmware file %s, error %d\n",
+			   SAS_RAZOR_SEQUENCER_FW_FILE, rc);
+		return rc;
+	}
+
+	asd_printk("using sequencer %s\n", sequencer_version);
+
+	rc = asd_seq_download_seqs(asd_ha);
+	if (!rc) {
+		/* Code is on the adapter; program scratch/CIO state. */
+		asd_seq_setup_seqs(asd_ha);
+		return 0;
+	}
+
+	asd_printk("couldn't download sequencers for %s\n",
+		   pci_name(asd_ha->pcidev));
+	return rc;
+}
+
+/**
+ * asd_start_seqs -- start the central and all enabled link sequencers
+ * @asd_ha: pointer to host adapter structure
+ *
+ * Starts the CSEQ first, then each link sequencer yielded by
+ * for_each_sequencer() over the enabled-phys mask.  Stops at the first
+ * failure and returns its error; returns 0 if everything started.
+ */
+int asd_start_seqs(struct asd_ha_struct *asd_ha)
+{
+	int rc;
+	int seq_no;
+	u8  pending;
+
+	rc = asd_seq_start_cseq(asd_ha);
+	if (rc) {
+		asd_printk("couldn't start CSEQ for %s\n",
+			   pci_name(asd_ha->pcidev));
+		return rc;
+	}
+
+	pending = asd_ha->hw_prof.enabled_phys;
+	for_each_sequencer(pending, pending, seq_no) {
+		rc = asd_seq_start_lseq(asd_ha, seq_no);
+		if (!rc)
+			continue;
+
+		asd_printk("coudln't start LSEQ %d for %s\n", seq_no,
+			   pci_name(asd_ha->pcidev));
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * asd_update_port_links -- update port_map_by_links and phy_is_up
+ * @sas_phy: pointer to the phy which has been added to a port
+ *
+ * Called in two situations:
+ *
+ * 1) A link reset has completed and BYTES DMAED arrived with a valid
+ * frame.  The phy is then marked as up by updating phy_is_up in DDB 0.
+ * The sequencer checks phy_is_up when pending SCBs are to be sent, and
+ * when an open address frame has been received.
+ *
+ * 2) Ports are known.  The map of phys participating in the port,
+ * port_map_by_links in DDB 0, is then updated.  When a HARD_RESET
+ * primitive has been received, the sequencer disables all phys in that
+ * port.  port_map_by_links is also used as the conn_mask byte in the
+ * initiator/target port DDB.
+ *
+ * The phy_is_up update is a read followed by a conditional update and
+ * is attempted up to 12 times; it stops early on success or on -EFAULT.
+ */
+void asd_update_port_links(struct asd_sas_phy *sas_phy)
+{
+	struct asd_ha_struct *asd_ha = sas_phy->ha->lldd_ha;
+	const u8 phy_mask = (u8) sas_phy->port->phy_mask;
+	u8  current_up;
+	u8  walk;
+	int phy, attempt, err;
+
+	/* Store the port's phy mask in the map entry of each member phy. */
+	for_each_phy(phy_mask, walk, phy)
+		asd_ddbsite_write_byte(asd_ha, 0,
+				       offsetof(struct asd_ddb_seq_shared,
+						port_map_by_links)+phy,
+				       phy_mask);
+
+	for (attempt = 0; attempt < 12; attempt++) {
+		current_up = asd_ddbsite_read_byte(asd_ha, 0,
+			  offsetof(struct asd_ddb_seq_shared, phy_is_up));
+		err = asd_ddbsite_update_byte(asd_ha, 0,
+				offsetof(struct asd_ddb_seq_shared, phy_is_up),
+				current_up,
+				current_up | phy_mask);
+
+		if (err == -EFAULT)
+			asd_printk("phy_is_up: parity error in DDB 0\n");
+
+		/* Done on success; -EFAULT is not retried. */
+		if (!err || err == -EFAULT)
+			break;
+	}
+
+	if (err)
+		asd_printk("couldn't update DDB 0:error:%d\n", err);
+}
+
+MODULE_FIRMWARE(SAS_RAZOR_SEQUENCER_FW_FILE);
diff --git a/drivers/scsi/aic94xx/aic94xx_seq.h b/drivers/scsi/aic94xx/aic94xx_seq.h
new file mode 100644
index 0000000..42281c3
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_seq.h
@@ -0,0 +1,70 @@
+/*
+ * Aic94xx SAS/SATA driver sequencer interface header file.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _AIC94XX_SEQ_H_
+#define _AIC94XX_SEQ_H_
+
+#define CSEQ_NUM_VECS	3	/* entries expected in the CSEQ vector table */
+#define LSEQ_NUM_VECS	11	/* entries expected in the LSEQ vector table */
+
+#define SAS_RAZOR_SEQUENCER_FW_FILE "aic94xx-seq.fw" /* name passed to request_firmware() */
+
+/* Note:  All quantities in the sequencer file are little endian.
+ * This structure is the header found at offset 0 of the firmware
+ * file; the offsets below are byte offsets from the start of the file.
+ */
+struct sequencer_file_header {
+	/* Checksum of the entire contents of the sequencer excluding
+	 * these four bytes (byte-wise sum of the rest of the file) */
+	u32	csum;
+	/* numeric major version */
+	u32	major;
+	/* numeric minor version */
+	u32	minor;
+	/* version string printed by driver */
+	char	version[16];
+	u32	cseq_table_offset;	/* CSEQ interrupt vector table (u16 entries) */
+	u32	cseq_table_size;	/* must equal CSEQ_NUM_VECS */
+	u32	lseq_table_offset;	/* LSEQ interrupt vector table (u16 entries) */
+	u32	lseq_table_size;	/* must equal LSEQ_NUM_VECS */
+	u32	cseq_code_offset;	/* start of the CSEQ code */
+	u32	cseq_code_size;
+	u32	lseq_code_offset;	/* start of the LSEQ code */
+	u32	lseq_code_size;
+	u16	mode2_task;		/* LSEQ mode 2 return address */
+	u16	cseq_idle_loop;		/* written to the CSEQ program counter */
+	u16	lseq_idle_loop;		/* written to the LSEQ program counter */
+} __attribute__((packed));
+
+#ifdef __KERNEL__ /* driver-internal sequencer interface */
+int asd_pause_cseq(struct asd_ha_struct *asd_ha);
+int asd_unpause_cseq(struct asd_ha_struct *asd_ha);
+int asd_pause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask);
+int asd_unpause_lseq(struct asd_ha_struct *asd_ha, u8 lseq_mask);
+int asd_init_seqs(struct asd_ha_struct *asd_ha);	/* load fw, download, set up */
+int asd_start_seqs(struct asd_ha_struct *asd_ha);	/* start CSEQ, then LSEQs */
+
+void asd_update_port_links(struct asd_sas_phy *phy);	/* updates DDB 0 */
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c
new file mode 100644
index 0000000..285e70d
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_task.c
@@ -0,0 +1,642 @@
+/*
+ * Aic94xx SAS/SATA Tasks
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/spinlock.h>
+#include "aic94xx.h"
+#include "aic94xx_sas.h"
+#include "aic94xx_hwi.h"
+
+static void asd_unbuild_ata_ascb(struct asd_ascb *a);
+static void asd_unbuild_smp_ascb(struct asd_ascb *a);
+static void asd_unbuild_ssp_ascb(struct asd_ascb *a);
+
+static inline void asd_can_dequeue(struct asd_ha_struct *asd_ha, int num)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	unsigned long irq_state;	/* IRQ flags saved by the lock */
+
+	spin_lock_irqsave(&seq->pend_q_lock, irq_state);
+	seq->can_queue = seq->can_queue + num;	/* hand @num slots back to the budget */
+	spin_unlock_irqrestore(&seq->pend_q_lock, irq_state);
+}
+
+/* Lookup table indexed by a task's PCI_DMA_* direction; each entry is the
+ * DATA_DIR_* value this driver writes into the SCB for that direction. */
+static const u8 data_dir_flags[] = {
+	[PCI_DMA_BIDIRECTIONAL] = DATA_DIR_BYRECIPIENT,	/* UNSPECIFIED */
+	[PCI_DMA_TODEVICE]      = DATA_DIR_OUT, /* OUTBOUND */
+	[PCI_DMA_FROMDEVICE]    = DATA_DIR_IN, /* INBOUND */
+	[PCI_DMA_NONE]          = DATA_DIR_NONE, /* NO TRANSFER */
+};
+
+static inline int asd_map_scatterlist(struct sas_task *task,
+				      struct sg_el *sg_arr,
+				      unsigned long gfp_flags)
+{
+	struct asd_ascb *ascb = task->lldd_task;
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct scatterlist *sc;
+	int num_sg, res;
+	/* DMA-map @task's data and describe it in @sg_arr; returns 0 or -ENOMEM. */
+	if (task->data_dir == PCI_DMA_NONE)
+		return 0;
+
+	if (task->num_scatter == 0) {	/* task->scatter is used as one flat buffer here */
+		void *p = task->scatter;
+		dma_addr_t dma = pci_map_single(asd_ha->pcidev, p,
+						task->total_xfer_len,
+						task->data_dir);
+		sg_arr[0].bus_addr = cpu_to_le64((u64)dma);	/* NOTE(review): dma is not checked for a mapping error */
+		sg_arr[0].size = cpu_to_le32(task->total_xfer_len);
+		sg_arr[0].flags |= ASD_SG_EL_LIST_EOL;
+		return 0;
+	}
+
+	num_sg = pci_map_sg(asd_ha->pcidev, task->scatter, task->num_scatter,
+			    task->data_dir);
+	if (num_sg == 0)
+		return -ENOMEM;
+
+	if (num_sg > 3) {	/* more than the three entries written directly into sg_arr[] */
+		int i;
+
+		ascb->sg_arr = asd_alloc_coherent(asd_ha,
+						  num_sg*sizeof(struct sg_el),
+						  gfp_flags);
+		if (!ascb->sg_arr) {
+			res = -ENOMEM;
+			goto err_unmap;
+		}
+		for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) {	/* the complete list goes into the coherent array */
+			struct sg_el *sg =
+				&((struct sg_el *)ascb->sg_arr->vaddr)[i];
+			sg->bus_addr = cpu_to_le64((u64)sg_dma_address(sc));
+			sg->size = cpu_to_le32((u32)sg_dma_len(sc));
+			if (i == num_sg-1)
+				sg->flags |= ASD_SG_EL_LIST_EOL;
+		}
+
+		for (sc = task->scatter, i = 0; i < 2; i++, sc++) {	/* the first two entries are also copied into sg_arr[] */
+			sg_arr[i].bus_addr =
+				cpu_to_le64((u64)sg_dma_address(sc));
+			sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc));
+		}
+		sg_arr[1].next_sg_offs = 2 * sizeof(*sg_arr);	/* presumably the offset of the next entry in the external list -- TODO confirm */
+		sg_arr[1].flags |= ASD_SG_EL_LIST_EOS;
+
+		memset(&sg_arr[2], 0, sizeof(*sg_arr));
+		sg_arr[2].bus_addr=cpu_to_le64((u64)ascb->sg_arr->dma_handle);	/* third slot carries the bus address of the external list */
+	} else {
+		int i;
+		for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) {	/* short list: every entry fits in sg_arr[] */
+			sg_arr[i].bus_addr =
+				cpu_to_le64((u64)sg_dma_address(sc));
+			sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc));
+		}
+		sg_arr[i-1].flags |= ASD_SG_EL_LIST_EOL;	/* flag the last entry written */
+	}
+
+	return 0;
+err_unmap:
+	pci_unmap_sg(asd_ha->pcidev, task->scatter, task->num_scatter,
+		     task->data_dir);
+	return res;
+}
+
+static inline void asd_unmap_scatterlist(struct asd_ascb *ascb)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_task *task = ascb->uldd_task;
+	/* Release the DMA mappings (and external sg array, if any) of this ascb's task. */
+	if (task->data_dir == PCI_DMA_NONE)
+		return;
+
+	if (task->num_scatter == 0) {	/* single-buffer case: the bus address is read back from the SCB */
+		dma_addr_t dma = (dma_addr_t)
+		       le64_to_cpu(ascb->scb->ssp_task.sg_element[0].bus_addr);	/* NOTE(review): read via ssp_task although the ATA unbuild path calls this too -- presumably the layouts coincide; confirm */
+		pci_unmap_single(ascb->ha->pcidev, dma, task->total_xfer_len,
+				 task->data_dir);
+		return;
+	}
+
+	asd_free_coherent(asd_ha, ascb->sg_arr);	/* NOTE(review): sg_arr is only allocated for lists longer than three entries; presumably NULL is tolerated -- confirm */
+	pci_unmap_sg(asd_ha->pcidev, task->scatter, task->num_scatter,
+		     task->data_dir);
+}
+
+/* ---------- Task complete tasklet ---------- */
+
+static void asd_get_response_tasklet(struct asd_ascb *ascb,
+				     struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct sas_task *task = ascb->uldd_task;
+	struct task_status_struct *ts = &task->task_status;
+	unsigned long flags;
+	struct tc_resp_sb_struct {
+		__le16 index_escb;
+		u8     len_lsb;
+		u8     flags;
+	} __attribute__ ((packed)) *resp_sb = (void *) dl->status_block;
+	/* Decode the response held in the edb that @dl's status block refers to into task->task_status. */
+/* 	int  size   = ((resp_sb->flags & 7) << 8) | resp_sb->len_lsb; */
+	int  edb_id = ((resp_sb->flags & 0x70) >> 4)-1;	/* bits 4..6 of flags, minus one; NOTE(review): -1 if those bits are 0 */
+	struct asd_ascb *escb;
+	struct asd_dma_tok *edb;
+	void *r;
+
+	spin_lock_irqsave(&asd_ha->seq.tc_index_lock, flags);
+	escb = asd_tc_index_find(&asd_ha->seq,
+				 (int)le16_to_cpu(resp_sb->index_escb));
+	spin_unlock_irqrestore(&asd_ha->seq.tc_index_lock, flags);
+
+	if (!escb) {
+		ASD_DPRINTK("Uh-oh! No escb for this dl?!\n");
+		return;
+	}
+
+	ts->buf_valid_size = 0;
+	edb = asd_ha->seq.edb_arr[edb_id + escb->edb_index];
+	r = edb->vaddr;
+	if (task->task_proto == SAS_PROTO_SSP) {
+		struct ssp_response_iu *iu =
+			r + 16 + sizeof(struct ssp_frame_hdr);	/* response IU follows 16 bytes plus the SSP frame header */
+
+		ts->residual = le32_to_cpu(*(__le32 *)r);	/* first 32-bit LE word of the buffer */
+		ts->resp = SAS_TASK_COMPLETE;
+		if (iu->datapres == 0)
+			ts->stat = iu->status;
+		else if (iu->datapres == 1)
+			ts->stat = iu->resp_data[3];
+		else if (iu->datapres == 2) {
+			ts->stat = SAM_CHECK_COND;
+			ts->buf_valid_size = min((u32) SAS_STATUS_BUF_SIZE,
+					 be32_to_cpu(iu->sense_data_len));	/* copy is clamped to the status buffer size */
+			memcpy(ts->buf, iu->sense_data, ts->buf_valid_size);
+			if (iu->status != SAM_CHECK_COND) {
+				ASD_DPRINTK("device %llx sent sense data, but "
+					    "stat(0x%x) is not CHECK_CONDITION"
+					    "\n",
+					    SAS_ADDR(task->dev->sas_addr),
+					    ts->stat);
+			}
+		}
+	}  else {
+		struct ata_task_resp *resp = (void *) &ts->buf[0];	/* ATA response is built in place in ts->buf */
+
+		ts->residual = le32_to_cpu(*(__le32 *)r);
+
+		if (SAS_STATUS_BUF_SIZE >= sizeof(*resp)) {
+			resp->frame_len = le16_to_cpu(*(__le16 *)(r+6));
+			memcpy(&resp->ending_fis[0], r+16, 24);
+			ts->buf_valid_size = sizeof(*resp);
+		}
+	}
+
+	asd_invalidate_edb(escb, edb_id);
+}
+
+static void asd_task_tasklet_complete(struct asd_ascb *ascb,
+				      struct done_list_struct *dl)
+{
+	struct sas_task *task = ascb->uldd_task;
+	struct task_status_struct *ts = &task->task_status;
+	unsigned long flags;
+	u8 opcode = dl->opcode;
+	/* Translate the done-list opcode into task status, unmap the task and finish or signal it. */
+	asd_can_dequeue(ascb->ha, 1);
+
+Again:
+	switch (opcode) {
+	case TC_NO_ERROR:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAM_GOOD;
+		break;
+	case TC_UNDERRUN:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_UNDERRUN;
+		ts->residual = le32_to_cpu(*(__le32 *)dl->status_block);
+		break;
+	case TC_OVERRUN:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_DATA_OVERRUN;
+		ts->residual = 0;
+		break;
+	case TC_SSP_RESP:
+	case TC_ATA_RESP:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_PROTO_RESPONSE;
+		asd_get_response_tasklet(ascb, dl);
+		break;
+	case TF_OPEN_REJECT:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_OPEN_REJECT;
+		if (dl->status_block[1] & 2)
+			ts->open_rej_reason = 1 + dl->status_block[2];
+		else if (dl->status_block[1] & 1)
+			ts->open_rej_reason = (dl->status_block[2] >> 4)+10;
+		else
+			ts->open_rej_reason = SAS_OREJ_UNKNOWN;
+		break;
+	case TF_OPEN_TO:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_OPEN_TO;
+		break;
+	case TF_PHY_DOWN:
+	case TU_PHY_DOWN:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_PHY_DOWN;
+		break;
+	case TI_PHY_DOWN:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_PHY_DOWN;
+		break;
+	case TI_BREAK:
+	case TI_PROTO_ERR:
+	case TI_NAK:
+	case TI_ACK_NAK_TO:
+	case TF_SMP_XMIT_RCV_ERR:
+	case TC_ATA_R_ERR_RECV:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_INTERRUPTED;
+		break;
+	case TF_BREAK:
+	case TU_BREAK:
+	case TU_ACK_NAK_TO:
+	case TF_SMPRSP_TO:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+	case TF_NAK_RECV:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_NAK_R_ERR;
+		break;
+	case TA_I_T_NEXUS_LOSS:
+		opcode = dl->status_block[0];	/* the real opcode is in the status block: decode that instead */
+		goto Again;
+	case TF_INV_CONN_HANDLE:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_DEVICE_UNKNOWN;
+		break;
+	case TF_REQUESTED_N_PENDING:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_PENDING;
+		break;
+	case TC_TASK_CLEARED:
+	case TA_ON_REQ:
+		ts->resp = SAS_TASK_COMPLETE;
+		ts->stat = SAS_ABORTED_TASK;
+		break;
+
+	case TF_NO_SMP_CONN:
+	case TF_TMF_NO_CTX:
+	case TF_TMF_NO_TAG:
+	case TF_TMF_TAG_FREE:
+	case TF_TMF_TASK_DONE:
+	case TF_TMF_NO_CONN_HANDLE:
+	case TF_IRTT_TO:
+	case TF_IU_SHORT:
+	case TF_DATA_OFFS_ERR:
+		ts->resp = SAS_TASK_UNDELIVERED;
+		ts->stat = SAS_DEV_NO_RESPONSE;
+		break;
+
+	case TC_LINK_ADM_RESP:
+	case TC_CONTROL_PHY:
+	case TC_RESUME:
+	case TC_PARTIAL_SG_LIST:
+	default:	/* unexpected here: only logged, ts->resp and ts->stat are left untouched */
+		ASD_DPRINTK("%s: dl opcode: 0x%x?\n", __FUNCTION__, opcode);
+		break;
+	}
+
+	switch (task->task_proto) {	/* undo the protocol-specific DMA mappings */
+	case SATA_PROTO:
+	case SAS_PROTO_STP:
+		asd_unbuild_ata_ascb(ascb);
+		break;
+	case SAS_PROTO_SMP:
+		asd_unbuild_smp_ascb(ascb);
+		break;
+	case SAS_PROTO_SSP:
+		asd_unbuild_ssp_ascb(ascb);
+		break;
+	default:
+		break;
+	}
+
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	task->task_state_flags &= ~SAS_TASK_STATE_PENDING;
+	task->task_state_flags |= SAS_TASK_STATE_DONE;
+	if (unlikely((task->task_state_flags & SAS_TASK_STATE_ABORTED))) {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		ASD_DPRINTK("task 0x%p done with opcode 0x%x resp 0x%x "
+			    "stat 0x%x but aborted by upper layer!\n",
+			    task, opcode, ts->resp, ts->stat);
+		complete(&ascb->completion);	/* aborted task: the ascb is kept and a waiter is signalled instead */
+	} else {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		task->lldd_task = NULL;
+		asd_ascb_free(ascb);
+		mb();
+		task->task_done(task);
+	}
+}
+
+/* ---------- ATA ---------- */
+
+static int asd_build_ata_ascb(struct asd_ascb *ascb, struct sas_task *task,
+			      unsigned long gfp_flags)
+{
+	struct domain_device *dev = task->dev;
+	struct scb *scb;
+	u8     flags;
+	int    res = 0;
+	/* Fill @ascb's SCB for an ATA/ATAPI/device-control request; returns 0 or the sg-mapping error. */
+	scb = ascb->scb;
+
+	if (unlikely(task->ata_task.device_control_reg_update))
+		scb->header.opcode = CONTROL_ATA_DEV;
+	else if (dev->sata_dev.command_set == ATA_COMMAND_SET)
+		scb->header.opcode = INITIATE_ATA_TASK;
+	else
+		scb->header.opcode = INITIATE_ATAPI_TASK;
+
+	scb->ata_task.proto_conn_rate = (1 << 5); /* STP */
+	if (dev->port->oob_mode == SAS_OOB_MODE)
+		scb->ata_task.proto_conn_rate |= dev->linkrate;
+
+	scb->ata_task.total_xfer_len = cpu_to_le32(task->total_xfer_len);
+	scb->ata_task.fis = task->ata_task.fis;	/* struct copy; selected fields are overridden below */
+	scb->ata_task.fis.fis_type = 0x27;
+	if (likely(!task->ata_task.device_control_reg_update))
+		scb->ata_task.fis.flags |= 0x80; /* C=1: update ATA cmd reg */
+	scb->ata_task.fis.flags &= 0xF0; /* PM_PORT field shall be 0 */
+	if (dev->sata_dev.command_set == ATAPI_COMMAND_SET)
+		memcpy(scb->ata_task.atapi_packet, task->ata_task.atapi_packet,
+		       16);
+	scb->ata_task.sister_scb = cpu_to_le16(0xFFFF);
+	scb->ata_task.conn_handle = cpu_to_le16(
+		(u16)(unsigned long)dev->lldd_dev);	/* lldd_dev is used as a 16-bit handle, not dereferenced */
+
+	if (likely(!task->ata_task.device_control_reg_update)) {
+		flags = 0;
+		if (task->ata_task.dma_xfer)
+			flags |= DATA_XFER_MODE_DMA;
+		if (task->ata_task.use_ncq &&
+		    dev->sata_dev.command_set != ATAPI_COMMAND_SET)
+			flags |= ATA_Q_TYPE_NCQ;
+		flags |= data_dir_flags[task->data_dir];
+		scb->ata_task.ata_flags = flags;
+
+		scb->ata_task.retry_count = task->ata_task.retry_count;
+
+		flags = 0;	/* reused for the affiliation-policy bits */
+		if (task->ata_task.set_affil_pol)
+			flags |= SET_AFFIL_POLICY;
+		if (task->ata_task.stp_affil_pol)
+			flags |= STP_AFFIL_POLICY;
+		scb->ata_task.flags = flags;
+	}
+	ascb->tasklet_complete = asd_task_tasklet_complete;
+
+	if (likely(!task->ata_task.device_control_reg_update))
+		res = asd_map_scatterlist(task, scb->ata_task.sg_element,
+					  gfp_flags);	/* device-control updates skip the data mapping */
+
+	return res;
+}
+
+static void asd_unbuild_ata_ascb(struct asd_ascb *a)
+{
+	asd_unmap_scatterlist(a);	/* the only teardown for ATA ascbs is unmapping the data */
+}
+
+/* ---------- SMP ---------- */
+
+static int asd_build_smp_ascb(struct asd_ascb *ascb, struct sas_task *task,
+			      unsigned long gfp_flags)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	struct domain_device *dev = task->dev;
+	struct scb *scb;
+	/* Map the SMP request/response buffers and fill @ascb's SCB; always returns 0. */
+	pci_map_sg(asd_ha->pcidev, &task->smp_task.smp_req, 1,
+		   PCI_DMA_TODEVICE);	/* request is read by the device; NOTE(review): result unchecked */
+	pci_map_sg(asd_ha->pcidev, &task->smp_task.smp_resp, 1,
+		   PCI_DMA_FROMDEVICE);	/* response is written by the device; NOTE(review): result unchecked */
+
+	scb = ascb->scb;
+
+	scb->header.opcode = INITIATE_SMP_TASK;
+
+	scb->smp_task.proto_conn_rate = dev->linkrate;
+
+	scb->smp_task.smp_req.bus_addr =
+		cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_req));
+	scb->smp_task.smp_req.size =
+		cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_req)-4);	/* 4 bytes short of the mapped length; presumably the CRC -- TODO confirm */
+
+	scb->smp_task.smp_resp.bus_addr =
+		cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_resp));
+	scb->smp_task.smp_resp.size =
+		cpu_to_le32((u32)sg_dma_len(&task->smp_task.smp_resp)-4);
+
+	scb->smp_task.sister_scb = cpu_to_le16(0xFFFF);
+	scb->smp_task.conn_handle = cpu_to_le16((u16)
+						(unsigned long)dev->lldd_dev);
+
+	ascb->tasklet_complete = asd_task_tasklet_complete;
+
+	return 0;
+}
+
+static void asd_unbuild_smp_ascb(struct asd_ascb *a)
+{
+	struct sas_task *task = a->uldd_task;
+
+	BUG_ON(!task);
+	pci_unmap_sg(a->ha->pcidev, &task->smp_task.smp_req, 1,
+		     PCI_DMA_TODEVICE);	/* direction must match the mapping in asd_build_smp_ascb() */
+	pci_unmap_sg(a->ha->pcidev, &task->smp_task.smp_resp, 1,
+		     PCI_DMA_FROMDEVICE);
+}
+
+/* ---------- SSP ---------- */
+
+static int asd_build_ssp_ascb(struct asd_ascb *ascb, struct sas_task *task,
+			      unsigned long gfp_flags)
+{
+	struct domain_device *dev = task->dev;
+	struct scb *scb;
+	int    res = 0;
+	/* Fill @ascb's SCB for an SSP command and map its data; returns 0 or the sg-mapping error. */
+	scb = ascb->scb;
+
+	scb->header.opcode = INITIATE_SSP_TASK;
+
+	scb->ssp_task.proto_conn_rate  = (1 << 4); /* SSP */
+	scb->ssp_task.proto_conn_rate |= dev->linkrate;
+	scb->ssp_task.total_xfer_len = cpu_to_le32(task->total_xfer_len);
+	scb->ssp_task.ssp_frame.frame_type = SSP_DATA;
+	memcpy(scb->ssp_task.ssp_frame.hashed_dest_addr, dev->hashed_sas_addr,
+	       HASHED_SAS_ADDR_SIZE);
+	memcpy(scb->ssp_task.ssp_frame.hashed_src_addr,
+	       dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+	scb->ssp_task.ssp_frame.tptt = cpu_to_be16(0xFFFF);
+
+	memcpy(scb->ssp_task.ssp_cmd.lun, task->ssp_task.LUN, 8);
+	if (task->ssp_task.enable_first_burst)
+		scb->ssp_task.ssp_cmd.efb_prio_attr |= EFB_MASK;
+	scb->ssp_task.ssp_cmd.efb_prio_attr |= (task->ssp_task.task_prio << 3);
+	scb->ssp_task.ssp_cmd.efb_prio_attr |= (task->ssp_task.task_attr & 7);
+	memcpy(scb->ssp_task.ssp_cmd.cdb, task->ssp_task.cdb, 16);
+
+	scb->ssp_task.sister_scb = cpu_to_le16(0xFFFF);
+	scb->ssp_task.conn_handle = cpu_to_le16(
+		(u16)(unsigned long)dev->lldd_dev);	/* lldd_dev is used as a 16-bit handle, not dereferenced */
+	scb->ssp_task.data_dir = data_dir_flags[task->data_dir];
+	scb->ssp_task.retry_count = task->ssp_task.retry_count;	/* taken from the task, as the ATA path does */
+
+	ascb->tasklet_complete = asd_task_tasklet_complete;
+
+	res = asd_map_scatterlist(task, scb->ssp_task.sg_element, gfp_flags);
+
+	return res;
+}
+
+static void asd_unbuild_ssp_ascb(struct asd_ascb *a)
+{
+	asd_unmap_scatterlist(a);	/* the only teardown for SSP ascbs is unmapping the data */
+}
+
+/* ---------- Execute Task ---------- */
+
+static inline int asd_can_queue(struct asd_ha_struct *asd_ha, int num)
+{
+	struct asd_seq_data *seq = &asd_ha->seq;
+	unsigned long irq_state;
+	int rc = -SAS_QUEUE_FULL;	/* assume the budget is exhausted */
+	spin_lock_irqsave(&seq->pend_q_lock, irq_state);
+	if (!((seq->can_queue - num) < 0)) {
+		seq->can_queue -= num;	/* reserve @num slots */
+		rc = 0;
+	}
+	spin_unlock_irqrestore(&seq->pend_q_lock, irq_state);
+
+	return rc;
+}
+
+int asd_execute_task(struct sas_task *task, const int num,
+		     unsigned long gfp_flags)
+{
+	int res = 0;
+	LIST_HEAD(alist);
+	struct sas_task *t = task;
+	struct asd_ascb *ascb = NULL, *a;
+	struct asd_ha_struct *asd_ha = task->dev->port->ha->lldd_ha;
+	/* Build and post @num ascbs for the chain of tasks starting at @task; returns 0, -SAS_QUEUE_FULL, -ENOMEM or the post error. */
+	res = asd_can_queue(asd_ha, num);
+	if (res)
+		return res;
+
+	res = num;	/* in: number of ascbs wanted; non-zero afterwards is treated as failure */
+	ascb = asd_ascb_alloc_list(asd_ha, &res, gfp_flags);
+	if (res) {
+		res = -ENOMEM;
+		goto out_err;
+	}
+
+	__list_add(&alist, ascb->list.prev, &ascb->list);	/* splice a temporary head in so the ascb ring can be iterated */
+	list_for_each_entry(a, &alist, list) {	/* pair each ascb with the next task in the chain */
+		a->uldd_task = t;
+		t->lldd_task = a;
+		t = list_entry(t->list.next, struct sas_task, list);
+	}
+	list_for_each_entry(a, &alist, list) {
+		t = a->uldd_task;
+		a->uldd_timer = 1;
+		if (t->task_proto & SAS_PROTO_STP)
+			t->task_proto = SAS_PROTO_STP;
+		switch (t->task_proto) {
+		case SATA_PROTO:
+		case SAS_PROTO_STP:
+			res = asd_build_ata_ascb(a, t, gfp_flags);
+			break;
+		case SAS_PROTO_SMP:
+			res = asd_build_smp_ascb(a, t, gfp_flags);
+			break;
+		case SAS_PROTO_SSP:
+			res = asd_build_ssp_ascb(a, t, gfp_flags);
+			break;
+		default:
+			asd_printk("unknown sas_task proto: 0x%x\n",
+				   t->task_proto);
+			res = -ENOMEM;
+			break;
+		}
+		if (res)
+			goto out_err_unmap;	/* @a marks the first ascb that was not built */
+	}
+	list_del_init(&alist);	/* remove the temporary head before posting */
+
+	res = asd_post_ascb_list(asd_ha, ascb, num);
+	if (unlikely(res)) {
+		a = NULL;	/* no stop marker: every ascb was built and must be unbuilt */
+		__list_add(&alist, ascb->list.prev, &ascb->list);
+		goto out_err_unmap;
+	}
+
+	return 0;
+out_err_unmap:
+	{
+		struct asd_ascb *b = a;
+		list_for_each_entry(a, &alist, list) {
+			if (a == b)
+				break;
+			t = a->uldd_task;
+			switch (t->task_proto) {
+			case SATA_PROTO:
+			case SAS_PROTO_STP:
+				asd_unbuild_ata_ascb(a);
+				break;
+			case SAS_PROTO_SMP:
+				asd_unbuild_smp_ascb(a);
+				break;
+			case SAS_PROTO_SSP:
+				asd_unbuild_ssp_ascb(a);	/* falls through to default, which only breaks */
+			default:
+				break;
+			}
+			t->lldd_task = NULL;	/* NOTE(review): tasks from @b onward keep their lldd_task pointer */
+		}
+	}
+	list_del_init(&alist);
+out_err:
+	if (ascb)
+		asd_ascb_free_list(ascb);
+	asd_can_dequeue(asd_ha, num);	/* give back the slots reserved by asd_can_queue() */
+	return res;
+}
diff --git a/drivers/scsi/aic94xx/aic94xx_tmf.c b/drivers/scsi/aic94xx/aic94xx_tmf.c
new file mode 100644
index 0000000..6123438
--- /dev/null
+++ b/drivers/scsi/aic94xx/aic94xx_tmf.c
@@ -0,0 +1,636 @@
+/*
+ * Aic94xx Task Management Functions
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This file is part of the aic94xx driver.
+ *
+ * The aic94xx driver is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * The aic94xx driver is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the aic94xx driver; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/spinlock.h>
+#include "aic94xx.h"
+#include "aic94xx_sas.h"
+#include "aic94xx_hwi.h"
+
+/* ---------- Internal enqueue ---------- */
+
+static int asd_enqueue_internal(struct asd_ascb *ascb,
+		void (*tasklet_complete)(struct asd_ascb *,
+					 struct done_list_struct *),
+				void (*timed_out)(unsigned long))
+{
+	int res;
+	/* Arm a timeout timer on @ascb, then post it; returns asd_post_ascb_list()'s result. */
+	ascb->tasklet_complete = tasklet_complete;
+	ascb->uldd_timer = 1;
+
+	ascb->timer.data = (unsigned long) ascb;	/* the timeout handlers in this file cast this back to the ascb */
+	ascb->timer.function = timed_out;
+	ascb->timer.expires = jiffies + AIC94XX_SCB_TIMEOUT;
+
+	add_timer(&ascb->timer);	/* armed before posting */
+
+	res = asd_post_ascb_list(ascb->ha, ascb, 1);
+	if (unlikely(res))
+		del_timer(&ascb->timer);	/* posting failed: cancel the timeout again */
+	return res;
+}
+
+static inline void asd_timedout_common(unsigned long data)
+{
+	struct asd_ascb *timed_out = (void *) data;
+	struct asd_seq_data *seq_data = &timed_out->ha->seq;
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&seq_data->pend_q_lock, irq_state);
+	list_del_init(&timed_out->list);	/* unlink the ascb from whatever list holds it */
+	seq_data->pending = seq_data->pending - 1;
+	spin_unlock_irqrestore(&seq_data->pend_q_lock, irq_state);
+}
+
+/* ---------- CLEAR NEXUS ---------- */
+
+static void asd_clear_nexus_tasklet_complete(struct asd_ascb *ascb,
+					     struct done_list_struct *dl)
+{
+	ASD_DPRINTK("%s: here\n", __FUNCTION__);
+	if (!del_timer(&ascb->timer)) {	/* timer was no longer pending; presumably the timeout path handles completion */
+		ASD_DPRINTK("%s: couldn't delete timer\n", __FUNCTION__);
+		return;
+	}
+	ASD_DPRINTK("%s: opcode: 0x%x\n", __FUNCTION__, dl->opcode);
+	ascb->uldd_task = (void *) (unsigned long) dl->opcode;	/* uldd_task doubles as the result slot read by the waiter */
+	complete(&ascb->completion);
+}
+
+static void asd_clear_nexus_timedout(unsigned long data)
+{
+	struct asd_ascb *ascb = (void *) data;	/* @data is the ascb set up in asd_enqueue_internal() */
+
+	ASD_DPRINTK("%s: here\n", __FUNCTION__);
+	asd_timedout_common(data);
+	ascb->uldd_task = (void *) TMF_RESP_FUNC_FAILED;	/* report failure to the waiter */
+	complete(&ascb->completion);
+}
+
+#define CLEAR_NEXUS_PRE         \
+	ASD_DPRINTK("%s: PRE\n", __FUNCTION__); \
+        res = 1;                \
+	ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL); \
+	if (!ascb)              \
+		return -ENOMEM; \
+                                \
+	scb = ascb->scb;        \
+	scb->header.opcode = CLEAR_NEXUS
+/* Both macros expand inside a function declaring asd_ha, ascb, scb and res; PRE may return early, POST supplies the out_err label and the final return. */
+#define CLEAR_NEXUS_POST        \
+	ASD_DPRINTK("%s: POST\n", __FUNCTION__); \
+	res = asd_enqueue_internal(ascb, asd_clear_nexus_tasklet_complete, \
+				   asd_clear_nexus_timedout);              \
+	if (res)                \
+		goto out_err;   \
+	ASD_DPRINTK("%s: clear nexus posted, waiting...\n", __FUNCTION__); \
+	wait_for_completion(&ascb->completion); \
+	res = (int) (unsigned long) ascb->uldd_task; \
+	if (res == TC_NO_ERROR) \
+		res = TMF_RESP_FUNC_COMPLETE;   \
+out_err:                        \
+	asd_ascb_free(ascb);    \
+	return res
+
+int asd_clear_nexus_ha(struct sas_ha_struct *sas_ha)
+{
+	struct asd_ha_struct *asd_ha = sas_ha->lldd_ha;
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+	/* Post a CLEAR_NEXUS SCB with nexus NEXUS_ADAPTER and wait for its result. */
+	CLEAR_NEXUS_PRE;
+	scb->clear_nexus.nexus = NEXUS_ADAPTER;
+	CLEAR_NEXUS_POST;
+}
+
+int asd_clear_nexus_port(struct asd_sas_port *port)
+{
+	struct asd_ha_struct *asd_ha = port->ha->lldd_ha;
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+	/* Post a CLEAR_NEXUS SCB with nexus NEXUS_PORT for @port's phy mask and wait for its result. */
+	CLEAR_NEXUS_PRE;
+	scb->clear_nexus.nexus = NEXUS_PORT;
+	scb->clear_nexus.conn_mask = port->phy_mask;
+	CLEAR_NEXUS_POST;
+}
+
+#if 0
+static int asd_clear_nexus_I_T(struct domain_device *dev)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+	/* Compiled out: CLEAR_NEXUS with nexus NEXUS_I_T for @dev's connection handle. */
+	CLEAR_NEXUS_PRE;
+	scb->clear_nexus.nexus = NEXUS_I_T;
+	scb->clear_nexus.flags = SEND_Q | EXEC_Q | NOTINQ;
+	if (dev->tproto)
+		scb->clear_nexus.flags |= SUSPEND_TX;
+	scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long)
+						   dev->lldd_dev);
+	CLEAR_NEXUS_POST;
+}
+#endif
+
+static int asd_clear_nexus_I_T_L(struct domain_device *dev, u8 *lun)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+	/* Post a CLEAR_NEXUS SCB with nexus NEXUS_I_T_L for @dev and the 8-byte @lun, and wait for its result. */
+	CLEAR_NEXUS_PRE;
+	scb->clear_nexus.nexus = NEXUS_I_T_L;
+	scb->clear_nexus.flags = SEND_Q | EXEC_Q | NOTINQ;
+	if (dev->tproto)
+		scb->clear_nexus.flags |= SUSPEND_TX;	/* only added when dev->tproto is non-zero */
+	memcpy(scb->clear_nexus.ssp_task.lun, lun, 8);
+	scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long)
+						   dev->lldd_dev);
+	CLEAR_NEXUS_POST;
+}
+
+static int asd_clear_nexus_tag(struct sas_task *task)
+{
+	struct asd_ha_struct *asd_ha = task->dev->port->ha->lldd_ha;
+	struct asd_ascb *tascb = task->lldd_task;	/* ascb of the task being cleared */
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+	/* Post a CLEAR_NEXUS SCB with nexus NEXUS_TAG, using the tag stored in @task's ascb, and wait for its result. */
+	CLEAR_NEXUS_PRE;
+	scb->clear_nexus.nexus = NEXUS_TAG;
+	memcpy(scb->clear_nexus.ssp_task.lun, task->ssp_task.LUN, 8);
+	scb->clear_nexus.ssp_task.tag = tascb->tag;
+	if (task->dev->tproto)
+		scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long)
+							  task->dev->lldd_dev);	/* only set when dev->tproto is non-zero */
+	CLEAR_NEXUS_POST;
+}
+
+static int asd_clear_nexus_index(struct sas_task *task)
+{
+	struct asd_ha_struct *asd_ha = task->dev->port->ha->lldd_ha;
+	struct asd_ascb *tascb = task->lldd_task;	/* ascb of the task being cleared */
+	struct asd_ascb *ascb;
+	struct scb *scb;
+	int res;
+	/* Post a CLEAR_NEXUS SCB with nexus NEXUS_TRANS_CX, using the tc_index of @task's ascb, and wait for its result. */
+	CLEAR_NEXUS_PRE;
+	scb->clear_nexus.nexus = NEXUS_TRANS_CX;
+	if (task->dev->tproto)
+		scb->clear_nexus.conn_handle = cpu_to_le16((u16)(unsigned long)
+							  task->dev->lldd_dev);	/* only set when dev->tproto is non-zero */
+	scb->clear_nexus.index = cpu_to_le16(tascb->tc_index);
+	CLEAR_NEXUS_POST;
+}
+
+/* ---------- TMFs ---------- */
+
+static void asd_tmf_timedout(unsigned long data)
+{
+	struct asd_ascb *ascb = (void *) data;	/* @data is the ascb set up in asd_enqueue_internal() */
+
+	ASD_DPRINTK("tmf timed out\n");
+	asd_timedout_common(data);
+	ascb->uldd_task = (void *) TMF_RESP_FUNC_FAILED;	/* report failure to the waiter */
+	complete(&ascb->completion);
+}
+
+static int asd_get_tmf_resp_tasklet(struct asd_ascb *ascb,
+				    struct done_list_struct *dl)
+{
+	struct asd_ha_struct *asd_ha = ascb->ha;
+	unsigned long flags;
+	struct tc_resp_sb_struct {
+		__le16 index_escb;
+		u8     len_lsb;
+		u8     flags;
+	} __attribute__ ((packed)) *resp_sb = (void *) dl->status_block;
+	/* Extract the TMF response code and tag from the edb that @dl's status block refers to. */
+	int  edb_id = ((resp_sb->flags & 0x70) >> 4)-1;	/* bits 4..6 of flags, minus one; NOTE(review): -1 if those bits are 0 */
+	struct asd_ascb *escb;
+	struct asd_dma_tok *edb;
+	struct ssp_frame_hdr *fh;
+	struct ssp_response_iu   *ru;
+	int res = TMF_RESP_FUNC_FAILED;	/* returned unchanged if no escb is found */
+
+	ASD_DPRINTK("tmf resp tasklet\n");
+
+	spin_lock_irqsave(&asd_ha->seq.tc_index_lock, flags);
+	escb = asd_tc_index_find(&asd_ha->seq,
+				 (int)le16_to_cpu(resp_sb->index_escb));
+	spin_unlock_irqrestore(&asd_ha->seq.tc_index_lock, flags);
+
+	if (!escb) {
+		ASD_DPRINTK("Uh-oh! No escb for this dl?!\n");
+		return res;
+	}
+
+	edb = asd_ha->seq.edb_arr[edb_id + escb->edb_index];
+	ascb->tag = *(__be16 *)(edb->vaddr+4);	/* tag is read from byte offset 4 and stored without byte swapping */
+	fh = edb->vaddr + 16;	/* frame header at offset 16; only used by the disabled code below */
+	ru = edb->vaddr + 16 + sizeof(*fh);
+	res = ru->status;
+	if (ru->datapres == 1)	  /* Response data present */
+		res = ru->resp_data[3];
+#if 0
+	ascb->tag = fh->tag;
+#endif
+	ascb->tag_valid = 1;
+
+	asd_invalidate_edb(escb, edb_id);
+	return res;
+}
+
+static void asd_tmf_tasklet_complete(struct asd_ascb *ascb,
+				     struct done_list_struct *dl)
+{
+	if (!del_timer(&ascb->timer))	/* timer no longer pending; presumably the timeout path handles completion */
+		return;
+
+	ASD_DPRINTK("tmf tasklet complete\n");
+	/* Leave the result in uldd_task for the waiter: a response code, or 0xFF00 + the done-list opcode. */
+	if (dl->opcode == TC_SSP_RESP)
+		ascb->uldd_task = (void *) (unsigned long)
+			asd_get_tmf_resp_tasklet(ascb, dl);
+	else
+		ascb->uldd_task = (void *) (0xFF00 + (unsigned long) dl->opcode);
+
+	complete(&ascb->completion);
+}
+
+static inline int asd_clear_nexus(struct sas_task *task)
+{
+	int res = TMF_RESP_FUNC_FAILED;	/* always overwritten by one of the calls below */
+	struct asd_ascb *tascb = task->lldd_task;
+	unsigned long flags;
+	/* Clear the nexus for @task by tag if one is known, else by index, then wait for the task to finish. */
+	ASD_DPRINTK("task not done, clearing nexus\n");
+	if (tascb->tag_valid)
+		res = asd_clear_nexus_tag(task);
+	else
+		res = asd_clear_nexus_index(task);
+	wait_for_completion_timeout(&tascb->completion,
+				    AIC94XX_SCB_TIMEOUT);	/* return value ignored; the DONE flag is checked instead */
+	ASD_DPRINTK("came back from clear nexus\n");
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE)
+		res = TMF_RESP_FUNC_COMPLETE;	/* a completed task overrides the clear-nexus result */
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	return res;
+}
+
+/**
+ * asd_abort_task -- ABORT TASK TMF
+ * @task: the task to be aborted
+ *
+ * Before calling ABORT TASK the task state flags should be ORed with
+ * SAS_TASK_STATE_ABORTED (unless SAS_TASK_STATE_DONE is set) under
+ * the task_state_lock IRQ spinlock, then ABORT TASK *must* be called.
+ *
+ * Implements the ABORT TASK TMF, I_T_L_Q nexus.
+ * Returns: SAS TMF responses (see sas_task.h),
+ *          -ENOMEM,
+ *          -SAS_QUEUE_FULL.
+ *
+ * When ABORT TASK returns, the caller of ABORT TASK checks first the
+ * task->task_state_flags, and then the return value of ABORT TASK.
+ *
+ * If the task has task state bit SAS_TASK_STATE_DONE set, then the
+ * task was completed successfully prior to it being aborted.  The
+ * caller of ABORT TASK has responsibility to call task->task_done()
+ * xor free the task, depending on their framework.  NOTE(review): the
+ * code below sets TMF_RESP_FUNC_COMPLETE here, not TMF_RESP_FUNC_FAILED.
+ *
+ * Else the SAS_TASK_STATE_DONE bit is not set,
+ * 	If the return code is TMF_RESP_FUNC_COMPLETE, then
+ * 		the task was aborted successfully.  The caller of
+ * 		ABORT TASK has responsibility to call task->task_done()
+ *              to finish the task, xor free the task depending on their
+ *		framework.
+ *	else
+ * 		the ABORT TASK returned some kind of error. The task
+ *              was _not_ cancelled.  Nothing can be assumed.
+ *		The caller of ABORT TASK may wish to retry.
+ */
+int asd_abort_task(struct sas_task *task)
+{
+	struct asd_ascb *tascb = task->lldd_task;	/* ascb of the task being aborted */
+	struct asd_ha_struct *asd_ha = tascb->ha;
+	int res = 1;	/* first used as the ascb count passed to asd_ascb_alloc_list() */
+	unsigned long flags;
+	struct asd_ascb *ascb = NULL;
+	struct scb *scb;
+
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {	/* already finished: nothing to send */
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		res = TMF_RESP_FUNC_COMPLETE;
+		ASD_DPRINTK("%s: task 0x%p done\n", __FUNCTION__, task);
+		goto out_done;
+	}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL);
+	if (!ascb)
+		return -ENOMEM;
+	scb = ascb->scb;
+
+	scb->header.opcode = ABORT_TASK;
+
+	switch (task->task_proto) {
+	case SATA_PROTO:
+	case SAS_PROTO_STP:
+		scb->abort_task.proto_conn_rate = (1 << 5); /* STP */
+		break;
+	case SAS_PROTO_SSP:
+		scb->abort_task.proto_conn_rate  = (1 << 4); /* SSP */
+		scb->abort_task.proto_conn_rate |= task->dev->linkrate;
+		break;
+	case SAS_PROTO_SMP:
+		break;
+	default:
+		break;
+	}
+
+	if (task->task_proto == SAS_PROTO_SSP) {	/* only SSP carries a TMF frame */
+		scb->abort_task.ssp_frame.frame_type = SSP_TASK;
+		memcpy(scb->abort_task.ssp_frame.hashed_dest_addr,
+		       task->dev->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+		memcpy(scb->abort_task.ssp_frame.hashed_src_addr,
+		       task->dev->port->ha->hashed_sas_addr,
+		       HASHED_SAS_ADDR_SIZE);
+		scb->abort_task.ssp_frame.tptt = cpu_to_be16(0xFFFF);
+
+		memcpy(scb->abort_task.ssp_task.lun, task->ssp_task.LUN, 8);
+		scb->abort_task.ssp_task.tmf = TMF_ABORT_TASK;
+		scb->abort_task.ssp_task.tag = cpu_to_be16(0xFFFF);
+	}
+
+	scb->abort_task.sister_scb = cpu_to_le16(0xFFFF);
+	scb->abort_task.conn_handle = cpu_to_le16(
+		(u16)(unsigned long)task->dev->lldd_dev);
+	scb->abort_task.retry_count = 1;
+	scb->abort_task.index = cpu_to_le16((u16)tascb->tc_index);	/* identifies the task to abort by its tc_index */
+	scb->abort_task.itnl_to = cpu_to_le16(ITNL_TIMEOUT_CONST);
+
+	res = asd_enqueue_internal(ascb, asd_tmf_tasklet_complete,
+				   asd_tmf_timedout);
+	if (res)
+		goto out;
+	wait_for_completion(&ascb->completion);
+	ASD_DPRINTK("tmf came back\n");
+
+	res = (int) (unsigned long) ascb->uldd_task;	/* result left there by the completion or timeout handler */
+	tascb->tag = ascb->tag;
+	tascb->tag_valid = ascb->tag_valid;
+
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {	/* task finished while the TMF was in flight */
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		res = TMF_RESP_FUNC_COMPLETE;
+		ASD_DPRINTK("%s: task 0x%p done\n", __FUNCTION__, task);
+		goto out_done;
+	}
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	switch (res) {
+	/* The task to be aborted has been sent to the device.
+	 * We got a Response IU for the ABORT TASK TMF. */
+	case TC_NO_ERROR + 0xFF00:
+	case TMF_RESP_FUNC_COMPLETE:
+	case TMF_RESP_FUNC_FAILED:
+		res = asd_clear_nexus(task);
+		break;
+	case TMF_RESP_INVALID_FRAME:
+	case TMF_RESP_OVERLAPPED_TAG:
+	case TMF_RESP_FUNC_ESUPP:
+	case TMF_RESP_NO_LUN:
+		goto out_done; break;
+	}
+	/* In the following we assume that the managing layer
+	 * will _never_ make a mistake, when issuing ABORT TASK.
+	 */
+	switch (res) {	/* res may already have been replaced by asd_clear_nexus() above */
+	default:
+		res = asd_clear_nexus(task);
+		/* fallthrough */
+	case TC_NO_ERROR + 0xFF00:
+	case TMF_RESP_FUNC_COMPLETE:
+		break;
+	/* The task hasn't been sent to the device xor we never got
+	 * a (sane) Response IU for the ABORT TASK TMF.
+	 */
+	case TF_NAK_RECV + 0xFF00:
+		res = TMF_RESP_INVALID_FRAME;
+		break;
+	case TF_TMF_TASK_DONE + 0xFF00:	/* done but not reported yet */
+		res = TMF_RESP_FUNC_FAILED;
+		wait_for_completion_timeout(&tascb->completion,
+					    AIC94XX_SCB_TIMEOUT);
+		spin_lock_irqsave(&task->task_state_lock, flags);
+		if (task->task_state_flags & SAS_TASK_STATE_DONE)
+			res = TMF_RESP_FUNC_COMPLETE;
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		goto out_done;
+	case TF_TMF_NO_TAG + 0xFF00:
+	case TF_TMF_TAG_FREE + 0xFF00: /* the tag is in the free list */
+	case TF_TMF_NO_CONN_HANDLE + 0xFF00: /* no such device */
+		res = TMF_RESP_FUNC_COMPLETE;
+		goto out_done;
+	case TF_TMF_NO_CTX + 0xFF00: /* not in seq, or proto != SSP */
+		res = TMF_RESP_FUNC_ESUPP;
+		goto out;
+	}
+out_done:
+	if (res == TMF_RESP_FUNC_COMPLETE) {	/* on success the aborted task's ascb is released here */
+		task->lldd_task = NULL;
+		mb();
+		asd_ascb_free(tascb);
+	}
+out:
+	asd_ascb_free(ascb);	/* ascb is still NULL when the task was already done on entry */
+	ASD_DPRINTK("task 0x%p aborted, res: 0x%x\n", task, res);
+	return res;
+}
+
+/**
+ * asd_initiate_ssp_tmf -- send a TMF to an I_T_L or I_T_L_Q nexus
+ * @dev: pointer to struct domain_device of interest
+ * @lun: pointer to u8[8] which is the LUN
+ * @tmf: the TMF to be performed (see sas_task.h or the SAS spec)
+ * @index: the transaction context of the task to be queried if QT TMF
+ *
+ * This function is used to send ABORT TASK SET, CLEAR ACA,
+ * CLEAR TASK SET, LU RESET and QUERY TASK TMFs.
+ *
+ * No SCBs should be queued to the I_T_L nexus when this SCB is
+ * pending.
+ *
+ * Returns: TMF response code (see sas_task.h or the SAS spec),
+ * -ENOMEM if no SCB could be allocated, or the non-zero result of
+ * asd_enqueue_internal() if the SCB could not be queued.
+ */
+static int asd_initiate_ssp_tmf(struct domain_device *dev, u8 *lun,
+				int tmf, int index)
+{
+	struct asd_ha_struct *asd_ha = dev->port->ha->lldd_ha;
+	struct asd_ascb *ascb;
+	int res = 1;
+	struct scb *scb;
+
+	if (!(dev->tproto & SAS_PROTO_SSP))
+		return TMF_RESP_FUNC_ESUPP;
+
+	ascb = asd_ascb_alloc_list(asd_ha, &res, GFP_KERNEL);
+	if (!ascb)
+		return -ENOMEM;
+	scb = ascb->scb;
+
+	if (tmf == TMF_QUERY_TASK)
+		scb->header.opcode = QUERY_SSP_TASK;
+	else
+		scb->header.opcode = INITIATE_SSP_TMF;
+
+	scb->ssp_tmf.proto_conn_rate  = (1 << 4); /* SSP */
+	scb->ssp_tmf.proto_conn_rate |= dev->linkrate;
+	/* SSP frame header */
+	scb->ssp_tmf.ssp_frame.frame_type = SSP_TASK;
+	memcpy(scb->ssp_tmf.ssp_frame.hashed_dest_addr,
+	       dev->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+	memcpy(scb->ssp_tmf.ssp_frame.hashed_src_addr,
+	       dev->port->ha->hashed_sas_addr, HASHED_SAS_ADDR_SIZE);
+	scb->ssp_tmf.ssp_frame.tptt = cpu_to_be16(0xFFFF);
+	/* SSP Task IU */
+	memcpy(scb->ssp_tmf.ssp_task.lun, lun, 8);
+	scb->ssp_tmf.ssp_task.tmf = tmf;
+
+	scb->ssp_tmf.sister_scb = cpu_to_le16(0xFFFF);
+	scb->ssp_tmf.conn_handle= cpu_to_le16((u16)(unsigned long)
+					      dev->lldd_dev);
+	scb->ssp_tmf.retry_count = 1;
+	scb->ssp_tmf.itnl_to = cpu_to_le16(ITNL_TIMEOUT_CONST);
+	if (tmf == TMF_QUERY_TASK)
+		scb->ssp_tmf.index = cpu_to_le16(index);
+
+	res = asd_enqueue_internal(ascb, asd_tmf_tasklet_complete,
+				   asd_tmf_timedout);
+	if (res)
+		goto out_err;
+	wait_for_completion(&ascb->completion);
+	res = (int) (unsigned long) ascb->uldd_task;
+
+	switch (res) {
+	case TC_NO_ERROR + 0xFF00:
+		res = TMF_RESP_FUNC_COMPLETE;
+		break;
+	case TF_NAK_RECV + 0xFF00:
+		res = TMF_RESP_INVALID_FRAME;
+		break;
+	case TF_TMF_TASK_DONE + 0xFF00:
+		res = TMF_RESP_FUNC_FAILED;
+		break;
+	case TF_TMF_NO_TAG + 0xFF00:
+	case TF_TMF_TAG_FREE + 0xFF00: /* the tag is in the free list */
+	case TF_TMF_NO_CONN_HANDLE + 0xFF00: /* no such device */
+		res = TMF_RESP_FUNC_COMPLETE;
+		break;
+	case TF_TMF_NO_CTX + 0xFF00: /* not in seq, or proto != SSP */
+		res = TMF_RESP_FUNC_ESUPP;
+		break;
+	default:
+		/* Already a TMF response code: let it propagate unchanged */
+		break;
+	}
+out_err:
+	asd_ascb_free(ascb);
+	return res;
+}
+
+int asd_abort_task_set(struct domain_device *dev, u8 *lun)
+{
+	int tmf_rc = asd_initiate_ssp_tmf(dev, lun, TMF_ABORT_TASK_SET, 0);
+	/* Only a completed ABORT TASK SET is followed by the nexus clear. */
+	if (tmf_rc == TMF_RESP_FUNC_COMPLETE)
+		asd_clear_nexus_I_T_L(dev, lun);
+	return tmf_rc;
+}
+
+int asd_clear_aca(struct domain_device *dev, u8 *lun)
+{
+	int tmf_rc = asd_initiate_ssp_tmf(dev, lun, TMF_CLEAR_ACA, 0);
+	/* Only a completed CLEAR ACA is followed by the nexus clear. */
+	if (tmf_rc == TMF_RESP_FUNC_COMPLETE)
+		asd_clear_nexus_I_T_L(dev, lun);
+	return tmf_rc;
+}
+
+int asd_clear_task_set(struct domain_device *dev, u8 *lun)
+{
+	int tmf_rc = asd_initiate_ssp_tmf(dev, lun, TMF_CLEAR_TASK_SET, 0);
+	/* Only a completed CLEAR TASK SET is followed by the nexus clear. */
+	if (tmf_rc == TMF_RESP_FUNC_COMPLETE)
+		asd_clear_nexus_I_T_L(dev, lun);
+	return tmf_rc;
+}
+
+int asd_lu_reset(struct domain_device *dev, u8 *lun)
+{
+	int tmf_rc = asd_initiate_ssp_tmf(dev, lun, TMF_LU_RESET, 0);
+	/* Only a completed LU RESET is followed by the nexus clear. */
+	if (tmf_rc == TMF_RESP_FUNC_COMPLETE)
+		asd_clear_nexus_I_T_L(dev, lun);
+	return tmf_rc;
+}
+
+/**
+ * asd_query_task -- send a QUERY TASK TMF to an I_T_L_Q nexus
+ * @task: pointer to sas_task struct of interest
+ *
+ * Returns: TMF_RESP_FUNC_COMPLETE if the task is not in the task set,
+ * or TMF_RESP_FUNC_SUCC if the task is in the task set.
+ *
+ * Normally the management layer sets the task to aborted state,
+ * and then calls query task and then abort task.
+ */
+int asd_query_task(struct sas_task *task)
+{
+	struct asd_ascb *tascb = task->lldd_task;
+
+	/* No SCB attached to the task: nothing to query, report COMPLETE. */
+	if (!tascb)
+		return TMF_RESP_FUNC_COMPLETE;
+
+	/* The task is identified by the tc_index of its SCB. */
+	return asd_initiate_ssp_tmf(task->dev, task->ssp_task.LUN,
+				    TMF_QUERY_TASK, tascb->tc_index);
+}
diff --git a/drivers/scsi/arcmsr/Makefile b/drivers/scsi/arcmsr/Makefile
new file mode 100644
index 0000000..721aced
--- /dev/null
+++ b/drivers/scsi/arcmsr/Makefile
@@ -0,0 +1,6 @@
+# File: drivers/scsi/arcmsr/Makefile
+# Makefile for the ARECA PCI-X PCI-EXPRESS SATA RAID controllers SCSI driver.
+
+arcmsr-objs := arcmsr_attr.o arcmsr_hba.o
+
+obj-$(CONFIG_SCSI_ARCMSR) := arcmsr.o
diff --git a/drivers/scsi/arcmsr/arcmsr.h b/drivers/scsi/arcmsr/arcmsr.h
new file mode 100644
index 0000000..aff96db
--- /dev/null
+++ b/drivers/scsi/arcmsr/arcmsr.h
@@ -0,0 +1,472 @@
+/*
+*******************************************************************************
+**        O.S   : Linux
+**   FILE NAME  : arcmsr.h
+**        BY    : Erich Chen
+**   Description: SCSI RAID Device Driver for
+**                ARECA RAID Host adapter
+*******************************************************************************
+** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved.
+**
+**     Web site: www.areca.com.tw
+**       E-mail: erich@areca.com.tw
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License version 2 as
+** published by the Free Software Foundation.
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*******************************************************************************
+** Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions
+** are met:
+** 1. Redistributions of source code must retain the above copyright
+**    notice, this list of conditions and the following disclaimer.
+** 2. Redistributions in binary form must reproduce the above copyright
+**    notice, this list of conditions and the following disclaimer in the
+**    documentation and/or other materials provided with the distribution.
+** 3. The name of the author may not be used to endorse or promote products
+**    derived from this software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING, BUT
+** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY
+** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+**(INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF
+** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************
+*/
+#include <linux/interrupt.h>
+
+struct class_device_attribute;
+
+#define ARCMSR_MAX_OUTSTANDING_CMD 						256
+#define ARCMSR_MAX_FREECCB_NUM							288
+#define ARCMSR_DRIVER_VERSION				"Driver Version 1.20.00.13"
+#define ARCMSR_SCSI_INITIATOR_ID						255
+#define ARCMSR_MAX_XFER_SECTORS							512
+#define ARCMSR_MAX_TARGETID							 17
+#define ARCMSR_MAX_TARGETLUN							  8
+#define ARCMSR_MAX_CMD_PERLUN				 ARCMSR_MAX_OUTSTANDING_CMD
+#define ARCMSR_MAX_QBUFFER						       4096
+#define ARCMSR_MAX_SG_ENTRIES							 38
+
+/*
+*******************************************************************************
+**        split 64bits dma addressing (hi32 shifts 16 twice, never by 32)
+*******************************************************************************
+*/
+#define dma_addr_hi32(addr)               ((uint32_t) (((addr)>>16)>>16))
+#define dma_addr_lo32(addr)               ((uint32_t) ((addr) & 0xffffffff))
+/*
+*******************************************************************************
+**        MESSAGE CONTROL CODE (header part of struct CMD_MESSAGE_FIELD)
+*******************************************************************************
+*/
+struct CMD_MESSAGE
+{
+      uint32_t HeaderLength;
+      uint8_t  Signature[8];
+      uint32_t Timeout;
+      uint32_t ControlCode;	/* presumably an ARCMSR_MESSAGE_* code - TODO confirm */
+      uint32_t ReturnCode;	/* presumably ARCMSR_MESSAGE_RETURNCODE_* - TODO confirm */
+      uint32_t Length;	/* NOTE(review): looks like the payload byte count - confirm */
+};
+/*
+*******************************************************************************
+**        IOP Message Transfer Data for user space (header + data buffer)
+*******************************************************************************
+*/
+struct CMD_MESSAGE_FIELD
+{
+    struct CMD_MESSAGE			cmdmessage;	/* header, see struct CMD_MESSAGE */
+    uint8_t				messagedatabuffer[1032];	/* bytes that follow the header */
+};
+/* IOP message transfer */
+#define ARCMSR_MESSAGE_FAIL             0x0001
+/* DeviceType */
+#define ARECA_SATA_RAID				0x90000000
+/* FunctionCode */
+#define FUNCTION_READ_RQBUFFER			0x0801
+#define FUNCTION_WRITE_WQBUFFER			0x0802
+#define FUNCTION_CLEAR_RQBUFFER			0x0803
+#define FUNCTION_CLEAR_WQBUFFER			0x0804
+#define FUNCTION_CLEAR_ALLQBUFFER		0x0805
+#define FUNCTION_RETURN_CODE_3F			0x0806
+#define FUNCTION_SAY_HELLO			0x0807
+#define FUNCTION_SAY_GOODBYE			0x0808
+#define FUNCTION_FLUSH_ADAPTER_CACHE		0x0809
+/* ARECA IO CONTROL CODE (DeviceType | FunctionCode, kept parenthesized) */
+#define ARCMSR_MESSAGE_READ_RQBUFFER       \
+	(ARECA_SATA_RAID | FUNCTION_READ_RQBUFFER)
+#define ARCMSR_MESSAGE_WRITE_WQBUFFER      \
+	(ARECA_SATA_RAID | FUNCTION_WRITE_WQBUFFER)
+#define ARCMSR_MESSAGE_CLEAR_RQBUFFER      \
+	(ARECA_SATA_RAID | FUNCTION_CLEAR_RQBUFFER)
+#define ARCMSR_MESSAGE_CLEAR_WQBUFFER      \
+	(ARECA_SATA_RAID | FUNCTION_CLEAR_WQBUFFER)
+#define ARCMSR_MESSAGE_CLEAR_ALLQBUFFER    \
+	(ARECA_SATA_RAID | FUNCTION_CLEAR_ALLQBUFFER)
+#define ARCMSR_MESSAGE_RETURN_CODE_3F      \
+	(ARECA_SATA_RAID | FUNCTION_RETURN_CODE_3F)
+#define ARCMSR_MESSAGE_SAY_HELLO           \
+	(ARECA_SATA_RAID | FUNCTION_SAY_HELLO)
+#define ARCMSR_MESSAGE_SAY_GOODBYE         \
+	(ARECA_SATA_RAID | FUNCTION_SAY_GOODBYE)
+#define ARCMSR_MESSAGE_FLUSH_ADAPTER_CACHE \
+	(ARECA_SATA_RAID | FUNCTION_FLUSH_ADAPTER_CACHE)
+/* ARECA IOCTL ReturnCode */
+#define ARCMSR_MESSAGE_RETURNCODE_OK              0x00000001
+#define ARCMSR_MESSAGE_RETURNCODE_ERROR           0x00000006
+#define ARCMSR_MESSAGE_RETURNCODE_3F              0x0000003F
+/*
+*************************************************************
+**   structures for holding DMA address data (32 or 64 bit address forms)
+*************************************************************
+*/
+#define IS_SG64_ADDR                0x01000000 /* bit24 */
+struct  SG32ENTRY
+{
+	uint32_t					length;
+	uint32_t					address;	/* single 32-bit address word */
+};
+struct  SG64ENTRY
+{
+ 	uint32_t					length;
+ 	uint32_t					address;	/* presumably the low 32 bits - TODO confirm */
+ 	uint32_t					addresshigh;	/* presumably the high 32 bits - TODO confirm */
+};
+struct SGENTRY_UNION
+{
+	union
+	{
+		struct SG32ENTRY            sg32entry;
+		struct SG64ENTRY            sg64entry;
+	}u;	/* one entry, viewed in either the 32-bit or the 64-bit form */
+};
+/*
+********************************************************************
+**      Q Buffer of IOP Message Transfer (4 + 124 = 128 bytes in total)
+********************************************************************
+*/
+struct QBUFFER
+{
+	uint32_t      data_len;	/* number of bytes the reader takes from data[] */
+	uint8_t       data[124];	/* message bytes */
+};
+/*
+*******************************************************************************
+**      FIRMWARE INFO (field comments: 32-bit word index, byte offset range)
+*******************************************************************************
+*/
+struct FIRMWARE_INFO
+{
+	uint32_t      signature;                /*0, 00-03*/
+	uint32_t      request_len;              /*1, 04-07*/
+	uint32_t      numbers_queue;            /*2, 08-11*/
+	uint32_t      sdram_size;               /*3, 12-15*/
+	uint32_t      ide_channels;             /*4, 16-19*/
+	char          vendor[40];               /*5, 20-59*/
+	char          model[8];                 /*15, 60-67*/
+	char          firmware_ver[16];         /*17, 68-83*/
+	char          device_map[16];           /*21, 84-99*/
+};
+/* signature of set and get firmware config */
+#define ARCMSR_SIGNATURE_GET_CONFIG                   0x87974060
+#define ARCMSR_SIGNATURE_SET_CONFIG                   0x87974063
+/* message code of inbound message register */
+#define ARCMSR_INBOUND_MESG0_NOP                      0x00000000
+#define ARCMSR_INBOUND_MESG0_GET_CONFIG               0x00000001
+#define ARCMSR_INBOUND_MESG0_SET_CONFIG               0x00000002
+#define ARCMSR_INBOUND_MESG0_ABORT_CMD                0x00000003
+#define ARCMSR_INBOUND_MESG0_STOP_BGRB                0x00000004
+#define ARCMSR_INBOUND_MESG0_FLUSH_CACHE              0x00000005
+#define ARCMSR_INBOUND_MESG0_START_BGRB               0x00000006
+#define ARCMSR_INBOUND_MESG0_CHK331PENDING            0x00000007
+#define ARCMSR_INBOUND_MESG0_SYNC_TIMER               0x00000008
+/* doorbell interrupt generator */
+#define ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK           0x00000001
+#define ARCMSR_INBOUND_DRIVER_DATA_READ_OK            0x00000002
+#define ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK          0x00000001
+#define ARCMSR_OUTBOUND_IOP331_DATA_READ_OK           0x00000002
+/* ccb areca cdb flag */
+#define ARCMSR_CCBPOST_FLAG_SGL_BSIZE                 0x80000000
+#define ARCMSR_CCBPOST_FLAG_IAM_BIOS                  0x40000000
+#define ARCMSR_CCBREPLY_FLAG_IAM_BIOS                 0x40000000
+#define ARCMSR_CCBREPLY_FLAG_ERROR                    0x10000000
+/* outbound firmware ok */
+#define ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK             0x80000000
+/*
+*******************************************************************************
+**    ARECA SCSI COMMAND DESCRIPTOR BLOCK size 0x1F8 (504) = 48 + 38 * 12
+*******************************************************************************
+*/
+struct ARCMSR_CDB
+{
+	uint8_t							Bus;
+	uint8_t							TargetID;
+	uint8_t							LUN;
+	uint8_t							Function;
+
+	uint8_t							CdbLength;
+	uint8_t							sgcount;
+	uint8_t							Flags;	/* presumably ARCMSR_CDB_FLAG_* bits - TODO confirm */
+#define ARCMSR_CDB_FLAG_SGL_BSIZE          0x01
+#define ARCMSR_CDB_FLAG_BIOS               0x02
+#define ARCMSR_CDB_FLAG_WRITE              0x04
+#define ARCMSR_CDB_FLAG_SIMPLEQ            0x00
+#define ARCMSR_CDB_FLAG_HEADQ              0x08
+#define ARCMSR_CDB_FLAG_ORDEREDQ           0x10
+	uint8_t							Reserved1;
+
+	uint32_t						Context;
+	uint32_t						DataLength;
+
+	uint8_t							Cdb[16];
+
+	uint8_t							DeviceStatus;	/* presumably an ARCMSR_DEV_* value - TODO confirm */
+#define ARCMSR_DEV_CHECK_CONDITION          0x02
+#define ARCMSR_DEV_SELECT_TIMEOUT			0xF0
+#define ARCMSR_DEV_ABORTED				0xF1
+#define ARCMSR_DEV_INIT_FAIL				0xF2
+	uint8_t							SenseData[15];
+
+	union
+	{
+		struct SG32ENTRY                sg32entry[ARCMSR_MAX_SG_ENTRIES];
+		struct SG64ENTRY                sg64entry[ARCMSR_MAX_SG_ENTRIES];
+	} u;	/* scatter/gather list in either the 32-bit or the 64-bit entry form */
+};
+/*
+*******************************************************************************
+**     Messaging Unit (MU) of the Intel R 80331 I/O processor (80331)
+*******************************************************************************
+*/
+struct MessageUnit
+{	/* field comments give the first and last byte offset of the field (hex) */
+	uint32_t	resrved0[4];			/*0000 000F*/
+	uint32_t	inbound_msgaddr0;		/*0010 0013*/
+	uint32_t	inbound_msgaddr1;		/*0014 0017*/
+	uint32_t	outbound_msgaddr0;		/*0018 001B*/
+	uint32_t	outbound_msgaddr1;		/*001C 001F*/
+	uint32_t	inbound_doorbell;		/*0020 0023*/
+	uint32_t	inbound_intstatus;		/*0024 0027*/
+	uint32_t	inbound_intmask;		/*0028 002B*/
+	uint32_t	outbound_doorbell;		/*002C 002F*/
+	uint32_t	outbound_intstatus;		/*0030 0033*/
+	uint32_t	outbound_intmask;		/*0034 0037*/
+	uint32_t	reserved1[2];			/*0038 003F*/
+	uint32_t	inbound_queueport;		/*0040 0043*/
+	uint32_t	outbound_queueport;     	/*0044 0047*/
+	uint32_t	reserved2[2];			/*0048 004F*/
+	uint32_t	reserved3[492];			/*0050 07FF 492 entries*/
+	uint32_t	reserved4[128];			/*0800 09FF 128 entries*/
+	uint32_t	message_rwbuffer[256];		/*0A00 0DFF 256 entries*/
+	uint32_t	message_wbuffer[32];		/*0E00 0E7F  32 entries*/
+	uint32_t	reserved5[32];			/*0E80 0EFF  32 entries*/
+	uint32_t	message_rbuffer[32];		/*0F00 0F7F  32 entries*/
+	uint32_t	reserved6[32];			/*0F80 0FFF  32 entries*/
+};
+/*
+*******************************************************************************
+**                 Adapter Control Block
+*******************************************************************************
+*/
+struct AdapterControlBlock
+{	/* note: in this struct a comment describes the member just above it */
+	struct pci_dev *		pdev;
+	struct Scsi_Host *		host;
+	unsigned long			vir2phy_offset;
+	/* Offset is used in making arc cdb physical to virtual calculations */
+	uint32_t			outbound_int_enable;
+
+	struct MessageUnit __iomem *		pmu;
+	/* message unit ATU inbound base address0 */
+
+	uint32_t			acb_flags;
+#define ACB_F_SCSISTOPADAPTER         0x0001
+#define ACB_F_MSG_STOP_BGRB           0x0002
+	/* stop RAID background rebuild */
+#define ACB_F_MSG_START_BGRB          0x0004
+	/* start RAID background rebuild */
+#define ACB_F_IOPDATA_OVERFLOW        0x0008
+	/* iop message data rqbuffer overflow */
+#define ACB_F_MESSAGE_WQBUFFER_CLEARED  0x0010
+	/* message clear wqbuffer */
+#define ACB_F_MESSAGE_RQBUFFER_CLEARED  0x0020
+	/* message clear rqbuffer */
+#define ACB_F_MESSAGE_WQBUFFER_READED   0x0040
+#define ACB_F_BUS_RESET               0x0080
+#define ACB_F_IOP_INITED              0x0100
+	/* iop init */
+
+	struct CommandControlBlock *			pccb_pool[ARCMSR_MAX_FREECCB_NUM];
+	/* used for memory free */
+	struct list_head		ccb_free_list;
+	/* head of free ccb list */
+	atomic_t			ccboutstandingcount;
+
+	void *				dma_coherent;
+	/* dma_coherent used for memory free */
+	dma_addr_t			dma_coherent_handle;
+	/* dma_coherent_handle used for memory free */
+
+	uint8_t				rqbuffer[ARCMSR_MAX_QBUFFER];
+	/* data collection buffer for read from 80331 */
+	int32_t				rqbuf_firstindex;
+	/* first of read buffer  */
+	int32_t				rqbuf_lastindex;
+	/* last of read buffer   */
+	uint8_t				wqbuffer[ARCMSR_MAX_QBUFFER];
+	/* data collection buffer for write to 80331  */
+	int32_t				wqbuf_firstindex;
+	/* first of write buffer */
+	int32_t				wqbuf_lastindex;
+	/* last of write buffer  */
+	uint8_t				devstate[ARCMSR_MAX_TARGETID][ARCMSR_MAX_TARGETLUN];
+	/* indexed [target id][lun]: id0 ..... id16, lun0...lun7 */
+#define ARECA_RAID_GONE               0x55
+#define ARECA_RAID_GOOD               0xaa
+	uint32_t			num_resets;
+	uint32_t			num_aborts;
+	uint32_t			firm_request_len;
+	uint32_t			firm_numbers_queue;
+	uint32_t			firm_sdram_size;
+	uint32_t			firm_hd_channels;
+	char				firm_model[12];
+	char				firm_version[20];
+};/* HW_DEVICE_EXTENSION */
+/*
+*******************************************************************************
+**                   Command Control Block
+**             the CCB length must stay on a 32 byte boundary
+*******************************************************************************
+*/
+struct CommandControlBlock
+{	/* comments below give the byte offsets of the member just above them */
+	struct ARCMSR_CDB		arcmsr_cdb;
+	/*
+	** 0-503 (size of CDB=504):
+	** arcmsr messenger scsi command descriptor size 504 bytes
+	*/
+	uint32_t			cdb_shifted_phyaddr;
+	/* 504-507 */
+	uint32_t			reserved1;
+	/* 508-511 */
+#if BITS_PER_LONG == 64
+	/*  ======================512+64 bytes========================  */
+	struct list_head		list;
+	/* 512-527 16 bytes next/prev ptrs for ccb lists */
+	struct scsi_cmnd *		pcmd;
+	/* 528-535 8 bytes pointer of linux scsi command */
+	struct AdapterControlBlock *	acb;
+	/* 536-543 8 bytes pointer of acb */
+
+	uint16_t			ccb_flags;
+	/* 544-545 */
+	#define		CCB_FLAG_READ			0x0000
+	#define		CCB_FLAG_WRITE			0x0001
+	#define		CCB_FLAG_ERROR			0x0002
+	#define		CCB_FLAG_FLUSHCACHE		0x0004
+	#define		CCB_FLAG_MASTER_ABORTED		0x0008
+	uint16_t			startdone;
+	/* 546-547 */
+	#define		ARCMSR_CCB_DONE			0x0000
+	#define		ARCMSR_CCB_START		0x55AA
+	#define		ARCMSR_CCB_ABORTED		0xAA55
+	#define		ARCMSR_CCB_ILLEGAL		0xFFFF
+	uint32_t			reserved2[7];
+	/* 548-551 552-555 556-559 560-563 564-567 568-571 572-575 */
+#else
+	/*  ======================512+32 bytes========================  */
+	struct list_head		list;
+	/* 512-519 8 bytes next/prev ptrs for ccb lists */
+	struct scsi_cmnd *		pcmd;
+	/* 520-523 4 bytes pointer of linux scsi command */
+	struct AdapterControlBlock *	acb;
+	/* 524-527 4 bytes pointer of acb */
+
+	uint16_t			ccb_flags;
+	/* 528-529 */
+	#define		CCB_FLAG_READ			0x0000
+	#define		CCB_FLAG_WRITE			0x0001
+	#define		CCB_FLAG_ERROR			0x0002
+	#define		CCB_FLAG_FLUSHCACHE		0x0004
+	#define		CCB_FLAG_MASTER_ABORTED		0x0008
+	uint16_t			startdone;
+	/* 530-531 */
+	#define		ARCMSR_CCB_DONE			0x0000
+	#define		ARCMSR_CCB_START		0x55AA
+	#define		ARCMSR_CCB_ABORTED		0xAA55
+	#define		ARCMSR_CCB_ILLEGAL		0xFFFF
+	uint32_t			reserved2[3];
+	/* 532-535 536-539 540-543 */
+#endif
+	/*  ==========================================================  */
+};
+/*
+*******************************************************************************
+**    ARECA SCSI sense data (bit-field layout depends on the compiler/ABI)
+*******************************************************************************
+*/
+struct SENSE_DATA
+{
+	uint8_t				ErrorCode:7;	/* presumably one of the SCSI_SENSE_* codes below - confirm */
+#define SCSI_SENSE_CURRENT_ERRORS	0x70
+#define SCSI_SENSE_DEFERRED_ERRORS	0x71
+	uint8_t				Valid:1;
+	uint8_t				SegmentNumber;
+	uint8_t				SenseKey:4;
+	uint8_t				Reserved:1;
+	uint8_t				IncorrectLength:1;
+	uint8_t				EndOfMedia:1;
+	uint8_t				FileMark:1;
+	uint8_t				Information[4];
+	uint8_t				AdditionalSenseLength;
+	uint8_t				CommandSpecificInformation[4];
+	uint8_t				AdditionalSenseCode;
+	uint8_t				AdditionalSenseCodeQualifier;
+	uint8_t				FieldReplaceableUnitCode;
+	uint8_t				SenseKeySpecific[3];
+};
+/*
+*******************************************************************************
+**  Outbound Interrupt Status Register - OISR
+*******************************************************************************
+*/
+#define     ARCMSR_MU_OUTBOUND_INTERRUPT_STATUS_REG                 0x30
+#define     ARCMSR_MU_OUTBOUND_PCI_INT                              0x10
+#define     ARCMSR_MU_OUTBOUND_POSTQUEUE_INT                        0x08
+#define     ARCMSR_MU_OUTBOUND_DOORBELL_INT                         0x04
+#define     ARCMSR_MU_OUTBOUND_MESSAGE1_INT                         0x02
+#define     ARCMSR_MU_OUTBOUND_MESSAGE0_INT                         0x01
+#define     ARCMSR_MU_OUTBOUND_HANDLE_INT                 \
+                    (ARCMSR_MU_OUTBOUND_MESSAGE0_INT      \
+                     |ARCMSR_MU_OUTBOUND_MESSAGE1_INT     \
+                     |ARCMSR_MU_OUTBOUND_DOORBELL_INT     \
+                     |ARCMSR_MU_OUTBOUND_POSTQUEUE_INT    \
+                     |ARCMSR_MU_OUTBOUND_PCI_INT)
+/*
+*******************************************************************************
+**  Outbound Interrupt Mask Register - OIMR
+*******************************************************************************
+*/
+#define     ARCMSR_MU_OUTBOUND_INTERRUPT_MASK_REG                   0x34
+#define     ARCMSR_MU_OUTBOUND_PCI_INTMASKENABLE                    0x10
+#define     ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE              0x08
+#define     ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE               0x04
+#define     ARCMSR_MU_OUTBOUND_MESSAGE1_INTMASKENABLE               0x02
+#define     ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE               0x01
+#define     ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE                    0x1F
+
+extern void arcmsr_post_Qbuffer(struct AdapterControlBlock *acb);
+extern struct class_device_attribute *arcmsr_host_attrs[];
+extern int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb);
+void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb);
+
diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
new file mode 100644
index 0000000..12497da
--- /dev/null
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -0,0 +1,381 @@
+/*
+*******************************************************************************
+**        O.S   : Linux
+**   FILE NAME  : arcmsr_attr.c
+**        BY    : Erich Chen
+**   Description: attributes exported to sysfs and device host
+*******************************************************************************
+** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved
+**
+**     Web site: www.areca.com.tw
+**       E-mail: erich@areca.com.tw
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License version 2 as
+** published by the Free Software Foundation.
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*******************************************************************************
+** Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions
+** are met:
+** 1. Redistributions of source code must retain the above copyright
+**    notice, this list of conditions and the following disclaimer.
+** 2. Redistributions in binary form must reproduce the above copyright
+**    notice, this list of conditions and the following disclaimer in the
+**    documentation and/or other materials provided with the distribution.
+** 3. The name of the author may not be used to endorse or promote products
+**    derived from this software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT
+** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY
+** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF
+** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************
+** For history of changes, see Documentation/scsi/ChangeLog.arcmsr
+**     Firmware Specification, see Documentation/scsi/arcmsr_spec.txt
+*******************************************************************************
+*/
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport.h>
+#include "arcmsr.h"
+
+struct class_device_attribute *arcmsr_host_attrs[];
+
+/* sysfs "mu_read": copy bytes queued in acb->rqbuffer into buf; returns the
+ * number copied (at most 1031, never more than count).  off is not used. */
+static ssize_t
+arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off,
+    size_t count)
+{
+	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+	struct MessageUnit __iomem *reg = acb->pmu;
+	uint8_t *ptmpQbuffer = (uint8_t *)buf;
+	int32_t allxfer_len = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	/* do message unit read, never exceeding the caller's count. */
+	while ((acb->rqbuf_firstindex != acb->rqbuf_lastindex)
+		&& (allxfer_len < 1031) && ((size_t)allxfer_len < count)) {
+		*ptmpQbuffer++ = acb->rqbuffer[acb->rqbuf_firstindex];
+		acb->rqbuf_firstindex++;
+		acb->rqbuf_firstindex %= ARCMSR_MAX_QBUFFER;
+		allxfer_len++;
+	}
+	if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+		/* overflow flagged: queue the IOP rbuffer, then signal READ_OK */
+		struct QBUFFER __iomem * prbuffer = (struct QBUFFER __iomem *)
+					&reg->message_rbuffer;
+		uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data;
+		int32_t iop_len;
+
+		acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+		iop_len = readl(&prbuffer->data_len);
+		while (iop_len > 0) {
+			acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data);
+			acb->rqbuf_lastindex++;
+			acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+			iop_data++;
+			iop_len--;
+		}
+		writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,
+				&reg->inbound_doorbell);
+	}
+	return (allxfer_len);
+}
+
+/*
+ * sysfs "mu_write" handler: append count bytes from buf to acb->wqbuffer.
+ * Returns count when the bytes were queued, 0 when the caller has to
+ * retry, -EACCES without CAP_SYS_ADMIN and -EINVAL when count is larger
+ * than 1032.  off is not used.
+ */
+static ssize_t
+arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off,
+    size_t count)
+{
+	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+	uint8_t *src = (uint8_t *)buf;
+	int32_t free_len, remaining, head, tail;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (count > 1032)
+		return -EINVAL;
+
+	remaining = (int32_t)count;
+	tail = acb->wqbuf_lastindex;
+	head = acb->wqbuf_firstindex;
+	if (tail != head) {
+		/* ring not empty: post what is queued, the caller retries */
+		arcmsr_post_Qbuffer(acb);
+		return 0;	/*need retry*/
+	}
+
+	free_len = (head - tail - 1) & (ARCMSR_MAX_QBUFFER - 1);
+	if (free_len < remaining)
+		return 0;	/*need retry*/
+
+	/* do message unit write: one byte at a time, wrapping the index. */
+	for (; remaining > 0; remaining--) {
+		acb->wqbuffer[acb->wqbuf_lastindex] = *src++;
+		acb->wqbuf_lastindex++;
+		acb->wqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+	}
+	if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_CLEARED) {
+		acb->acb_flags &= ~ACB_F_MESSAGE_WQBUFFER_CLEARED;
+		arcmsr_post_Qbuffer(acb);
+	}
+	return count;
+}
+
+/*
+ * sysfs "mu_clear" handler: resets the rq/wq buffer indices; returns 1.
+ */
+static ssize_t
+arcmsr_sysfs_iop_message_clear(struct kobject *kobj, char *buf, loff_t off,
+    size_t count)
+{
+	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
+	struct MessageUnit __iomem *reg = acb->pmu;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+		acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+		writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,
+				&reg->inbound_doorbell);
+	}
+	acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_CLEARED
+			| ACB_F_MESSAGE_RQBUFFER_CLEARED
+			| ACB_F_MESSAGE_WQBUFFER_READED;
+	acb->rqbuf_firstindex = 0;
+	acb->rqbuf_lastindex = 0;
+	acb->wqbuf_firstindex = 0;
+	acb->wqbuf_lastindex = 0;
+	/* only the first sizeof(struct QBUFFER) bytes of each buffer are zeroed */
+	memset(acb->rqbuffer, 0, sizeof (struct QBUFFER));
+	memset(acb->wqbuffer, 0, sizeof (struct QBUFFER));
+	return 1;
+}
+
+static struct bin_attribute arcmsr_sysfs_message_read_attr = {
+	.attr = {
+		.name = "mu_read",	/* name of the binary sysfs file */
+		.mode = S_IRUSR ,	/* readable by the owner only */
+		.owner = THIS_MODULE,
+	},
+	.size = 1032,	/* NOTE(review): same value as messagedatabuffer[1032] - confirm link */
+	.read = arcmsr_sysfs_iop_message_read,
+};
+
+static struct bin_attribute arcmsr_sysfs_message_write_attr = {
+	.attr = {
+		.name = "mu_write",	/* name of the binary sysfs file */
+		.mode = S_IWUSR,	/* writable by the owner only */
+		.owner = THIS_MODULE,
+	},
+	.size = 1032,	/* the write handler rejects count > 1032 with -EINVAL */
+	.write = arcmsr_sysfs_iop_message_write,
+};
+
+static struct bin_attribute arcmsr_sysfs_message_clear_attr = {
+	.attr = {
+		.name = "mu_clear",
+		.mode = S_IWUSR,
+		.owner = THIS_MODULE,
+	},
+	.size = 1,
+	.write = arcmsr_sysfs_iop_message_clear,
+};
+
+/*
+ * Create the mu_read, mu_write and mu_clear binary sysfs files on the SCSI
+ * host's class device.
+ *
+ * Returns 0 on success.  On failure the files created so far are removed
+ * again and the error from sysfs_create_bin_file() is returned.
+ */
+int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb)
+{
+	struct kobject *kobj = &acb->host->shost_classdev.kobj;
+	int error;
+
+	error = sysfs_create_bin_file(kobj, &arcmsr_sysfs_message_read_attr);
+	if (error) {
+		printk(KERN_ERR "arcmsr: alloc sysfs mu_read failed\n");
+		return error;
+	}
+
+	error = sysfs_create_bin_file(kobj, &arcmsr_sysfs_message_write_attr);
+	if (error) {
+		printk(KERN_ERR "arcmsr: alloc sysfs mu_write failed\n");
+		goto remove_read;
+	}
+
+	error = sysfs_create_bin_file(kobj, &arcmsr_sysfs_message_clear_attr);
+	if (error) {
+		printk(KERN_ERR "arcmsr: alloc sysfs mu_clear failed\n");
+		goto remove_write;
+	}
+	return 0;
+
+	/* Unwind in reverse order of creation. */
+remove_write:
+	sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_write_attr);
+remove_read:
+	sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_read_attr);
+	return error;
+}
+
+/*
+ * Remove the three binary sysfs files created by arcmsr_alloc_sysfs_attr(),
+ * in reverse order of creation.
+ */
+void
+arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb)
+{
+	struct kobject *kobj = &acb->host->shost_classdev.kobj;
+
+	sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_clear_attr);
+	sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_write_attr);
+	sysfs_remove_bin_file(kobj, &arcmsr_sysfs_message_read_attr);
+}
+
+
+/* sysfs show: the compiled-in driver version string, newline terminated. */
+static ssize_t
+arcmsr_attr_host_driver_version(struct class_device *cdev, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s\n", ARCMSR_DRIVER_VERSION);
+}
+
+/* sysfs show: current value of the adapter's outstanding-CCB counter. */
+static ssize_t
+arcmsr_attr_host_driver_posted_cmd(struct class_device *cdev, char *buf)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)class_to_shost(cdev)->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%4d\n",
+			atomic_read(&acb->ccboutstandingcount));
+}
+
+/* sysfs show: the adapter's num_resets counter. */
+static ssize_t
+arcmsr_attr_host_driver_reset(struct class_device *cdev, char *buf)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)class_to_shost(cdev)->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%4d\n", acb->num_resets);
+}
+
+/* sysfs show: the adapter's num_aborts counter. */
+static ssize_t
+arcmsr_attr_host_driver_abort(struct class_device *cdev, char *buf)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)class_to_shost(cdev)->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%4d\n", acb->num_aborts);
+}
+
+/* sysfs show: the firmware model string stored in the adapter block. */
+static ssize_t
+arcmsr_attr_host_fw_model(struct class_device *cdev, char *buf)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)class_to_shost(cdev)->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", acb->firm_model);
+}
+
+/* sysfs show: the firmware version string stored in the adapter block. */
+static ssize_t
+arcmsr_attr_host_fw_version(struct class_device *cdev, char *buf)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)class_to_shost(cdev)->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", acb->firm_version);
+}
+
+/* sysfs show: the firm_request_len value stored in the adapter block. */
+static ssize_t
+arcmsr_attr_host_fw_request_len(struct class_device *cdev, char *buf)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)class_to_shost(cdev)->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%4d\n", acb->firm_request_len);
+}
+
+/* sysfs show: the firm_numbers_queue value stored in the adapter block. */
+static ssize_t
+arcmsr_attr_host_fw_numbers_queue(struct class_device *cdev, char *buf)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)class_to_shost(cdev)->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%4d\n", acb->firm_numbers_queue);
+}
+
+/* sysfs show: the firm_sdram_size value stored in the adapter block. */
+static ssize_t
+arcmsr_attr_host_fw_sdram_size(struct class_device *cdev, char *buf)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)class_to_shost(cdev)->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%4d\n", acb->firm_sdram_size);
+}
+
+/* sysfs show: the firm_hd_channels value stored in the adapter block. */
+static ssize_t
+arcmsr_attr_host_fw_hd_channels(struct class_device *cdev, char *buf)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)class_to_shost(cdev)->hostdata;
+
+	return snprintf(buf, PAGE_SIZE, "%4d\n", acb->firm_hd_channels);
+}
+
+/*
+ * World-readable (S_IRUGO) host attributes.  Each one has a show handler
+ * only; the store handler is NULL.
+ */
+static CLASS_DEVICE_ATTR(host_driver_version, S_IRUGO, arcmsr_attr_host_driver_version, NULL);
+static CLASS_DEVICE_ATTR(host_driver_posted_cmd, S_IRUGO, arcmsr_attr_host_driver_posted_cmd, NULL);
+static CLASS_DEVICE_ATTR(host_driver_reset, S_IRUGO, arcmsr_attr_host_driver_reset, NULL);
+static CLASS_DEVICE_ATTR(host_driver_abort, S_IRUGO, arcmsr_attr_host_driver_abort, NULL);
+static CLASS_DEVICE_ATTR(host_fw_model, S_IRUGO, arcmsr_attr_host_fw_model, NULL);
+static CLASS_DEVICE_ATTR(host_fw_version, S_IRUGO, arcmsr_attr_host_fw_version, NULL);
+static CLASS_DEVICE_ATTR(host_fw_request_len, S_IRUGO, arcmsr_attr_host_fw_request_len, NULL);
+static CLASS_DEVICE_ATTR(host_fw_numbers_queue, S_IRUGO, arcmsr_attr_host_fw_numbers_queue, NULL);
+static CLASS_DEVICE_ATTR(host_fw_sdram_size, S_IRUGO, arcmsr_attr_host_fw_sdram_size, NULL);
+static CLASS_DEVICE_ATTR(host_fw_hd_channels, S_IRUGO, arcmsr_attr_host_fw_hd_channels, NULL);
+
+/*
+ * NULL-terminated list of the attributes above.  Non-static: it is
+ * referenced as .shost_attrs by the SCSI host template in arcmsr_hba.c.
+ */
+struct class_device_attribute *arcmsr_host_attrs[] = {
+	&class_device_attr_host_driver_version,
+	&class_device_attr_host_driver_posted_cmd,
+	&class_device_attr_host_driver_reset,
+	&class_device_attr_host_driver_abort,
+	&class_device_attr_host_fw_model,
+	&class_device_attr_host_fw_version,
+	&class_device_attr_host_fw_request_len,
+	&class_device_attr_host_fw_numbers_queue,
+	&class_device_attr_host_fw_sdram_size,
+	&class_device_attr_host_fw_hd_channels,
+	NULL,
+};
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
new file mode 100644
index 0000000..475f978
--- /dev/null
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -0,0 +1,1496 @@
+/*
+*******************************************************************************
+**        O.S   : Linux
+**   FILE NAME  : arcmsr_hba.c
+**        BY    : Erich Chen
+**   Description: SCSI RAID Device Driver for
+**                ARECA RAID Host adapter
+*******************************************************************************
+** Copyright (C) 2002 - 2005, Areca Technology Corporation All rights reserved
+**
+**     Web site: www.areca.com.tw
+**       E-mail: erich@areca.com.tw
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License version 2 as
+** published by the Free Software Foundation.
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*******************************************************************************
+** Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions
+** are met:
+** 1. Redistributions of source code must retain the above copyright
+**    notice, this list of conditions and the following disclaimer.
+** 2. Redistributions in binary form must reproduce the above copyright
+**    notice, this list of conditions and the following disclaimer in the
+**    documentation and/or other materials provided with the distribution.
+** 3. The name of the author may not be used to endorse or promote products
+**    derived from this software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING,BUT
+** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+** DATA, OR PROFITS; OR BUSINESS INTERRUPTION)HOWEVER CAUSED AND ON ANY
+** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF
+** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************
+** For history of changes, see Documentation/scsi/ChangeLog.arcmsr
+**     Firmware Specification, see Documentation/scsi/arcmsr_spec.txt
+*******************************************************************************
+*/
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/reboot.h>
+#include <linux/spinlock.h>
+#include <linux/pci_ids.h>
+#include <linux/interrupt.h>
+#include <linux/moduleparam.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/timer.h>
+#include <linux/pci.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsicam.h>
+#include "arcmsr.h"
+
+/* Module metadata. */
+MODULE_AUTHOR("Erich Chen <erich@areca.com.tw>");
+MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(ARCMSR_DRIVER_VERSION);
+
+/* Forward declarations for routines that are referenced before their definitions. */
+static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, struct scsi_cmnd *cmd);
+static int arcmsr_abort(struct scsi_cmnd *);
+static int arcmsr_bus_reset(struct scsi_cmnd *);
+static int arcmsr_bios_param(struct scsi_device *sdev,
+				struct block_device *bdev, sector_t capacity, int *info);
+static int arcmsr_queue_command(struct scsi_cmnd * cmd,
+				void (*done) (struct scsi_cmnd *));
+static int arcmsr_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id);
+static void arcmsr_remove(struct pci_dev *pdev);
+static void arcmsr_shutdown(struct pci_dev *pdev);
+static void arcmsr_iop_init(struct AdapterControlBlock *acb);
+static void arcmsr_free_ccb_pool(struct AdapterControlBlock *acb);
+static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb);
+static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb);
+static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb);
+static const char *arcmsr_info(struct Scsi_Host *);
+static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb);
+
+/*
+ * change_queue_depth callback: clamp the requested depth to
+ * ARCMSR_MAX_CMD_PERLUN, apply it with ordered tagging, and return the
+ * depth that was actually set.
+ */
+static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_depth)
+{
+	int depth = queue_depth;
+
+	if (depth > ARCMSR_MAX_CMD_PERLUN)
+		depth = ARCMSR_MAX_CMD_PERLUN;
+
+	scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth);
+	return depth;
+}
+
+/*
+ * SCSI host template handed to scsi_host_alloc() in arcmsr_probe().
+ * Several limits set here (can_queue, this_id, sg_tablesize, max_sectors,
+ * cmd_per_lun) are assigned again on the allocated host in arcmsr_probe().
+ * Note that .name is the adapter string with ARCMSR_DRIVER_VERSION
+ * concatenated directly onto it (no separating space).
+ */
+static struct scsi_host_template arcmsr_scsi_host_template = {
+	.module			= THIS_MODULE,
+	.name			= "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION,
+	.info			= arcmsr_info,
+	.queuecommand		= arcmsr_queue_command,
+	.eh_abort_handler	= arcmsr_abort,
+	.eh_bus_reset_handler	= arcmsr_bus_reset,
+	.bios_param		= arcmsr_bios_param,
+	.change_queue_depth	= arcmsr_adjust_disk_queue_depth,
+	.can_queue		= ARCMSR_MAX_OUTSTANDING_CMD,
+	.this_id		= ARCMSR_SCSI_INITIATOR_ID,
+	.sg_tablesize		= ARCMSR_MAX_SG_ENTRIES,
+	.max_sectors    	= ARCMSR_MAX_XFER_SECTORS,
+	.cmd_per_lun		= ARCMSR_MAX_CMD_PERLUN,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.shost_attrs		= arcmsr_host_attrs,
+};
+
+/*
+ * PCI IDs claimed by this driver: every entry is an Areca vendor ID paired
+ * with one ARC-11xx/12xx/13xx/16xx device ID.  The table is exported for
+ * module autoloading via MODULE_DEVICE_TABLE().
+ */
+static struct pci_device_id arcmsr_device_id_table[] = {
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1120)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1130)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1160)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1170)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1210)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1220)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1230)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1260)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1270)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1280)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1380)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1381)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1680)},
+	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1681)},
+	{0, 0}, /* Terminating entry */
+};
+MODULE_DEVICE_TABLE(pci, arcmsr_device_id_table);
+/*
+ * PCI driver descriptor registered in arcmsr_module_init(); binds the ID
+ * table above to the probe/remove/shutdown entry points.
+ */
+static struct pci_driver arcmsr_pci_driver = {
+	.name			= "arcmsr",
+	.id_table		= arcmsr_device_id_table,
+	.probe			= arcmsr_probe,
+	.remove			= arcmsr_remove,
+	.shutdown		= arcmsr_shutdown
+};
+
+/*
+ * IRQ handler registered by arcmsr_probe().  dev_id is the adapter control
+ * block; the real work is done by arcmsr_interrupt() with the SCSI host
+ * lock held and local interrupts disabled.
+ */
+static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id,
+	struct pt_regs *regs)
+{
+	struct AdapterControlBlock *acb = dev_id;
+	unsigned long irq_flags;
+	irqreturn_t ret;
+
+	spin_lock_irqsave(acb->host->host_lock, irq_flags);
+	ret = arcmsr_interrupt(acb);
+	spin_unlock_irqrestore(acb->host->host_lock, irq_flags);
+
+	return ret;
+}
+
+/*
+ * bios_param callback: report a BIOS-style disk geometry.
+ *
+ * @sdev:     SCSI device (unused here)
+ * @bdev:     block device whose partition table is consulted first
+ * @capacity: device size in sectors
+ * @geom:     output; geom[0] = heads, geom[1] = sectors, geom[2] = cylinders
+ *
+ * If a partition table can be read and scsi_partsize() derives a geometry
+ * from it, that result is returned.  Otherwise a 64/32 head/sector layout
+ * is used, switching to 255/63 when that would exceed 1024 cylinders.
+ *
+ * The cylinder count is computed on the full sector_t capacity with
+ * sector_div(), so capacities that do not fit in an int are not truncated
+ * before the division.
+ *
+ * Returns 0, or the scsi_partsize() result when it is not -1.
+ */
+static int arcmsr_bios_param(struct scsi_device *sdev,
+		struct block_device *bdev, sector_t capacity, int *geom)
+{
+	int ret, heads, sectors;
+	sector_t cylinders;
+	unsigned char *buffer;/* return copy of block device's partition table */
+
+	buffer = scsi_bios_ptable(bdev);
+	if (buffer) {
+		ret = scsi_partsize(buffer, capacity, &geom[2], &geom[0], &geom[1]);
+		kfree(buffer);
+		if (ret != -1)
+			return ret;
+	}
+	heads = 64;
+	sectors = 32;
+	cylinders = capacity;
+	/* sector_div() divides in place and is safe for a 64-bit sector_t */
+	sector_div(cylinders, heads * sectors);
+	if (cylinders > 1024) {
+		heads = 255;
+		sectors = 63;
+		cylinders = capacity;
+		sector_div(cylinders, heads * sectors);
+	}
+	geom[0] = heads;
+	geom[1] = sectors;
+	geom[2] = (int)cylinders;
+	return 0;
+}
+
+/*
+ * Allocate and initialise the pool of command control blocks (CCBs).
+ *
+ * One coherent DMA region holds ARCMSR_MAX_FREECCB_NUM CCBs plus 0x20 bytes
+ * of slack so the first CCB can be moved up to a 32-byte boundary.  Every
+ * CCB records its bus address shifted right by 5, is stored in
+ * acb->pccb_pool[] and is queued on acb->ccb_free_list.  The routine also
+ * marks every target/LUN as ARECA_RAID_GOOD, passes the upper 32 bits of
+ * the pool's bus address to the IOP when they are non-zero, and finally
+ * ORs ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE into the outbound interrupt
+ * mask register.
+ *
+ * Returns 0 on success or -ENOMEM if the DMA allocation fails.
+ */
+static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb)
+{
+	struct pci_dev *pdev = acb->pdev;
+	struct MessageUnit __iomem *reg = acb->pmu;
+	u32 ccb_phyaddr_hi32;
+	void *dma_coherent;
+	dma_addr_t dma_coherent_handle, dma_addr;
+	struct CommandControlBlock *ccb_tmp;
+	int i, j;
+
+	dma_coherent = dma_alloc_coherent(&pdev->dev,
+			ARCMSR_MAX_FREECCB_NUM *
+			sizeof (struct CommandControlBlock) + 0x20,
+			&dma_coherent_handle, GFP_KERNEL);
+	if (!dma_coherent)
+		return -ENOMEM;
+
+	/* keep the unadjusted pointer/handle: they are what gets freed later */
+	acb->dma_coherent = dma_coherent;
+	acb->dma_coherent_handle = dma_coherent_handle;
+
+	/*
+	 * Round both addresses up to a 32-byte boundary.  The test looks only
+	 * at the virtual address; each address is then rounded by its own
+	 * low bits.
+	 */
+	if (((unsigned long)dma_coherent & 0x1F)) {
+		dma_coherent = dma_coherent +
+			(0x20 - ((unsigned long)dma_coherent & 0x1F));
+		dma_coherent_handle = dma_coherent_handle +
+			(0x20 - ((unsigned long)dma_coherent_handle & 0x1F));
+	}
+
+	dma_addr = dma_coherent_handle;
+	ccb_tmp = (struct CommandControlBlock *)dma_coherent;
+	for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
+		/* this shifted bus address is what arcmsr_post_ccb() writes to the inbound queue port */
+		ccb_tmp->cdb_shifted_phyaddr = dma_addr >> 5;
+		ccb_tmp->acb = acb;
+		acb->pccb_pool[i] = ccb_tmp;
+		list_add_tail(&ccb_tmp->list, &acb->ccb_free_list);
+		dma_addr = dma_addr + sizeof (struct CommandControlBlock);
+		ccb_tmp++;
+	}
+
+	/*
+	 * Both cursors have advanced by the same amount, so their difference
+	 * is the constant virtual-minus-bus offset of the pool.
+	 */
+	acb->vir2phy_offset = (unsigned long)ccb_tmp -
+			      (unsigned long)dma_addr;
+	for (i = 0; i < ARCMSR_MAX_TARGETID; i++)
+		for (j = 0; j < ARCMSR_MAX_TARGETLUN; j++)
+			acb->devstate[i][j] = ARECA_RAID_GOOD;
+
+	/*
+	** Tell the IOP the upper 32 bits of the CCB pool's bus address,
+	** but only when they are non-zero.
+	*/
+	ccb_phyaddr_hi32 = (uint32_t) ((dma_coherent_handle >> 16) >> 16);
+	if (ccb_phyaddr_hi32 != 0) {
+		writel(ARCMSR_SIGNATURE_SET_CONFIG, &reg->message_rwbuffer[0]);
+		writel(ccb_phyaddr_hi32, &reg->message_rwbuffer[1]);
+		writel(ARCMSR_INBOUND_MESG0_SET_CONFIG, &reg->inbound_msgaddr0);
+		/* a timeout is only logged; initialisation continues regardless */
+		if (arcmsr_wait_msgint_ready(acb))
+			printk(KERN_NOTICE "arcmsr%d: "
+			       "'set ccb high part physical address' timeout\n",
+				acb->host->host_no);
+	}
+
+	writel(readl(&reg->outbound_intmask) |
+			ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,
+	       &reg->outbound_intmask);
+	return 0;
+}
+
+/*
+ * PCI probe entry point: bring up one Areca adapter.
+ *
+ * Enables the PCI device, allocates the SCSI host (the adapter control
+ * block lives in its hostdata), selects a 64-bit DMA mask with a 32-bit
+ * fallback, maps BAR 0, allocates the CCB pool, installs the interrupt
+ * handler, initialises the IOP, registers the host with the SCSI
+ * mid-layer, creates the sysfs message files and finally scans the host.
+ *
+ * Returns 0 on success or a negative errno.  On failure everything that
+ * was set up so far is released in reverse order, including removing the
+ * SCSI host again when only the sysfs step failed.
+ */
+static int arcmsr_probe(struct pci_dev *pdev,
+	const struct pci_device_id *id)
+{
+	struct Scsi_Host *host;
+	struct AdapterControlBlock *acb;
+	uint8_t bus, dev_fun;
+	int error;
+
+	error = pci_enable_device(pdev);
+	if (error)
+		goto out;
+	pci_set_master(pdev);
+
+	host = scsi_host_alloc(&arcmsr_scsi_host_template,
+			sizeof(struct AdapterControlBlock));
+	if (!host) {
+		error = -ENOMEM;
+		goto out_disable_device;
+	}
+	acb = (struct AdapterControlBlock *)host->hostdata;
+	memset(acb, 0, sizeof (struct AdapterControlBlock));
+
+	/* prefer 64-bit DMA, fall back to 32-bit */
+	error = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+	if (error) {
+		error = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+		if (error) {
+			printk(KERN_WARNING
+			       "scsi%d: No suitable DMA mask available\n",
+			       host->host_no);
+			goto out_host_put;
+		}
+	}
+	bus = pdev->bus->number;
+	dev_fun = pdev->devfn;
+	acb->host = host;
+	acb->pdev = pdev;
+	host->max_sectors = ARCMSR_MAX_XFER_SECTORS;
+	host->max_lun = ARCMSR_MAX_TARGETLUN;
+	host->max_id = ARCMSR_MAX_TARGETID;/*16:8*/
+	host->max_cmd_len = 16;    /*this is issue of 64bit LBA, over 2T byte*/
+	host->sg_tablesize = ARCMSR_MAX_SG_ENTRIES;
+	host->can_queue = ARCMSR_MAX_FREECCB_NUM; /* max simultaneous cmds */
+	host->cmd_per_lun = ARCMSR_MAX_CMD_PERLUN;
+	host->this_id = ARCMSR_SCSI_INITIATOR_ID;
+	host->unique_id = (bus << 8) | dev_fun;
+	host->irq = pdev->irq;
+	error = pci_request_regions(pdev, "arcmsr");
+	if (error)
+		goto out_host_put;
+
+	acb->pmu = ioremap(pci_resource_start(pdev, 0),
+			   pci_resource_len(pdev, 0));
+	if (!acb->pmu) {
+		printk(KERN_NOTICE "arcmsr%d: memory"
+			" mapping region fail \n", acb->host->host_no);
+		/*
+		 * error still holds the 0 from pci_request_regions(); set a
+		 * real errno so the failed probe is not reported as success.
+		 */
+		error = -ENOMEM;
+		goto out_release_regions;
+	}
+	acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED |
+			   ACB_F_MESSAGE_RQBUFFER_CLEARED |
+			   ACB_F_MESSAGE_WQBUFFER_READED);
+	acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER;
+	INIT_LIST_HEAD(&acb->ccb_free_list);
+
+	error = arcmsr_alloc_ccb_pool(acb);
+	if (error)
+		goto out_iounmap;
+
+	error = request_irq(pdev->irq, arcmsr_do_interrupt,
+			SA_INTERRUPT | SA_SHIRQ, "arcmsr", acb);
+	if (error)
+		goto out_free_ccb_pool;
+
+	arcmsr_iop_init(acb);
+	pci_set_drvdata(pdev, host);
+
+	error = scsi_add_host(host, &pdev->dev);
+	if (error)
+		goto out_free_irq;
+
+	error = arcmsr_alloc_sysfs_attr(acb);
+	if (error)
+		goto out_free_sysfs;
+
+	scsi_scan_host(host);
+	return 0;
+ out_free_sysfs:
+	/* scsi_add_host() succeeded, so it has to be undone before the put */
+	scsi_remove_host(host);
+ out_free_irq:
+	free_irq(pdev->irq, acb);
+ out_free_ccb_pool:
+	arcmsr_free_ccb_pool(acb);
+ out_iounmap:
+	iounmap(acb->pmu);
+ out_release_regions:
+	pci_release_regions(pdev);
+ out_host_put:
+	scsi_host_put(host);
+ out_disable_device:
+	pci_disable_device(pdev);
+ out:
+	return error;
+}
+
+/*
+ * Post the ABORT_CMD message to the IOP and wait for its message
+ * interrupt; a timeout is only logged.
+ */
+static void arcmsr_abort_allcmd(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+
+	writel(ARCMSR_INBOUND_MESG0_ABORT_CMD, &mu->inbound_msgaddr0);
+	if (!arcmsr_wait_msgint_ready(acb))
+		return;
+
+	printk(KERN_NOTICE
+		"arcmsr%d: wait 'abort all outstanding command' timeout \n",
+		acb->host->host_no);
+}
+
+/*
+ * Undo the DMA mapping made for a CCB's SCSI command: the scatterlist when
+ * the command used one, otherwise the single buffer mapping whose handle
+ * was saved in SCp.dma_handle.  Commands without data need no unmapping.
+ */
+static void arcmsr_pci_unmap_dma(struct CommandControlBlock *ccb)
+{
+	struct scsi_cmnd *cmd = ccb->pcmd;
+	struct pci_dev *pdev = ccb->acb->pdev;
+
+	if (cmd->use_sg != 0) {
+		pci_unmap_sg(pdev, (struct scatterlist *)cmd->request_buffer,
+			     cmd->use_sg, cmd->sc_data_direction);
+		return;
+	}
+
+	if (cmd->request_bufflen != 0)
+		pci_unmap_single(pdev, cmd->SCp.dma_handle,
+				 cmd->request_bufflen, cmd->sc_data_direction);
+}
+
+/*
+ * Finish a CCB: unmap its DMA, return it to the adapter's free list and
+ * complete the SCSI command through its scsi_done callback.
+ *
+ * @ccb:        the command control block being completed
+ * @stand_flag: when 1, the adapter's outstanding-command counter is
+ *              decremented as well
+ */
+static void arcmsr_ccb_complete(struct CommandControlBlock *ccb, int stand_flag)
+{
+	struct AdapterControlBlock *acb = ccb->acb;
+	struct scsi_cmnd *pcmd = ccb->pcmd;
+
+	arcmsr_pci_unmap_dma(ccb);
+	if (stand_flag == 1)
+		atomic_dec(&acb->ccboutstandingcount);
+	ccb->startdone = ARCMSR_CCB_DONE;
+	ccb->ccb_flags = 0;
+	/* the CCB is back on the free list before the command is handed back */
+	list_add_tail(&ccb->list, &acb->ccb_free_list);
+	pcmd->scsi_done(pcmd);
+}
+
+/*
+ * PCI remove entry point: tear down one adapter.
+ *
+ * Removes the sysfs files and the SCSI host, stops background rebuild,
+ * flushes the adapter cache and ORs ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE
+ * into the outbound interrupt mask.  It then polls arcmsr_interrupt() for
+ * up to 256 rounds of 25 ms to let outstanding commands finish; anything
+ * still pending afterwards is aborted and completed with DID_ABORT.
+ * Finally the IRQ, MMIO mapping, CCB pool, PCI regions and host reference
+ * are released and the PCI device is disabled.
+ */
+static void arcmsr_remove(struct pci_dev *pdev)
+{
+	struct Scsi_Host *host = pci_get_drvdata(pdev);
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *) host->hostdata;
+	struct MessageUnit __iomem *reg = acb->pmu;
+	int poll_count = 0;
+
+	arcmsr_free_sysfs_attr(acb);
+	scsi_remove_host(host);
+	arcmsr_stop_adapter_bgrb(acb);
+	arcmsr_flush_adapter_cache(acb);
+	writel(readl(&reg->outbound_intmask) |
+		ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,
+		&reg->outbound_intmask);
+	acb->acb_flags |= ACB_F_SCSISTOPADAPTER;
+	acb->acb_flags &= ~ACB_F_IOP_INITED;
+
+	/* poll completions by hand while commands are still outstanding */
+	for (poll_count = 0; poll_count < 256; poll_count++) {
+		if (!atomic_read(&acb->ccboutstandingcount))
+			break;
+		/* NOTE(review): called here without taking host_lock, unlike arcmsr_do_interrupt() - confirm this is safe */
+		arcmsr_interrupt(acb);
+		msleep(25);
+	}
+
+	if (atomic_read(&acb->ccboutstandingcount)) {
+		int i;
+
+		arcmsr_abort_allcmd(acb);
+		/* read and discard entries from the outbound queue port */
+		for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++)
+			readl(&reg->outbound_queueport);
+		/* complete every CCB that is still marked as started */
+		for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
+			struct CommandControlBlock *ccb = acb->pccb_pool[i];
+			if (ccb->startdone == ARCMSR_CCB_START) {
+				ccb->startdone = ARCMSR_CCB_ABORTED;
+				ccb->pcmd->result = DID_ABORT << 16;
+				arcmsr_ccb_complete(ccb, 1);
+			}
+		}
+	}
+
+	free_irq(pdev->irq, acb);
+	iounmap(acb->pmu);
+	arcmsr_free_ccb_pool(acb);
+	pci_release_regions(pdev);
+
+	/* acb lives in host->hostdata, so it must not be touched after this put */
+	scsi_host_put(host);
+
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+}
+
+/*
+ * PCI shutdown entry point: stop the adapter's background rebuild and then
+ * ask it to flush its cache.
+ */
+static void arcmsr_shutdown(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct AdapterControlBlock *acb;
+
+	acb = (struct AdapterControlBlock *)shost->hostdata;
+	arcmsr_stop_adapter_bgrb(acb);
+	arcmsr_flush_adapter_cache(acb);
+}
+
+/* Module entry point: register the PCI driver and return its result. */
+static int arcmsr_module_init(void)
+{
+	return pci_register_driver(&arcmsr_pci_driver);
+}
+
+/* Module exit point: unregister the PCI driver registered at init time. */
+static void arcmsr_module_exit(void)
+{
+	pci_unregister_driver(&arcmsr_pci_driver);
+}
+/* hook the two routines above up as the module's load/unload handlers */
+module_init(arcmsr_module_init);
+module_exit(arcmsr_module_exit);
+
+/*
+ * OR ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE into the outbound interrupt mask
+ * register and return the register value read beforehand, so the caller
+ * can hand it to arcmsr_enable_outbound_ints() later.
+ */
+static u32 arcmsr_disable_outbound_ints(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+	u32 saved_mask;
+
+	saved_mask = readl(&mu->outbound_intmask);
+	writel(saved_mask | ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE,
+	       &mu->outbound_intmask);
+
+	return saved_mask;
+}
+
+/*
+ * Write @orig_mask back to the outbound interrupt mask register with the
+ * post-queue and doorbell mask bits cleared.
+ */
+static void arcmsr_enable_outbound_ints(struct AdapterControlBlock *acb,
+		u32 orig_mask)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+	u32 cleared_bits = ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE |
+			   ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE;
+
+	writel(orig_mask & ~cleared_bits, &mu->outbound_intmask);
+}
+
+/*
+ * Post the FLUSH_CACHE message to the IOP and wait for its message
+ * interrupt; a timeout is only logged.
+ */
+static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+
+	writel(ARCMSR_INBOUND_MESG0_FLUSH_CACHE, &mu->inbound_msgaddr0);
+	if (!arcmsr_wait_msgint_ready(acb))
+		return;
+
+	printk(KERN_NOTICE
+		"arcmsr%d: wait 'flush adapter cache' timeout \n",
+		acb->host->host_no);
+}
+
+/*
+ * Copy the sense data returned in the CCB into the SCSI command's sense
+ * buffer.  The command result is set to DID_OK << 16; the sense buffer is
+ * zeroed, filled with at most the smaller of the two buffer sizes, and then
+ * stamped with ErrorCode = SCSI_SENSE_CURRENT_ERRORS and Valid = 1.
+ */
+static void arcmsr_report_sense_info(struct CommandControlBlock *ccb)
+{
+	struct scsi_cmnd *cmd = ccb->pcmd;
+	struct SENSE_DATA *sense = (struct SENSE_DATA *)cmd->sense_buffer;
+	int copy_len;
+
+	cmd->result = DID_OK << 16;
+	if (!sense)
+		return;
+
+	/* never copy more than the destination buffer can hold */
+	copy_len = sizeof (struct SENSE_DATA);
+	if (sizeof (cmd->sense_buffer) <= sizeof (struct SENSE_DATA))
+		copy_len = sizeof (cmd->sense_buffer);
+
+	memset(sense, 0, sizeof (cmd->sense_buffer));
+	memcpy(sense, ccb->arcmsr_cdb.SenseData, copy_len);
+	sense->ErrorCode = SCSI_SENSE_CURRENT_ERRORS;
+	sense->Valid = 1;
+}
+
+/*
+ * Wait for the IOP to raise the "message 0" outbound interrupt bit.
+ *
+ * Polls outbound_intstatus every 10 ms.  When the bit is seen it is
+ * acknowledged by writing it back and 0x00 is returned.  After 21 rounds
+ * of 100 polls (roughly 21 seconds) without the bit, 0xff is returned.
+ *
+ * msleep() is used rather than msleep_interruptible(): the latter returns
+ * immediately while a signal is pending, which would turn the loop into a
+ * burst of back-to-back reads and report a bogus timeout.
+ *
+ * Sleeps, so it must be called from process context.
+ */
+static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	uint32_t Index;
+	uint8_t Retries = 0x00;
+
+	do {
+		for (Index = 0; Index < 100; Index++) {
+			if (readl(&reg->outbound_intstatus)
+				& ARCMSR_MU_OUTBOUND_MESSAGE0_INT) {
+				/* acknowledge the bit by writing it back */
+				writel(ARCMSR_MU_OUTBOUND_MESSAGE0_INT
+					, &reg->outbound_intstatus);
+				return 0x00;
+			}
+			msleep(10);
+		}/*about 1 second per round*/
+	} while (Retries++ < 20);/*21 rounds in total*/
+	return 0xff;
+}
+
+/*
+ * Fill in the ARCMSR_CDB embedded in @ccb from the SCSI command @pcmd.
+ *
+ * Copies target, LUN and the CDB bytes, maps the data buffer for DMA
+ * (scatterlist or single buffer) and emits one scatter/gather entry per
+ * mapped segment: a SG32ENTRY when the upper address half is zero,
+ * otherwise a SG64ENTRY tagged with IS_SG64_ADDR.  If the scatterlist
+ * variant grows past 256 bytes, ARCMSR_CDB_FLAG_SGL_BSIZE is set.  Write
+ * commands (DMA_TO_DEVICE) are flagged in both the CDB and the CCB.
+ */
+static void arcmsr_build_ccb(struct AdapterControlBlock *acb,
+	struct CommandControlBlock *ccb, struct scsi_cmnd *pcmd)
+{
+	struct ARCMSR_CDB *arcmsr_cdb = (struct ARCMSR_CDB *)&ccb->arcmsr_cdb;
+	int8_t *psge = (int8_t *)&arcmsr_cdb->u;
+	uint32_t address_lo, address_hi;
+	/* running CDB size; starts at 0x30 and grows by each SG entry added below */
+	int arccdbsize = 0x30;
+
+	ccb->pcmd = pcmd;
+	memset(arcmsr_cdb, 0, sizeof (struct ARCMSR_CDB));
+	arcmsr_cdb->Bus = 0;
+	arcmsr_cdb->TargetID = pcmd->device->id;
+	arcmsr_cdb->LUN = pcmd->device->lun;
+	arcmsr_cdb->Function = 1;
+	arcmsr_cdb->CdbLength = (uint8_t)pcmd->cmd_len;
+	arcmsr_cdb->Context = (unsigned long)arcmsr_cdb;
+	memcpy(arcmsr_cdb->Cdb, pcmd->cmnd, pcmd->cmd_len);
+	if (pcmd->use_sg) {
+		int length, sgcount, i, cdb_sgcount = 0;
+		struct scatterlist *sl;
+
+		/* Get Scatter Gather List from scsiport. */
+		sl = (struct scatterlist *) pcmd->request_buffer;
+		sgcount = pci_map_sg(acb->pdev, sl, pcmd->use_sg,
+				pcmd->sc_data_direction);
+		/* map stor port SG list to our iop SG List. */
+		for (i = 0; i < sgcount; i++) {
+			/* Get the physical address of the current data pointer */
+			length = cpu_to_le32(sg_dma_len(sl));
+			address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sl)));
+			address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sl)));
+			if (address_hi == 0) {
+				struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge;
+
+				pdma_sg->address = address_lo;
+				pdma_sg->length = length;
+				psge += sizeof (struct SG32ENTRY);
+				arccdbsize += sizeof (struct SG32ENTRY);
+			} else {
+				struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge;
+
+				pdma_sg->addresshigh = address_hi;
+				pdma_sg->address = address_lo;
+				/* NOTE(review): IS_SG64_ADDR is OR-ed in after cpu_to_le32(); verify on big-endian hosts */
+				pdma_sg->length = length|IS_SG64_ADDR;
+				psge += sizeof (struct SG64ENTRY);
+				arccdbsize += sizeof (struct SG64ENTRY);
+			}
+			sl++;
+			cdb_sgcount++;
+		}
+		arcmsr_cdb->sgcount = (uint8_t)cdb_sgcount;
+		arcmsr_cdb->DataLength = pcmd->request_bufflen;
+		if ( arccdbsize > 256)
+			arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_SGL_BSIZE;
+	} else if (pcmd->request_bufflen) {
+		dma_addr_t dma_addr;
+		dma_addr = pci_map_single(acb->pdev, pcmd->request_buffer,
+				pcmd->request_bufflen, pcmd->sc_data_direction);
+		/* saved so arcmsr_pci_unmap_dma() can undo this mapping */
+		pcmd->SCp.dma_handle = dma_addr;
+		address_lo = cpu_to_le32(dma_addr_lo32(dma_addr));
+		address_hi = cpu_to_le32(dma_addr_hi32(dma_addr));
+		/* NOTE(review): unlike the scatterlist path, the length here is stored without cpu_to_le32() */
+		if (address_hi == 0) {
+			struct  SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge;
+			pdma_sg->address = address_lo;
+			pdma_sg->length = pcmd->request_bufflen;
+		} else {
+			struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge;
+			pdma_sg->addresshigh = address_hi;
+			pdma_sg->address = address_lo;
+			pdma_sg->length = pcmd->request_bufflen|IS_SG64_ADDR;
+		}
+		arcmsr_cdb->sgcount = 1;
+		arcmsr_cdb->DataLength = pcmd->request_bufflen;
+	}
+	if (pcmd->sc_data_direction == DMA_TO_DEVICE ) {
+		arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_WRITE;
+		ccb->ccb_flags |= CCB_FLAG_WRITE;
+	}
+}
+
+/*
+ * Hand a prepared CCB to the adapter: count it as outstanding, mark it
+ * started and write its shifted bus address to the inbound queue port.
+ * The address is tagged with ARCMSR_CCBPOST_FLAG_SGL_BSIZE when the CDB
+ * carries ARCMSR_CDB_FLAG_SGL_BSIZE.
+ */
+static void arcmsr_post_ccb(struct AdapterControlBlock *acb, struct CommandControlBlock *ccb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+	uint32_t post_word = ccb->cdb_shifted_phyaddr;
+
+	atomic_inc(&acb->ccboutstandingcount);
+	ccb->startdone = ARCMSR_CCB_START;
+
+	if (ccb->arcmsr_cdb.Flags & ARCMSR_CDB_FLAG_SGL_BSIZE)
+		post_word |= ARCMSR_CCBPOST_FLAG_SGL_BSIZE;
+
+	writel(post_word, &mu->inbound_queueport);
+}
+
+/*
+ * Push pending bytes from the driver's write ring (acb->wqbuffer) into the
+ * IOP's message write buffer.
+ *
+ * Does nothing unless ACB_F_MESSAGE_WQBUFFER_READED is set.  Otherwise the
+ * flag is cleared, up to 124 bytes are copied one at a time, the byte
+ * count is written to the buffer's data_len field and the inbound doorbell
+ * is rung with ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK.
+ */
+void arcmsr_post_Qbuffer(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+	struct QBUFFER __iomem *wbuf =
+		(struct QBUFFER __iomem *) &mu->message_wbuffer;
+	uint8_t __iomem *dst = (uint8_t __iomem *) wbuf->data;
+	int32_t sent;
+
+	if (!(acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_READED))
+		return;
+
+	acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READED);
+
+	for (sent = 0;
+	     acb->wqbuf_firstindex != acb->wqbuf_lastindex && sent < 124;
+	     sent++) {
+		writeb(acb->wqbuffer[acb->wqbuf_firstindex], dst);
+		acb->wqbuf_firstindex++;
+		acb->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER;
+		dst++;
+	}
+
+	writel(sent, &wbuf->data_len);
+	writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK, &mu->inbound_doorbell);
+}
+
+/*
+ * Clear ACB_F_MSG_START_BGRB, post the STOP_BGRB message to the IOP and
+ * wait for its message interrupt; a timeout is only logged.
+ */
+static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *mu = acb->pmu;
+
+	acb->acb_flags &= ~ACB_F_MSG_START_BGRB;
+	writel(ARCMSR_INBOUND_MESG0_STOP_BGRB, &mu->inbound_msgaddr0);
+	if (!arcmsr_wait_msgint_ready(acb))
+		return;
+
+	printk(KERN_NOTICE
+		"arcmsr%d: wait 'stop adapter background rebulid' timeout \n",
+		acb->host->host_no);
+}
+
+/*
+ * Release the coherent DMA region obtained by arcmsr_alloc_ccb_pool(),
+ * using the same size expression and the unadjusted pointer/handle that
+ * were saved in the adapter block.
+ */
+static void arcmsr_free_ccb_pool(struct AdapterControlBlock *acb)
+{
+	size_t pool_bytes = ARCMSR_MAX_FREECCB_NUM *
+			    sizeof (struct CommandControlBlock) + 0x20;
+
+	dma_free_coherent(&acb->pdev->dev, pool_bytes,
+			  acb->dma_coherent, acb->dma_coherent_handle);
+}
+
+static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	struct CommandControlBlock *ccb;
+	uint32_t flag_ccb, outbound_intstatus, outbound_doorbell;
+
+	outbound_intstatus = readl(&reg->outbound_intstatus)
+		& acb->outbound_int_enable;
+	writel(outbound_intstatus, &reg->outbound_intstatus);
+	if (outbound_intstatus & ARCMSR_MU_OUTBOUND_DOORBELL_INT) {
+		outbound_doorbell = readl(&reg->outbound_doorbell);
+		writel(outbound_doorbell, &reg->outbound_doorbell);
+		if (outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_WRITE_OK) {
+			struct QBUFFER __iomem * prbuffer =
+				(struct QBUFFER __iomem *) &reg->message_rbuffer;
+			uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data;
+			int32_t my_empty_len, iop_len, rqbuf_firstindex, rqbuf_lastindex;
+
+			rqbuf_lastindex = acb->rqbuf_lastindex;
+			rqbuf_firstindex = acb->rqbuf_firstindex;
+			iop_len = readl(&prbuffer->data_len);
+			my_empty_len = (rqbuf_firstindex - rqbuf_lastindex - 1)
+					&(ARCMSR_MAX_QBUFFER - 1);
+			if (my_empty_len >= iop_len) {
+				while (iop_len > 0) {
+					acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data);
+					acb->rqbuf_lastindex++;
+					acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+					iop_data++;
+					iop_len--;
+				}
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,
+					&reg->inbound_doorbell);
+			} else
+				acb->acb_flags |= ACB_F_IOPDATA_OVERFLOW;
+		}
+		if (outbound_doorbell & ARCMSR_OUTBOUND_IOP331_DATA_READ_OK) {
+			acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_READED;
+			if (acb->wqbuf_firstindex != acb->wqbuf_lastindex) {
+				struct QBUFFER __iomem * pwbuffer =
+						(struct QBUFFER __iomem *) &reg->message_wbuffer;
+				uint8_t __iomem * iop_data = (uint8_t __iomem *) pwbuffer->data;
+				int32_t allxfer_len = 0;
+
+				acb->acb_flags &= (~ACB_F_MESSAGE_WQBUFFER_READED);
+				while ((acb->wqbuf_firstindex != acb->wqbuf_lastindex)
+					&& (allxfer_len < 124)) {
+					writeb(acb->wqbuffer[acb->wqbuf_firstindex], iop_data);
+					acb->wqbuf_firstindex++;
+					acb->wqbuf_firstindex %= ARCMSR_MAX_QBUFFER;
+					iop_data++;
+					allxfer_len++;
+				}
+				writel(allxfer_len, &pwbuffer->data_len);
+				writel(ARCMSR_INBOUND_DRIVER_DATA_WRITE_OK,
+					&reg->inbound_doorbell);
+			}
+			if (acb->wqbuf_firstindex == acb->wqbuf_lastindex)
+				acb->acb_flags |= ACB_F_MESSAGE_WQBUFFER_CLEARED;
+		}
+	}
+	if (outbound_intstatus & ARCMSR_MU_OUTBOUND_POSTQUEUE_INT) {
+		int id, lun;
+		/*
+		****************************************************************
+		**               areca cdb command done
+		****************************************************************
+		*/
+		while (1) {
+			if ((flag_ccb = readl(&reg->outbound_queueport)) == 0xFFFFFFFF)
+				break;/*chip FIFO no ccb for completion already*/
+			/* check if command done with no error*/
+			ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
+				(flag_ccb << 5));
+			if ((ccb->acb != acb) || (ccb->startdone != ARCMSR_CCB_START)) {
+				if (ccb->startdone == ARCMSR_CCB_ABORTED) {
+					struct scsi_cmnd *abortcmd=ccb->pcmd;
+					if (abortcmd) {
+					abortcmd->result |= DID_ABORT >> 16;
+					arcmsr_ccb_complete(ccb, 1);
+					printk(KERN_NOTICE
+						"arcmsr%d: ccb='0x%p' isr got aborted command \n"
+						, acb->host->host_no, ccb);
+					}
+					continue;
+				}
+				printk(KERN_NOTICE
+					"arcmsr%d: isr get an illegal ccb command done acb='0x%p'"
+					"ccb='0x%p' ccbacb='0x%p' startdone = 0x%x"
+					" ccboutstandingcount=%d \n"
+					, acb->host->host_no
+					, acb
+					, ccb
+					, ccb->acb
+					, ccb->startdone
+					, atomic_read(&acb->ccboutstandingcount));
+				continue;
+			}
+			id = ccb->pcmd->device->id;
+			lun = ccb->pcmd->device->lun;
+			if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
+				if (acb->devstate[id][lun] == ARECA_RAID_GONE)
+					acb->devstate[id][lun] = ARECA_RAID_GOOD;
+				ccb->pcmd->result = DID_OK << 16;
+				arcmsr_ccb_complete(ccb, 1);
+			} else {
+				switch(ccb->arcmsr_cdb.DeviceStatus) {
+				case ARCMSR_DEV_SELECT_TIMEOUT: {
+						acb->devstate[id][lun] = ARECA_RAID_GONE;
+						ccb->pcmd->result = DID_TIME_OUT << 16;
+						arcmsr_ccb_complete(ccb, 1);
+					}
+					break;
+				case ARCMSR_DEV_ABORTED:
+				case ARCMSR_DEV_INIT_FAIL: {
+						acb->devstate[id][lun] = ARECA_RAID_GONE;
+						ccb->pcmd->result = DID_BAD_TARGET << 16;
+						arcmsr_ccb_complete(ccb, 1);
+					}
+					break;
+				case ARCMSR_DEV_CHECK_CONDITION: {
+						acb->devstate[id][lun] = ARECA_RAID_GOOD;
+						arcmsr_report_sense_info(ccb);
+						arcmsr_ccb_complete(ccb, 1);
+					}
+					break;
+				default:
+					printk(KERN_NOTICE
+						"arcmsr%d: scsi id=%d lun=%d"
+						" isr get command error done,"
+						"but got unknown DeviceStatus = 0x%x \n"
+						, acb->host->host_no
+						, id
+						, lun
+						, ccb->arcmsr_cdb.DeviceStatus);
+						acb->devstate[id][lun] = ARECA_RAID_GONE;
+						ccb->pcmd->result = DID_NO_CONNECT << 16;
+						arcmsr_ccb_complete(ccb, 1);
+					break;
+				}
+			}
+		}/*drain reply FIFO*/
+	}
+	if (!(outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT))
+		return IRQ_NONE;
+	return IRQ_HANDLED;
+}
+
+static void arcmsr_iop_parking(struct AdapterControlBlock *acb)
+{
+	/* nothing to do without an adapter or when no rebuild was started */
+	if (!acb || !(acb->acb_flags & ACB_F_MSG_START_BGRB))
+		return;
+
+	/* stop adapter background rebuild, then flush the adapter cache */
+	acb->acb_flags &= ~ACB_F_MSG_START_BGRB;
+	arcmsr_stop_adapter_bgrb(acb);
+	arcmsr_flush_adapter_cache(acb);
+}
+
+/*
+ * Serve an IOP message carried by a READ/WRITE BUFFER command: the control
+ * code comes from CDB bytes 5-8, the payload is a struct CMD_MESSAGE_FIELD in
+ * the data buffer (single segment, at most sizeof(struct CMD_MESSAGE_FIELD)).
+ *
+ * Returns 0 on success, ARCMSR_MESSAGE_FAIL if the request is rejected
+ * (bad buffer or length, allocation failure, write ring busy or full,
+ * unknown control code).
+ */
+static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, struct scsi_cmnd *cmd)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	struct CMD_MESSAGE_FIELD *pcmdmessagefld;
+	int retvalue = 0, transfer_len = 0;
+	char *buffer;
+	uint32_t controlcode = (uint32_t ) cmd->cmnd[5] << 24 |
+						(uint32_t ) cmd->cmnd[6] << 16 |
+						(uint32_t ) cmd->cmnd[7] << 8  |
+						(uint32_t ) cmd->cmnd[8];
+					/* 4 bytes: Areca io control code */
+	if (cmd->use_sg) {
+		struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer;
+
+		/* map before any bail-out: message_out unmaps whenever use_sg is set */
+		buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+		if (cmd->use_sg > 1) {
+			retvalue = ARCMSR_MESSAGE_FAIL;
+			goto message_out;
+		}
+		transfer_len += sg->length;
+	} else {
+		buffer = cmd->request_buffer;
+		transfer_len = cmd->request_bufflen;
+	}
+	if (transfer_len > sizeof(struct CMD_MESSAGE_FIELD)) {
+		retvalue = ARCMSR_MESSAGE_FAIL;
+		goto message_out;
+	}
+	pcmdmessagefld = (struct CMD_MESSAGE_FIELD *) buffer;
+	switch(controlcode) {
+	case ARCMSR_MESSAGE_READ_RQBUFFER: {
+			unsigned long *ver_addr;
+			dma_addr_t buf_handle;
+			uint8_t *pQbuffer, *ptmpQbuffer;
+			int32_t allxfer_len = 0;
+
+			ver_addr = pci_alloc_consistent(acb->pdev, 1032, &buf_handle);
+			if (!ver_addr) {
+				retvalue = ARCMSR_MESSAGE_FAIL;
+				goto message_out;
+			}
+			ptmpQbuffer = (uint8_t *) ver_addr;
+			/* move up to 1031 queued bytes from the read ring to the bounce buffer */
+			while ((acb->rqbuf_firstindex != acb->rqbuf_lastindex)
+				&& (allxfer_len < 1031)) {
+				pQbuffer = &acb->rqbuffer[acb->rqbuf_firstindex];
+				memcpy(ptmpQbuffer, pQbuffer, 1);
+				acb->rqbuf_firstindex++;
+				acb->rqbuf_firstindex %= ARCMSR_MAX_QBUFFER;
+				ptmpQbuffer++;
+				allxfer_len++;
+			}
+			if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+				struct QBUFFER __iomem * prbuffer = (struct QBUFFER __iomem *)
+							&reg->message_rbuffer;
+				uint8_t __iomem * iop_data = (uint8_t __iomem *)prbuffer->data;
+				int32_t iop_len;
+
+				/* overflow was flagged: fetch the pending IOP data now */
+				acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+				iop_len = readl(&prbuffer->data_len);
+				while (iop_len > 0) {
+					acb->rqbuffer[acb->rqbuf_lastindex] = readb(iop_data);
+					acb->rqbuf_lastindex++;
+					acb->rqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+					iop_data++;
+					iop_len--;
+				}
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, &reg->inbound_doorbell);
+			}
+			memcpy(pcmdmessagefld->messagedatabuffer, (uint8_t *)ver_addr, allxfer_len);
+			pcmdmessagefld->cmdmessage.Length = allxfer_len;
+			pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK;
+			pci_free_consistent(acb->pdev, 1032, ver_addr, buf_handle);
+		}
+		break;
+	case ARCMSR_MESSAGE_WRITE_WQBUFFER: {
+			unsigned long *ver_addr;
+			dma_addr_t buf_handle;
+			int32_t my_empty_len, user_len, wqbuf_firstindex, wqbuf_lastindex;
+			uint8_t *pQbuffer, *ptmpuserbuffer;
+
+			/* caller-supplied length must fit the 1032-byte bounce buffer */
+			user_len = pcmdmessagefld->cmdmessage.Length;
+			if (user_len < 0 || user_len > 1032) {
+				retvalue = ARCMSR_MESSAGE_FAIL;
+				goto message_out;
+			}
+			ver_addr = pci_alloc_consistent(acb->pdev, 1032, &buf_handle);
+			if (!ver_addr) {
+				retvalue = ARCMSR_MESSAGE_FAIL;
+				goto message_out;
+			}
+			ptmpuserbuffer = (uint8_t *)ver_addr;
+			memcpy(ptmpuserbuffer, pcmdmessagefld->messagedatabuffer, user_len);
+			wqbuf_lastindex = acb->wqbuf_lastindex;
+			wqbuf_firstindex = acb->wqbuf_firstindex;
+			if (wqbuf_lastindex != wqbuf_firstindex) {
+				struct SENSE_DATA *sensebuffer =
+					(struct SENSE_DATA *)cmd->sense_buffer;
+				arcmsr_post_Qbuffer(acb);
+				/* has error report sensedata */
+				sensebuffer->ErrorCode = 0x70;
+				sensebuffer->SenseKey = ILLEGAL_REQUEST;
+				sensebuffer->AdditionalSenseLength = 0x0A;
+				sensebuffer->AdditionalSenseCode = 0x20;
+				sensebuffer->Valid = 1;
+				retvalue = ARCMSR_MESSAGE_FAIL;
+			} else {
+				my_empty_len = (wqbuf_firstindex-wqbuf_lastindex - 1) &(ARCMSR_MAX_QBUFFER - 1);
+				if (my_empty_len >= user_len) {
+					while (user_len > 0) {
+						pQbuffer = &acb->wqbuffer[acb->wqbuf_lastindex];
+						memcpy(pQbuffer, ptmpuserbuffer, 1);
+						acb->wqbuf_lastindex++;
+						acb->wqbuf_lastindex %= ARCMSR_MAX_QBUFFER;
+						ptmpuserbuffer++;
+						user_len--;
+					}
+					if (acb->acb_flags & ACB_F_MESSAGE_WQBUFFER_CLEARED) {
+						acb->acb_flags &= ~ACB_F_MESSAGE_WQBUFFER_CLEARED;
+						arcmsr_post_Qbuffer(acb);
+					}
+				} else {
+					/* has error report sensedata */
+					struct SENSE_DATA *sensebuffer =
+						(struct SENSE_DATA *)cmd->sense_buffer;
+					sensebuffer->ErrorCode = 0x70;
+					sensebuffer->SenseKey = ILLEGAL_REQUEST;
+					sensebuffer->AdditionalSenseLength = 0x0A;
+					sensebuffer->AdditionalSenseCode = 0x20;
+					sensebuffer->Valid = 1;
+					retvalue = ARCMSR_MESSAGE_FAIL;
+				}
+			}
+			pci_free_consistent(acb->pdev, 1032, ver_addr, buf_handle);
+		}
+		break;
+	case ARCMSR_MESSAGE_CLEAR_RQBUFFER: {
+			uint8_t *pQbuffer = acb->rqbuffer;
+
+			if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+				acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, &reg->inbound_doorbell);
+			}
+			acb->acb_flags |= ACB_F_MESSAGE_RQBUFFER_CLEARED;
+			acb->rqbuf_firstindex = 0;
+			acb->rqbuf_lastindex = 0;
+			memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER);
+			pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK;
+		}
+		break;
+	case ARCMSR_MESSAGE_CLEAR_WQBUFFER: {
+			uint8_t *pQbuffer = acb->wqbuffer;
+
+			if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+				acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, &reg->inbound_doorbell);
+			}
+			acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED |
+					ACB_F_MESSAGE_WQBUFFER_READED);
+			acb->wqbuf_firstindex = 0;
+			acb->wqbuf_lastindex = 0;
+			memset(pQbuffer, 0, ARCMSR_MAX_QBUFFER);
+			pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK;
+		}
+		break;
+	case ARCMSR_MESSAGE_CLEAR_ALLQBUFFER: {
+			if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
+				acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
+				writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, &reg->inbound_doorbell);
+			}
+			acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED
+				| ACB_F_MESSAGE_RQBUFFER_CLEARED
+				| ACB_F_MESSAGE_WQBUFFER_READED);
+			acb->rqbuf_firstindex = 0;
+			acb->rqbuf_lastindex = 0;
+			acb->wqbuf_firstindex = 0;
+			acb->wqbuf_lastindex = 0;
+			/* wipe both rings in full, same size as the single-ring clears */
+			memset(acb->rqbuffer, 0, ARCMSR_MAX_QBUFFER);
+			memset(acb->wqbuffer, 0, ARCMSR_MAX_QBUFFER);
+			pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK;
+		}
+		break;
+	case ARCMSR_MESSAGE_RETURN_CODE_3F:
+		pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_3F;
+		break;
+	case ARCMSR_MESSAGE_SAY_HELLO: {
+			int8_t * hello_string = "Hello! I am ARCMSR";
+
+			memcpy(pcmdmessagefld->messagedatabuffer, hello_string, (int16_t)strlen(hello_string));
+			pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK;
+		}
+		break;
+	case ARCMSR_MESSAGE_SAY_GOODBYE:
+		arcmsr_iop_parking(acb);
+		break;
+	case ARCMSR_MESSAGE_FLUSH_ADAPTER_CACHE:
+		arcmsr_flush_adapter_cache(acb);
+		break;
+	default:
+		retvalue = ARCMSR_MESSAGE_FAIL;
+	}
+ message_out:
+	if (cmd->use_sg) {
+		struct scatterlist *sg;
+
+		sg = (struct scatterlist *) cmd->request_buffer;
+		kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+	}
+	return retvalue;
+}
+
+static struct CommandControlBlock *arcmsr_get_freeccb(struct AdapterControlBlock *acb)
+{
+	struct list_head *first;
+
+	/* detach and return the first free ccb, or NULL if none is left */
+	if (list_empty(&acb->ccb_free_list))
+		return NULL;
+	first = acb->ccb_free_list.next;
+	list_del(first);
+	return list_entry(first, struct CommandControlBlock, list);
+}
+
+/*
+ * Emulate the virtual device used for IOP message transfer: INQUIRY is
+ * answered from a locally built response, READ/WRITE BUFFER go through
+ * arcmsr_iop_message_xfer().  Every path completes cmd via cmd->scsi_done().
+ */
+static void arcmsr_handle_virtual_command(struct AdapterControlBlock *acb,
+		struct scsi_cmnd *cmd)
+{
+	switch (cmd->cmnd[0]) {
+	case INQUIRY: {
+		/* zero-filled so bytes 3 and 5-7 never expose stale stack data */
+		unsigned char inqdata[36] = { 0 };
+		struct scatterlist *sg = NULL;
+		unsigned int copy_len = sizeof(inqdata);
+		char *buffer;
+
+		if (cmd->device->lun) {
+			cmd->result = (DID_TIME_OUT << 16);
+			cmd->scsi_done(cmd);
+			return;
+		}
+		inqdata[0] = TYPE_PROCESSOR;	/* Periph Qualifier & Dev Type */
+		inqdata[4] = 31;		/* length of additional data */
+		memcpy(&inqdata[8], "Areca   ", 8);	/* Vendor Identification */
+		memcpy(&inqdata[16], "RAID controller ", 16); /* Product Id */
+		memcpy(&inqdata[32], "R001", 4);	/* Product Revision */
+		if (cmd->use_sg) {
+			sg = (struct scatterlist *) cmd->request_buffer;
+			buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+			/* only the first segment is mapped */
+			if (copy_len > sg->length)
+				copy_len = sg->length;
+		} else {
+			buffer = cmd->request_buffer;
+		}
+		/* never write more than the request buffer can hold */
+		if (copy_len > cmd->request_bufflen)
+			copy_len = cmd->request_bufflen;
+		memcpy(buffer, inqdata, copy_len);
+		if (sg)
+			kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+		cmd->scsi_done(cmd);
+	}
+	break;
+	case WRITE_BUFFER:
+	case READ_BUFFER: {
+		if (arcmsr_iop_message_xfer(acb, cmd))
+			cmd->result = (DID_ERROR << 16);
+		cmd->scsi_done(cmd);
+	}
+	break;
+	default:
+		cmd->scsi_done(cmd);
+	}
+}
+
+/*
+ * Queue one SCSI command: SCSI_MLQUEUE_HOST_BUSY while a bus reset is flagged
+ * or no ccb can be had, else 0 once completed locally or posted to the adapter.
+ */
+static int arcmsr_queue_command(struct scsi_cmnd *cmd,
+	void (* done)(struct scsi_cmnd *))
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *) cmd->device->host->hostdata;
+	struct CommandControlBlock *ccb;
+	int target = cmd->device->id;
+	int lun = cmd->device->lun;
+
+	cmd->scsi_done = done;
+	cmd->host_scribble = NULL;
+	cmd->result = 0;
+	if (acb->acb_flags & ACB_F_BUS_RESET) {
+		printk(KERN_NOTICE "arcmsr%d: bus reset and return busy \n",
+			acb->host->host_no);
+		return SCSI_MLQUEUE_HOST_BUSY;
+	}
+	if (target == 16) {
+		/* virtual device for iop message transfer */
+		arcmsr_handle_virtual_command(acb, cmd);
+		return 0;
+	}
+	if (acb->devstate[target][lun] == ARECA_RAID_GONE) {
+		switch (cmd->cmnd[0] & 0x0f) {
+		case 0x08:
+		case 0x0a:
+			/* fail these opcodes at once while the volume is gone */
+			printk(KERN_NOTICE
+				"arcmsr%d: block 'read/write'"
+				"command with gone raid volume"
+				" Cmd=%2x, TargetId=%d, Lun=%d \n"
+				, acb->host->host_no
+				, cmd->cmnd[0]
+				, target, lun);
+			cmd->result = (DID_NO_CONNECT << 16);
+			cmd->scsi_done(cmd);
+			return 0;
+		}
+	}
+	if (atomic_read(&acb->ccboutstandingcount) >= ARCMSR_MAX_OUTSTANDING_CMD)
+		return SCSI_MLQUEUE_HOST_BUSY;
+	ccb = arcmsr_get_freeccb(acb);
+	if (!ccb)
+		return SCSI_MLQUEUE_HOST_BUSY;
+	arcmsr_build_ccb(acb, ccb, cmd);
+	arcmsr_post_ccb(acb, ccb);
+	return 0;
+}
+
+/*
+ * Send GET_CONFIG to the IOP, then copy the firmware model/version bytes and
+ * four configuration words out of message_rwbuffer into acb.
+ */
+static void arcmsr_get_firmware_spec(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	char __iomem *iop_firm_model = (char __iomem *) &reg->message_rwbuffer[15];
+	char __iomem *iop_firm_version = (char __iomem *) &reg->message_rwbuffer[17];
+	char *dst;
+	int i;
+
+	writel(ARCMSR_INBOUND_MESG0_GET_CONFIG, &reg->inbound_msgaddr0);
+	/* a timeout is only reported; the buffer is read regardless */
+	if (arcmsr_wait_msgint_ready(acb))
+		printk(KERN_NOTICE
+			"arcmsr%d: wait "
+			"'get adapter firmware miscellaneous data' timeout \n"
+			, acb->host->host_no);
+	/* 8 bytes of firmware model */
+	dst = acb->firm_model;
+	for (i = 0; i < 8; i++)
+		dst[i] = readb(iop_firm_model + i);
+
+	/* 16 bytes of firmware version */
+	dst = acb->firm_version;
+	for (i = 0; i < 16; i++)
+		dst[i] = readb(iop_firm_version + i);
+
+	printk(KERN_INFO
+		"ARECA RAID ADAPTER%d: FIRMWARE VERSION %s \n"
+		, acb->host->host_no
+		, acb->firm_version);
+	acb->firm_request_len = readl(&reg->message_rwbuffer[1]);
+	acb->firm_numbers_queue = readl(&reg->message_rwbuffer[2]);
+	acb->firm_sdram_size = readl(&reg->message_rwbuffer[3]);
+	acb->firm_hd_channels = readl(&reg->message_rwbuffer[4]);
+}
+
+static void arcmsr_polling_ccbdone(struct AdapterControlBlock *acb,
+	struct CommandControlBlock *poll_ccb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	struct CommandControlBlock *ccb;
+	uint32_t flag_ccb, outbound_intstatus, poll_ccb_done = 0, poll_count = 0;
+	int id, lun;
+	/* Poll the outbound queue until an aborted ccb or poll_ccb is completed, or the retry limit is hit. */
+ polling_ccb_retry:
+	poll_count++;
+	outbound_intstatus = readl(&reg->outbound_intstatus)
+					& acb->outbound_int_enable;
+	writel(outbound_intstatus, &reg->outbound_intstatus);/*clear interrupt*/
+	while (1) {
+		if ((flag_ccb = readl(&reg->outbound_queueport)) == 0xFFFFFFFF) { /* nothing posted */
+			if (poll_ccb_done) /* the awaited completion was already seen */
+				break;
+			else {
+				msleep(25);
+				if (poll_count > 100) /* stop retrying once poll_count exceeds 100 */
+					break;
+				goto polling_ccb_retry;
+			}
+		}
+		ccb = (struct CommandControlBlock *)
+			(acb->vir2phy_offset + (flag_ccb << 5));
+		if ((ccb->acb != acb) ||
+			(ccb->startdone != ARCMSR_CCB_START)) {
+			if ((ccb->startdone == ARCMSR_CCB_ABORTED) ||
+				(ccb == poll_ccb)) {
+				printk(KERN_NOTICE
+					"arcmsr%d: scsi id=%d lun=%d ccb='0x%p'"
+					" poll command abort successfully \n"
+					, acb->host->host_no
+					, ccb->pcmd->device->id
+					, ccb->pcmd->device->lun
+					, ccb);
+				ccb->pcmd->result = DID_ABORT << 16;
+				arcmsr_ccb_complete(ccb, 1);
+				poll_ccb_done = 1;
+				continue;
+			}
+			printk(KERN_NOTICE
+				"arcmsr%d: polling get an illegal ccb"
+				" command done ccb='0x%p'"
+				"ccboutstandingcount=%d \n"
+				, acb->host->host_no
+				, ccb
+				, atomic_read(&acb->ccboutstandingcount));
+			continue; /* unexpected ccb: logged only, keep draining */
+		}
+		id = ccb->pcmd->device->id;
+		lun = ccb->pcmd->device->lun;
+		if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
+			if (acb->devstate[id][lun] == ARECA_RAID_GONE)
+				acb->devstate[id][lun] = ARECA_RAID_GOOD;
+			ccb->pcmd->result = DID_OK << 16;
+			arcmsr_ccb_complete(ccb, 1);
+		} else {
+			switch(ccb->arcmsr_cdb.DeviceStatus) {
+			case ARCMSR_DEV_SELECT_TIMEOUT: {
+					acb->devstate[id][lun] = ARECA_RAID_GONE;
+					ccb->pcmd->result = DID_TIME_OUT << 16;
+					arcmsr_ccb_complete(ccb, 1);
+				}
+				break;
+			case ARCMSR_DEV_ABORTED:
+			case ARCMSR_DEV_INIT_FAIL: {
+					acb->devstate[id][lun] = ARECA_RAID_GONE;
+					ccb->pcmd->result = DID_BAD_TARGET << 16;
+					arcmsr_ccb_complete(ccb, 1);
+				}
+				break;
+			case ARCMSR_DEV_CHECK_CONDITION: {
+					acb->devstate[id][lun] = ARECA_RAID_GOOD;
+					arcmsr_report_sense_info(ccb);
+					arcmsr_ccb_complete(ccb, 1);
+				}
+				break;
+			default:
+				printk(KERN_NOTICE
+					"arcmsr%d: scsi id=%d lun=%d"
+					" polling and getting command error done"
+					"but got unknown DeviceStatus = 0x%x \n"
+					, acb->host->host_no
+					, id
+					, lun
+					, ccb->arcmsr_cdb.DeviceStatus);
+				acb->devstate[id][lun] = ARECA_RAID_GONE;
+				ccb->pcmd->result = DID_BAD_TARGET << 16;
+				arcmsr_ccb_complete(ccb, 1);
+				break;
+			}
+		}
+	}
+}
+
+/* Wait for firmware-OK, fetch the firmware spec, start background rebuild, set the int mask. */
+static void arcmsr_iop_init(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	uint32_t saved_mask, new_mask;
+
+	/* spin until the firmware reports it is up (there is no timeout) */
+	while (!(readl(&reg->outbound_msgaddr1)
+			& ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK))
+		;
+	saved_mask = readl(&reg->outbound_intmask)
+			| ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE;
+	arcmsr_get_firmware_spec(acb);
+
+	acb->acb_flags |= ACB_F_MSG_START_BGRB;
+	writel(ARCMSR_INBOUND_MESG0_START_BGRB, &reg->inbound_msgaddr0);
+	if (arcmsr_wait_msgint_ready(acb))
+		printk(KERN_NOTICE "arcmsr%d: "
+			"wait 'start adapter background rebulid' timeout\n",
+			acb->host->host_no);
+	/* write back whatever outbound doorbell bits are currently set */
+	writel(readl(&reg->outbound_doorbell), &reg->outbound_doorbell);
+	writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, &reg->inbound_doorbell);
+	new_mask = saved_mask & ~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE
+			| ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE);
+	writel(new_mask, &reg->outbound_intmask);
+	acb->outbound_int_enable = ~new_mask & 0x000000ff;
+	acb->acb_flags |= ACB_F_IOP_INITED;
+}
+
+/* Abort everything still outstanding on the IOP and zero the outstanding count. */
+static void arcmsr_iop_reset(struct AdapterControlBlock *acb)
+{
+	struct MessageUnit __iomem *reg = acb->pmu;
+	uint32_t saved_intmask;
+	int n;
+
+	if (atomic_read(&acb->ccboutstandingcount)) {
+		arcmsr_abort_allcmd(acb);
+		/* wait for 3 sec for all command aborted*/
+		msleep_interruptible(3000);
+		/* disable all outbound interrupt */
+		saved_intmask = arcmsr_disable_outbound_ints(acb);
+		/* clear all outbound posted Q */
+		for (n = ARCMSR_MAX_OUTSTANDING_CMD; n > 0; n--)
+			readl(&reg->outbound_queueport);
+		/* complete every started or aborted ccb with DID_ABORT */
+		for (n = 0; n < ARCMSR_MAX_FREECCB_NUM; n++) {
+			struct CommandControlBlock *ccb = acb->pccb_pool[n];
+			if (ccb->startdone != ARCMSR_CCB_START &&
+				ccb->startdone != ARCMSR_CCB_ABORTED)
+				continue;
+			ccb->startdone = ARCMSR_CCB_ABORTED;
+			ccb->pcmd->result = DID_ABORT << 16;
+			arcmsr_ccb_complete(ccb, 1);
+		}
+		/* enable all outbound interrupt */
+		arcmsr_enable_outbound_ints(acb, saved_intmask);
+	}
+	atomic_set(&acb->ccboutstandingcount, 0);
+}
+
+/* Bus reset handler: drain outstanding commands, reset the IOP, return SUCCESS. */
+static int arcmsr_bus_reset(struct scsi_cmnd *cmd)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)cmd->device->host->hostdata;
+	int tries = 400;
+
+	acb->num_resets++;
+	acb->acb_flags |= ACB_F_BUS_RESET;
+	/* up to 400 rounds of running the isr by hand, 25 ms apart */
+	while (tries-- > 0 && atomic_read(&acb->ccboutstandingcount) != 0) {
+		arcmsr_interrupt(acb);
+		msleep(25);
+	}
+	arcmsr_iop_reset(acb);
+	acb->acb_flags &= ~ACB_F_BUS_RESET;
+	return SUCCESS;
+}
+
+/*
+ * Mark ccb aborted, give the adapter 3 seconds, then poll for its completion
+ * with the outbound interrupts disabled.
+ */
+static void arcmsr_abort_one_cmd(struct AdapterControlBlock *acb,
+		struct CommandControlBlock *ccb)
+{
+	u32 saved_intmask;
+
+	ccb->startdone = ARCMSR_CCB_ABORTED;
+	msleep_interruptible(3000);	/* wait for 3 sec for all command done */
+
+	saved_intmask = arcmsr_disable_outbound_ints(acb);
+	arcmsr_polling_ccbdone(acb, ccb);
+	arcmsr_enable_outbound_ints(acb, saved_intmask);
+}
+
+/*
+ * Abort handler: find the started ccb that carries cmd and abort it.
+ * Always returns SUCCESS, whether or not a matching ccb was found.
+ */
+static int arcmsr_abort(struct scsi_cmnd *cmd)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *)cmd->device->host->hostdata;
+	struct CommandControlBlock *ccb;
+	int slot;
+
+	printk(KERN_NOTICE
+		"arcmsr%d: abort device command of scsi id=%d lun=%d \n",
+		acb->host->host_no, cmd->device->id, cmd->device->lun);
+	acb->num_aborts++;
+
+	/* nothing outstanding means there is nothing to abort */
+	if (atomic_read(&acb->ccboutstandingcount) == 0)
+		return SUCCESS;
+
+	for (slot = 0; slot < ARCMSR_MAX_FREECCB_NUM; slot++) {
+		ccb = acb->pccb_pool[slot];
+		if (ccb->startdone != ARCMSR_CCB_START || ccb->pcmd != cmd)
+			continue;
+		arcmsr_abort_one_cmd(acb, ccb);
+		break;
+	}
+
+	return SUCCESS;
+}
+
+/*
+ * Build the adapter description (bus type chosen from the PCI device id,
+ * RAID6 note, driver version) in a static buffer and return it.
+ */
+static const char *arcmsr_info(struct Scsi_Host *host)
+{
+	struct AdapterControlBlock *acb =
+		(struct AdapterControlBlock *) host->hostdata;
+	static char buf[256];
+	char *type = "X-TYPE";
+	char *raid6_note = "( RAID6 capable)";
+
+	switch (acb->pdev->device) {
+	case PCI_DEVICE_ID_ARECA_1380:
+	case PCI_DEVICE_ID_ARECA_1381:
+	case PCI_DEVICE_ID_ARECA_1680:
+	case PCI_DEVICE_ID_ARECA_1681:
+		type = "SAS";
+		break;
+	case PCI_DEVICE_ID_ARECA_1110:
+	case PCI_DEVICE_ID_ARECA_1210:
+		raid6_note = "";
+		/*FALLTHRU*/
+	case PCI_DEVICE_ID_ARECA_1120:
+	case PCI_DEVICE_ID_ARECA_1130:
+	case PCI_DEVICE_ID_ARECA_1160:
+	case PCI_DEVICE_ID_ARECA_1170:
+	case PCI_DEVICE_ID_ARECA_1220:
+	case PCI_DEVICE_ID_ARECA_1230:
+	case PCI_DEVICE_ID_ARECA_1260:
+	case PCI_DEVICE_ID_ARECA_1270:
+	case PCI_DEVICE_ID_ARECA_1280:
+		type = "SATA";
+		break;
+	}
+	sprintf(buf, "Areca %s Host Adapter RAID Controller%s\n        %s",
+			type, raid6_note, ARCMSR_DRIVER_VERSION);
+	return buf;
+}
+
+
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index e133733..7b3bd34 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -46,7 +46,6 @@
 
 #include <linux/stat.h>
 #include <linux/slab.h>		/* for kmalloc() */
-#include <linux/config.h>	/* for CONFIG_PCI */
 #include <linux/pci.h>		/* for PCI support */
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
@@ -185,7 +184,7 @@
 	PINFO("Detecting Adaptec I2O RAID controllers...\n");
 
         /* search for all Adatpec I2O RAID cards */
-	while ((pDev = pci_find_device( PCI_DPT_VENDOR_ID, PCI_ANY_ID, pDev))) {
+	while ((pDev = pci_get_device( PCI_DPT_VENDOR_ID, PCI_ANY_ID, pDev))) {
 		if(pDev->device == PCI_DPT_DEVICE_ID ||
 		   pDev->device == PCI_DPT_RAPTOR_DEVICE_ID){
 			if(adpt_install_hba(sht, pDev) ){
@@ -193,8 +192,11 @@
 				PERROR("Will not try to detect others.\n");
 				return hba_count-1;
 			}
+			pci_dev_get(pDev);
 		}
 	}
+	if (pDev)
+		pci_dev_put(pDev);
 
 	/* In INIT state, Activate IOPs */
 	for (pHba = hba_chain; pHba; pHba = pHba->next) {
@@ -1076,6 +1078,7 @@
 			}
 		}
 	}
+	pci_dev_put(pHba->pDev);
 	kfree(pHba);
 
 	if(hba_count <= 0){
diff --git a/drivers/scsi/eata_generic.h b/drivers/scsi/eata_generic.h
index 34bce2c..635c148 100644
--- a/drivers/scsi/eata_generic.h
+++ b/drivers/scsi/eata_generic.h
@@ -364,6 +364,7 @@
     __u8   moresupport;		 /* HBA supports MORE flag     */
     struct Scsi_Host *next;	    
     struct Scsi_Host *prev;
+    struct pci_dev *pdev;	/* PCI device or NULL for non PCI */
     struct eata_sp sp;		 /* status packet	       */ 
     struct eata_ccb ccb[0];	 /* ccb array begins here      */
 }hostdata;
diff --git a/drivers/scsi/eata_pio.c b/drivers/scsi/eata_pio.c
index 771b019..d312633 100644
--- a/drivers/scsi/eata_pio.c
+++ b/drivers/scsi/eata_pio.c
@@ -71,11 +71,11 @@
 #include "eata_pio.h"
 
 
-static uint ISAbases[MAXISA] =	{
+static unsigned int ISAbases[MAXISA] =	{
 	 0x1F0, 0x170, 0x330, 0x230
 };
 
-static uint ISAirqs[MAXISA] = {
+static unsigned int ISAirqs[MAXISA] = {
 	14, 12, 15, 11
 };
 
@@ -84,7 +84,7 @@
 	1, 1, 1, 1, 1, 1, 1, 1 
 };
 
-static uint registered_HBAs;
+static unsigned int registered_HBAs;
 static struct Scsi_Host *last_HBA;
 static struct Scsi_Host *first_HBA;
 static unsigned char reg_IRQ[16];
@@ -165,6 +165,7 @@
 
 static int eata_pio_release(struct Scsi_Host *sh)
 {
+	hostdata *hd = SD(sh);
 	if (sh->irq && reg_IRQ[sh->irq] == 1)
 		free_irq(sh->irq, NULL);
 	else
@@ -173,10 +174,13 @@
 		if (sh->io_port && sh->n_io_port)
 			release_region(sh->io_port, sh->n_io_port);
 	}
+	/* At this point the PCI reference can go */
+	if (hd->pdev)
+		pci_dev_put(hd->pdev);
 	return 1;
 }
 
-static void IncStat(struct scsi_pointer *SCp, uint Increment)
+static void IncStat(struct scsi_pointer *SCp, unsigned int Increment)
 {
 	SCp->ptr += Increment;
 	if ((SCp->this_residual -= Increment) == 0) {
@@ -190,46 +194,49 @@
 	}
 }
 
-static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs);
+static irqreturn_t eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs);
 
 static irqreturn_t do_eata_pio_int_handler(int irq, void *dev_id,
 						struct pt_regs *regs)
 {
 	unsigned long flags;
 	struct Scsi_Host *dev = dev_id;
+	irqreturn_t ret;
 
 	spin_lock_irqsave(dev->host_lock, flags);
-	eata_pio_int_handler(irq, dev_id, regs);
+	ret = eata_pio_int_handler(irq, dev_id, regs);
 	spin_unlock_irqrestore(dev->host_lock, flags);
-	return IRQ_HANDLED;
+	return ret;
 }
 
-static void eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t eata_pio_int_handler(int irq, void *dev_id, struct pt_regs *regs)
 {
-	uint eata_stat = 0xfffff;
+	unsigned int eata_stat = 0xfffff;
 	struct scsi_cmnd *cmd;
 	hostdata *hd;
 	struct eata_ccb *cp;
-	uint base;
-	uint x, z;
+	unsigned long base;
+	unsigned int x, z;
 	struct Scsi_Host *sh;
 	unsigned short zwickel = 0;
 	unsigned char stat, odd;
+	irqreturn_t ret = IRQ_NONE;
 
 	for (x = 1, sh = first_HBA; x <= registered_HBAs; x++, sh = SD(sh)->prev) 
 	{
 		if (sh->irq != irq)
 			continue;
-		if (inb((uint) sh->base + HA_RSTATUS) & HA_SBUSY)
+		if (inb(sh->base + HA_RSTATUS) & HA_SBUSY)
 			continue;
 
 		int_counter++;
+		ret = IRQ_HANDLED;
 
 		hd = SD(sh);
 
 		cp = &hd->ccb[0];
 		cmd = cp->cmd;
-		base = (uint) cmd->device->host->base;
+		base = cmd->device->host->base;
 
 		do {
 			stat = inb(base + HA_RSTATUS);
@@ -304,7 +311,7 @@
 		if (!(inb(base + HA_RSTATUS) & HA_SERROR)) {
 			cmd->result = (DID_OK << 16);
 			hd->devflags |= (1 << cp->cp_id);
-		} else if (hd->devflags & 1 << cp->cp_id)
+		} else if (hd->devflags & (1 << cp->cp_id))
 			cmd->result = (DID_OK << 16) + 0x02;
 		else
 			cmd->result = (DID_NO_CONNECT << 16);
@@ -313,7 +320,7 @@
 			cp->status = FREE;
 			eata_stat = inb(base + HA_RSTATUS);
 			printk(KERN_CRIT "eata_pio: int_handler, freeing locked " "queueslot\n");
-			return;
+			return ret;
 		}
 #if DBG_INTR2
 		if (stat != 0x50)
@@ -325,12 +332,12 @@
 		cmd->scsi_done(cmd);
 	}
 
-	return;
+	return ret;
 }
 
-static inline uint eata_pio_send_command(uint base, unsigned char command)
+static inline unsigned int eata_pio_send_command(unsigned long base, unsigned char command)
 {
-	uint loop = HZ / 2;
+	unsigned int loop = 50;
 
 	while (inb(base + HA_RSTATUS) & HA_SBUSY)
 		if (--loop == 0)
@@ -349,8 +356,8 @@
 static int eata_pio_queue(struct scsi_cmnd *cmd,
 		void (*done)(struct scsi_cmnd *))
 {
-	uint x, y;
-	uint base;
+	unsigned int x, y;
+	unsigned long base;
 
 	hostdata *hd;
 	struct Scsi_Host *sh;
@@ -360,7 +367,7 @@
 
 	hd = HD(cmd);
 	sh = cmd->device->host;
-	base = (uint) sh->base;
+	base = sh->base;
 
 	/* use only slot 0, as 2001 can handle only one cmd at a time */
 
@@ -395,9 +402,9 @@
 		cp->DataIn = 0;	/* Input mode  */
 
 	cp->Interpret = (cmd->device->id == hd->hostid);
-	cp->cp_datalen = htonl((unsigned long) cmd->request_bufflen);
+	cp->cp_datalen = cpu_to_be32(cmd->request_bufflen);
 	cp->Auto_Req_Sen = 0;
-	cp->cp_reqDMA = htonl(0);
+	cp->cp_reqDMA = 0;
 	cp->reqlen = 0;
 
 	cp->cp_id = cmd->device->id;
@@ -406,7 +413,7 @@
 	cp->cp_identify = 1;
 	memcpy(cp->cp_cdb, cmd->cmnd, COMMAND_SIZE(*cmd->cmnd));
 
-	cp->cp_statDMA = htonl(0);
+	cp->cp_statDMA = 0;
 
 	cp->cp_viraddr = cp;
 	cp->cmd = cmd;
@@ -445,14 +452,14 @@
 
 	DBG(DBG_QUEUE, scmd_printk(KERN_DEBUG, cmd,
 		"Queued base %#.4lx pid: %ld "
-		"slot %d irq %d\n", (long) sh->base, cmd->pid, y, sh->irq));
+		"slot %d irq %d\n", sh->base, cmd->pid, y, sh->irq));
 
 	return (0);
 }
 
 static int eata_pio_abort(struct scsi_cmnd *cmd)
 {
-	uint loop = HZ;
+	unsigned int loop = 100;
 
 	DBG(DBG_ABNORM, scmd_printk(KERN_WARNING, cmd,
 		"eata_pio_abort called pid: %ld\n",
@@ -485,7 +492,7 @@
 
 static int eata_pio_host_reset(struct scsi_cmnd *cmd)
 {
-	uint x, limit = 0;
+	unsigned int x, limit = 0;
 	unsigned char success = 0;
 	struct scsi_cmnd *sp;
 	struct Scsi_Host *host = cmd->device->host;
@@ -518,7 +525,7 @@
 	}
 
 	/* hard reset the HBA  */
-	outb(EATA_CMD_RESET, (uint) cmd->device->host->base + HA_WCOMMAND);
+	outb(EATA_CMD_RESET, cmd->device->host->base + HA_WCOMMAND);
 
 	DBG(DBG_ABNORM, printk(KERN_WARNING "eata_pio_reset: board reset done.\n"));
 	HD(cmd)->state = RESET;
@@ -558,7 +565,7 @@
 	}
 }
 
-static char *get_pio_board_data(unsigned long base, uint irq, uint id, unsigned long cplen, unsigned short cppadlen)
+static char *get_pio_board_data(unsigned long base, unsigned int irq, unsigned int id, unsigned long cplen, unsigned short cppadlen)
 {
 	struct eata_ccb cp;
 	static char buff[256];
@@ -570,8 +577,8 @@
 	cp.DataIn = 1;
 	cp.Interpret = 1;	/* Interpret command */
 
-	cp.cp_datalen = htonl(254);
-	cp.cp_dataDMA = htonl(0);
+	cp.cp_datalen = cpu_to_be32(254);
+	cp.cp_dataDMA = cpu_to_be32(0);
 
 	cp.cp_id = id;
 	cp.cp_lun = 0;
@@ -583,7 +590,7 @@
 	cp.cp_cdb[4] = 254;
 	cp.cp_cdb[5] = 0;
 
-	if (eata_pio_send_command((uint) base, EATA_CMD_PIO_SEND_CP))
+	if (eata_pio_send_command(base, EATA_CMD_PIO_SEND_CP))
 		return (NULL);
 	while (!(inb(base + HA_RSTATUS) & HA_SDRQ));
 	outsw(base + HA_RDATA, &cp, cplen);
@@ -604,7 +611,7 @@
 	}
 }
 
-static int get_pio_conf_PIO(u32 base, struct get_conf *buf)
+static int get_pio_conf_PIO(unsigned long base, struct get_conf *buf)
 {
 	unsigned long loop = HZ / 2;
 	int z;
@@ -619,30 +626,30 @@
 		if (--loop == 0)
 			goto fail;
 
-	DBG(DBG_PIO && DBG_PROBE, printk(KERN_DEBUG "Issuing PIO READ CONFIG to HBA at %#x\n", base));
+	DBG(DBG_PIO && DBG_PROBE, printk(KERN_DEBUG "Issuing PIO READ CONFIG to HBA at %#lx\n", base));
 	eata_pio_send_command(base, EATA_CMD_PIO_READ_CONFIG);
 
-	loop = HZ / 2;
+	loop = 50;
 	for (p = (unsigned short *) buf; (long) p <= ((long) buf + (sizeof(struct get_conf) / 2)); p++) {
 		while (!(inb(base + HA_RSTATUS) & HA_SDRQ))
 			if (--loop == 0)
 				goto fail;
 
-		loop = HZ / 2;
+		loop = 50;
 		*p = inw(base + HA_RDATA);
 	}
 	if (inb(base + HA_RSTATUS) & HA_SERROR) {
 		DBG(DBG_PROBE, printk("eata_dma: get_conf_PIO, error during "
-					"transfer for HBA at %x\n", base));
+					"transfer for HBA at %lx\n", base));
 		goto fail;
 	}
 
-	if (htonl(EATA_SIGNATURE) != buf->signature)
+	if (cpu_to_be32(EATA_SIGNATURE) != buf->signature)
 		goto fail;
 
 	DBG(DBG_PIO && DBG_PROBE, printk(KERN_NOTICE "EATA Controller found "
-				"at %#4x EATA Level: %x\n",
-				base, (uint) (buf->version)));
+				"at %#4lx EATA Level: %x\n",
+				base, (unsigned int) (buf->version)));
 
 	while (inb(base + HA_RSTATUS) & HA_SDRQ)
 		inw(base + HA_RDATA);
@@ -665,12 +672,12 @@
 static void print_pio_config(struct get_conf *gc)
 {
 	printk("Please check values: (read config data)\n");
-	printk("LEN: %d ver:%d OCS:%d TAR:%d TRNXFR:%d MORES:%d\n", (uint) ntohl(gc->len), gc->version, gc->OCS_enabled, gc->TAR_support, gc->TRNXFR, gc->MORE_support);
-	printk("HAAV:%d SCSIID0:%d ID1:%d ID2:%d QUEUE:%d SG:%d SEC:%d\n", gc->HAA_valid, gc->scsi_id[3], gc->scsi_id[2], gc->scsi_id[1], ntohs(gc->queuesiz), ntohs(gc->SGsiz), gc->SECOND);
+	printk("LEN: %d ver:%d OCS:%d TAR:%d TRNXFR:%d MORES:%d\n", be32_to_cpu(gc->len), gc->version, gc->OCS_enabled, gc->TAR_support, gc->TRNXFR, gc->MORE_support);
+	printk("HAAV:%d SCSIID0:%d ID1:%d ID2:%d QUEUE:%d SG:%d SEC:%d\n", gc->HAA_valid, gc->scsi_id[3], gc->scsi_id[2], gc->scsi_id[1], be16_to_cpu(gc->queuesiz), be16_to_cpu(gc->SGsiz), gc->SECOND);
 	printk("IRQ:%d IRQT:%d FORCADR:%d MCH:%d RIDQ:%d\n", gc->IRQ, gc->IRQ_TR, gc->FORCADR, gc->MAX_CHAN, gc->ID_qest);
 }
 
-static uint print_selftest(uint base)
+static unsigned int print_selftest(unsigned int base)
 {
 	unsigned char buffer[512];
 #ifdef VERBOSE_SETUP
@@ -697,7 +704,7 @@
 	return (!(inb(base + HA_RSTATUS) & HA_SERROR));
 }
 
-static int register_pio_HBA(long base, struct get_conf *gc)
+static int register_pio_HBA(long base, struct get_conf *gc, struct pci_dev *pdev)
 {
 	unsigned long size = 0;
 	char *buff;
@@ -714,17 +721,17 @@
 			return 0;
 	}
 
-	if ((buff = get_pio_board_data((uint) base, gc->IRQ, gc->scsi_id[3], cplen = (htonl(gc->cplen) + 1) / 2, cppadlen = (htons(gc->cppadlen) + 1) / 2)) == NULL) {
-		printk("HBA at %#lx didn't react on INQUIRY. Sorry.\n", (unsigned long) base);
+	if ((buff = get_pio_board_data(base, gc->IRQ, gc->scsi_id[3], cplen = (cpu_to_be32(gc->cplen) + 1) / 2, cppadlen = (cpu_to_be16(gc->cppadlen) + 1) / 2)) == NULL) {
+		printk("HBA at %#lx didn't react on INQUIRY. Sorry.\n", base);
 		return 0;
 	}
 
 	if (!print_selftest(base) && !ALLOW_DMA_BOARDS) {
-		printk("HBA at %#lx failed while performing self test & setup.\n", (unsigned long) base);
+		printk("HBA at %#lx failed while performing self test & setup.\n", base);
 		return 0;
 	}
 
-	size = sizeof(hostdata) + (sizeof(struct eata_ccb) * ntohs(gc->queuesiz));
+	size = sizeof(hostdata) + (sizeof(struct eata_ccb) * be16_to_cpu(gc->queuesiz));
 
 	sh = scsi_register(&driver_template, size);
 	if (sh == NULL)
@@ -749,8 +756,8 @@
 
 	hd = SD(sh);
 
-	memset(hd->ccb, 0, (sizeof(struct eata_ccb) * ntohs(gc->queuesiz)));
-	memset(hd->reads, 0, sizeof(unsigned long) * 26);
+	memset(hd->ccb, 0, (sizeof(struct eata_ccb) * be16_to_cpu(gc->queuesiz)));
+	memset(hd->reads, 0, sizeof(hd->reads));
 
 	strlcpy(SD(sh)->vendor, &buff[8], sizeof(SD(sh)->vendor));
 	strlcpy(SD(sh)->name, &buff[16], sizeof(SD(sh)->name));
@@ -761,7 +768,7 @@
 	SD(sh)->revision[4] = buff[35];
 	SD(sh)->revision[5] = 0;
 
-	switch (ntohl(gc->len)) {
+	switch (be32_to_cpu(gc->len)) {
 	case 0x1c:
 		SD(sh)->EATA_revision = 'a';
 		break;
@@ -777,7 +784,7 @@
 		SD(sh)->EATA_revision = '?';
 	}
 
-	if (ntohl(gc->len) >= 0x22) {
+	if (be32_to_cpu(gc->len) >= 0x22) {
 		if (gc->is_PCI)
 			hd->bustype = IS_PCI;
 		else if (gc->is_EISA)
@@ -811,6 +818,8 @@
 
 	hd->channel = 0;
 
+	hd->pdev = pci_dev_get(pdev);	/* Keep a PCI reference */
+
 	sh->max_id = 8;
 	sh->max_lun = 8;
 
@@ -841,7 +850,7 @@
 			continue;
 		if (!get_pio_conf_PIO(ISAbases[i], buf))
 			continue;
-		if (!register_pio_HBA(ISAbases[i], buf))
+		if (!register_pio_HBA(ISAbases[i], buf, NULL))
 			release_region(ISAbases[i], 9);
 		else
 			ISAbases[i] = 0;
@@ -873,7 +882,7 @@
 				if (get_pio_conf_PIO(base, buf)) {
 					DBG(DBG_PROBE && DBG_EISA, print_pio_config(buf));
 					if (buf->IRQ) {
-						if (!register_pio_HBA(base, buf))
+						if (!register_pio_HBA(base, buf, NULL))
 							release_region(base, 9);
 					} else {
 						printk(KERN_NOTICE "eata_dma: No valid IRQ. HBA " "removed from list\n");
@@ -896,9 +905,9 @@
 	printk("eata_dma: kernel PCI support not enabled. Skipping scan for PCI HBAs.\n");
 #else
 	struct pci_dev *dev = NULL;
-	u32 base, x;
+	unsigned long base, x;
 
-	while ((dev = pci_find_device(PCI_VENDOR_ID_DPT, PCI_DEVICE_ID_DPT, dev)) != NULL) {
+	while ((dev = pci_get_device(PCI_VENDOR_ID_DPT, PCI_DEVICE_ID_DPT, dev)) != NULL) {
 		DBG(DBG_PROBE && DBG_PCI, printk("eata_pio: find_PCI, HBA at %s\n", pci_name(dev)));
 		if (pci_enable_device(dev))
 			continue;
@@ -926,7 +935,7 @@
 				 * eventually remove it from the EISA and ISA list 
 				 */
 
-				if (!register_pio_HBA(base, buf)) {
+				if (!register_pio_HBA(base, buf, dev)) {
 					release_region(base, 9);
 					continue;
 				}
@@ -976,12 +985,12 @@
 		printk("Registered HBAs:\n");
 		printk("HBA no. Boardtype: Revis: EATA: Bus: BaseIO: IRQ: Ch: ID: Pr:" " QS: SG: CPL:\n");
 		for (i = 1; i <= registered_HBAs; i++) {
-			printk("scsi%-2d: %.10s v%s 2.0%c  %s %#.4x   %2d   %d   %d   %c"
+			printk("scsi%-2d: %.10s v%s 2.0%c  %s %#.4lx   %2d   %d   %d   %c"
 			       "  %2d  %2d  %2d\n",
 			       HBA_ptr->host_no, SD(HBA_ptr)->name, SD(HBA_ptr)->revision,
 			       SD(HBA_ptr)->EATA_revision, (SD(HBA_ptr)->bustype == 'P') ?
 			       "PCI " : (SD(HBA_ptr)->bustype == 'E') ? "EISA" : "ISA ",
-			       (uint) HBA_ptr->base, HBA_ptr->irq, SD(HBA_ptr)->channel, HBA_ptr->this_id,
+			       HBA_ptr->base, HBA_ptr->irq, SD(HBA_ptr)->channel, HBA_ptr->this_id,
 			       SD(HBA_ptr)->primary ? 'Y' : 'N', HBA_ptr->can_queue,
 			       HBA_ptr->sg_tablesize, HBA_ptr->cmd_per_lun);
 			HBA_ptr = SD(HBA_ptr)->next;
diff --git a/drivers/scsi/fcal.c b/drivers/scsi/fcal.c
index 7f89102..c4e16c0 100644
--- a/drivers/scsi/fcal.c
+++ b/drivers/scsi/fcal.c
@@ -248,8 +248,7 @@
 				if (scd->id == target) {
 					SPRINTF ("  [AL-PA: %02x, Id: %02d, Port WWN: %08x%08x, Node WWN: %08x%08x]  ",
 						alpa, target, u1[0], u1[1], u2[0], u2[1]);
-					SPRINTF ("%s ", (scd->type < MAX_SCSI_DEVICE_CODE) ?
-						scsi_device_types[(short) scd->type] : "Unknown device");
+					SPRINTF ("%s ", scsi_device_type(scd->type));
 
 					for (j = 0; (j < 8) && (scd->vendor[j] >= 0x20); j++)
 						SPRINTF ("%c", scd->vendor[j]);
diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 67f1100..cdd893b 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -811,7 +811,6 @@
 	struct NCR5380_hostdata *hostdata;
 #ifdef NCR5380_STATS
 	struct scsi_device *dev;
-	extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
 #endif
 
 	NCR5380_setup(scsi_ptr);
@@ -851,7 +850,7 @@
 		long tr = hostdata->time_read[dev->id] / HZ;
 		long tw = hostdata->time_write[dev->id] / HZ;
 
-		PRINTP("  T:%d %s " ANDP dev->id ANDP(dev->type < MAX_SCSI_DEVICE_CODE) ? scsi_device_types[(int) dev->type] : "Unknown");
+		PRINTP("  T:%d %s " ANDP dev->id ANDP scsi_device_type(dev->type));
 		for (i = 0; i < 8; i++)
 			if (dev->vendor[i] >= 0x20)
 				*(buffer + (len++)) = dev->vendor[i];
diff --git a/drivers/scsi/gvp11.c b/drivers/scsi/gvp11.c
index a0d831b..18dbe5c 100644
--- a/drivers/scsi/gvp11.c
+++ b/drivers/scsi/gvp11.c
@@ -47,7 +47,7 @@
     gvp11_xfer_mask = ints[1];
 }
 
-static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
+static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
     unsigned short cntr = GVP11_DMAC_INT_ENABLE;
     unsigned long addr = virt_to_bus(cmd->SCp.ptr);
@@ -142,8 +142,8 @@
     return 0;
 }
 
-static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt,
-		      int status)
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
+		     int status)
 {
     /* stop DMA */
     DMA(instance)->SP_DMA = 1;
@@ -341,7 +341,7 @@
     return num_gvp11;
 }
 
-static int gvp11_bus_reset(Scsi_Cmnd *cmd)
+static int gvp11_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 
diff --git a/drivers/scsi/gvp11.h b/drivers/scsi/gvp11.h
index 575d219d..bf22859 100644
--- a/drivers/scsi/gvp11.h
+++ b/drivers/scsi/gvp11.h
@@ -13,10 +13,6 @@
 
 int gvp11_detect(struct scsi_host_template *);
 int gvp11_release(struct Scsi_Host *);
-const char *wd33c93_info(void);
-int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-int wd33c93_abort(Scsi_Cmnd *);
-int wd33c93_reset(Scsi_Cmnd *, unsigned int);
 
 #ifndef CMD_PER_LUN
 #define CMD_PER_LUN 2
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index dfcb96f..68ef163 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -265,6 +265,9 @@
 		destroy_workqueue(shost->work_q);
 
 	scsi_destroy_command_freelist(shost);
+	if (shost->bqt)
+		blk_free_tags(shost->bqt);
+
 	kfree(shost->shost_data);
 
 	if (parent)
@@ -487,7 +490,9 @@
  * @work:	Work to queue for execution.
  *
  * Return value:
- * 	0 on success / != 0 for error
+ * 	1 - work queued for execution
+ *	0 - work is already queued
+ *	-EINVAL - work queue doesn't exist
  **/
 int scsi_queue_work(struct Scsi_Host *shost, struct work_struct *work)
 {
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index bcb3444..28bfb8f 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -15,7 +15,6 @@
  *
  * For more information, visit http://www.highpoint-tech.com
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c
index ed22b96..01b8ac6 100644
--- a/drivers/scsi/ibmvscsi/rpa_vscsi.c
+++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c
@@ -156,8 +156,8 @@
 {
 	struct device_node *rootdn;
 
-	char *ppartition_name;
-	unsigned int *p_number_ptr;
+	const char *ppartition_name;
+	const unsigned int *p_number_ptr;
 
 	/* Retrieve information about this partition */
 	rootdn = find_path_device("/");
@@ -165,14 +165,11 @@
 		return;
 	}
 
-	ppartition_name =
-		get_property(rootdn, "ibm,partition-name", NULL);
+	ppartition_name = get_property(rootdn, "ibm,partition-name", NULL);
 	if (ppartition_name)
 		strncpy(partition_name, ppartition_name,
 				sizeof(partition_name));
-	p_number_ptr =
-		(unsigned int *)get_property(rootdn, "ibm,partition-no",
-					     NULL);
+	p_number_ptr = get_property(rootdn, "ibm,partition-no", NULL);
 	if (p_number_ptr)
 		partition_number = *p_number_ptr;
 }
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 01080b3..7ed4eef 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -175,6 +175,8 @@
 	"Qualified success"},
 	{0x01080000, 1, 1,
 	"FFFE: Soft device bus error recovered by the IOA"},
+	{0x01088100, 0, 1,
+	"4101: Soft device bus fabric error"},
 	{0x01170600, 0, 1,
 	"FFF9: Device sector reassign successful"},
 	{0x01170900, 0, 1,
@@ -225,6 +227,8 @@
 	"3109: IOA timed out a device command"},
 	{0x04088000, 0, 0,
 	"3120: SCSI bus is not operational"},
+	{0x04088100, 0, 1,
+	"4100: Hard device bus fabric error"},
 	{0x04118000, 0, 1,
 	"9000: IOA reserved area data check"},
 	{0x04118100, 0, 1,
@@ -273,6 +277,14 @@
 	"9091: Incorrect hardware configuration change has been detected"},
 	{0x04678000, 0, 1,
 	"9073: Invalid multi-adapter configuration"},
+	{0x04678100, 0, 1,
+	"4010: Incorrect connection between cascaded expanders"},
+	{0x04678200, 0, 1,
+	"4020: Connections exceed IOA design limits"},
+	{0x04678300, 0, 1,
+	"4030: Incorrect multipath connection"},
+	{0x04679000, 0, 1,
+	"4110: Unsupported enclosure function"},
 	{0x046E0000, 0, 1,
 	"FFF4: Command to logical unit failed"},
 	{0x05240000, 1, 0,
@@ -297,6 +309,8 @@
 	"9031: Array protection temporarily suspended, protection resuming"},
 	{0x06040600, 0, 1,
 	"9040: Array protection temporarily suspended, protection resuming"},
+	{0x06288000, 0, 1,
+	"3140: Device bus not ready to ready transition"},
 	{0x06290000, 0, 1,
 	"FFFB: SCSI bus was reset"},
 	{0x06290500, 0, 0,
@@ -319,6 +333,16 @@
 	"3150: SCSI bus configuration error"},
 	{0x06678100, 0, 1,
 	"9074: Asymmetric advanced function disk configuration"},
+	{0x06678300, 0, 1,
+	"4040: Incomplete multipath connection between IOA and enclosure"},
+	{0x06678400, 0, 1,
+	"4041: Incomplete multipath connection between enclosure and device"},
+	{0x06678500, 0, 1,
+	"9075: Incomplete multipath connection between IOA and remote IOA"},
+	{0x06678600, 0, 1,
+	"9076: Configuration error, missing remote IOA"},
+	{0x06679100, 0, 1,
+	"4050: Enclosure does not support a required multipath function"},
 	{0x06690200, 0, 1,
 	"9041: Array protection temporarily suspended"},
 	{0x06698200, 0, 1,
@@ -331,6 +355,10 @@
 	"9072: Link not operational transition"},
 	{0x066B8200, 0, 1,
 	"9032: Array exposed but still protected"},
+	{0x066B9100, 0, 1,
+	"4061: Multipath redundancy level got better"},
+	{0x066B9200, 0, 1,
+	"4060: Multipath redundancy level got worse"},
 	{0x07270000, 0, 0,
 	"Failure due to other device"},
 	{0x07278000, 0, 1,
@@ -4099,8 +4127,7 @@
 {
 	struct ipr_ioasa *ioasa = &ipr_cmd->ioasa;
 
-	if ((be32_to_cpu(ioasa->ioasc_specific) &
-	     (IPR_ADDITIONAL_STATUS_FMT | IPR_AUTOSENSE_VALID)) == 0)
+	if ((be32_to_cpu(ioasa->ioasc_specific) & IPR_AUTOSENSE_VALID) == 0)
 		return 0;
 
 	memcpy(ipr_cmd->scsi_cmd->sense_buffer, ioasa->auto_sense.data,
@@ -4190,7 +4217,8 @@
 	case IPR_IOASC_NR_INIT_CMD_REQUIRED:
 		break;
 	default:
-		scsi_cmd->result |= (DID_ERROR << 16);
+		if (IPR_IOASC_SENSE_KEY(ioasc) > RECOVERED_ERROR)
+			scsi_cmd->result |= (DID_ERROR << 16);
 		if (!ipr_is_vset_device(res) && !ipr_is_naca_model(res))
 			res->needs_sync_complete = 1;
 		break;
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index 1ad24df..11eaff52 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -36,8 +36,8 @@
 /*
  * Literals
  */
-#define IPR_DRIVER_VERSION "2.1.3"
-#define IPR_DRIVER_DATE "(March 29, 2006)"
+#define IPR_DRIVER_VERSION "2.1.4"
+#define IPR_DRIVER_DATE "(August 2, 2006)"
 
 /*
  * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding
@@ -45,6 +45,7 @@
  *	This can be adjusted at runtime through sysfs device attributes.
  */
 #define IPR_MAX_CMD_PER_LUN				6
+#define IPR_MAX_CMD_PER_ATA_LUN			1
 
 /*
  * IPR_NUM_BASE_CMD_BLKS: This defines the maximum number of
@@ -106,7 +107,7 @@
 #define IPR_IOA_BUS						0xff
 #define IPR_IOA_TARGET					0xff
 #define IPR_IOA_LUN						0xff
-#define IPR_MAX_NUM_BUSES				8
+#define IPR_MAX_NUM_BUSES				16
 #define IPR_MAX_BUS_TO_SCAN				IPR_MAX_NUM_BUSES
 
 #define IPR_NUM_RESET_RELOAD_RETRIES		3
@@ -145,6 +146,7 @@
 #define	IPR_LUN_RESET					0x40
 #define	IPR_TARGET_RESET					0x20
 #define	IPR_BUS_RESET					0x10
+#define	IPR_ATA_PHY_RESET					0x80
 #define IPR_ID_HOST_RR_Q				0xC4
 #define IPR_QUERY_IOA_CONFIG				0xC5
 #define IPR_CANCEL_ALL_REQUESTS			0xCE
@@ -295,7 +297,11 @@
 }__attribute__ ((packed));
 
 struct ipr_config_table_entry {
-	u8 service_level;
+	u8 proto;
+#define IPR_PROTO_SATA			0x02
+#define IPR_PROTO_SATA_ATAPI		0x03
+#define IPR_PROTO_SAS_STP		0x06
+#define IPR_PROTO_SAS_STP_ATAPI	0x07
 	u8 array_id;
 	u8 flags;
 #define IPR_IS_IOA_RESOURCE	0x80
@@ -307,6 +313,7 @@
 #define IPR_SUBTYPE_AF_DASD			0
 #define IPR_SUBTYPE_GENERIC_SCSI	1
 #define IPR_SUBTYPE_VOLUME_SET		2
+#define IPR_SUBTYPE_GENERIC_ATA	4
 
 #define IPR_QUEUEING_MODEL(res)	((((res)->cfgte.flags) & 0x70) >> 4)
 #define IPR_QUEUE_FROZEN_MODEL	0
@@ -350,6 +357,7 @@
 #define IPR_RQTYPE_SCSICDB		0x00
 #define IPR_RQTYPE_IOACMD		0x01
 #define IPR_RQTYPE_HCAM			0x02
+#define IPR_RQTYPE_ATA_PASSTHRU	0x04
 
 	u8 luntar_luntrn;
 
@@ -373,6 +381,37 @@
 	__be16 timeout;
 }__attribute__ ((packed, aligned(4)));
 
+struct ipr_ioarcb_ata_regs {
+	u8 flags;
+#define IPR_ATA_FLAG_PACKET_CMD			0x80
+#define IPR_ATA_FLAG_XFER_TYPE_DMA			0x40
+#define IPR_ATA_FLAG_STATUS_ON_GOOD_COMPLETION	0x20
+	u8 reserved[3];
+
+	__be16 data;
+	u8 feature;
+	u8 nsect;
+	u8 lbal;
+	u8 lbam;
+	u8 lbah;
+	u8 device;
+	u8 command;
+	u8 reserved2[3];
+	u8 hob_feature;
+	u8 hob_nsect;
+	u8 hob_lbal;
+	u8 hob_lbam;
+	u8 hob_lbah;
+	u8 ctl;
+}__attribute__ ((packed, aligned(4)));
+
+struct ipr_ioarcb_add_data {
+	union {
+		struct ipr_ioarcb_ata_regs regs;
+		__be32 add_cmd_parms[10];
+	}u;
+}__attribute__ ((packed, aligned(4)));
+
 /* IOA Request Control Block    128 bytes  */
 struct ipr_ioarcb {
 	__be32 ioarcb_host_pci_addr;
@@ -397,7 +436,7 @@
 	struct ipr_cmd_pkt cmd_pkt;
 
 	__be32 add_cmd_parms_len;
-	__be32 add_cmd_parms[10];
+	struct ipr_ioarcb_add_data add_data;
 }__attribute__((packed, aligned (4)));
 
 struct ipr_ioadl_desc {
@@ -433,6 +472,21 @@
 	__be32 ioa_data[2];
 }__attribute__((packed, aligned (4)));
 
+struct ipr_ioasa_gata {
+	u8 error;
+	u8 nsect;		/* Interrupt reason */
+	u8 lbal;
+	u8 lbam;
+	u8 lbah;
+	u8 device;
+	u8 status;
+	u8 alt_status;	/* ATA CTL */
+	u8 hob_nsect;
+	u8 hob_lbal;
+	u8 hob_lbam;
+	u8 hob_lbah;
+}__attribute__((packed, aligned (4)));
+
 struct ipr_auto_sense {
 	__be16 auto_sense_len;
 	__be16 ioa_data_len;
@@ -466,6 +520,7 @@
 	__be32 ioasc_specific;	/* status code specific field */
 #define IPR_ADDITIONAL_STATUS_FMT		0x80000000
 #define IPR_AUTOSENSE_VALID			0x40000000
+#define IPR_ATA_DEVICE_WAS_RESET		0x20000000
 #define IPR_IOASC_SPECIFIC_MASK		0x00ffffff
 #define IPR_FIELD_POINTER_VALID		(0x80000000 >> 8)
 #define IPR_FIELD_POINTER_MASK		0x0000ffff
@@ -474,6 +529,7 @@
 		struct ipr_ioasa_vset vset;
 		struct ipr_ioasa_af_dasd dasd;
 		struct ipr_ioasa_gpdd gpdd;
+		struct ipr_ioasa_gata gata;
 	} u;
 
 	struct ipr_auto_sense auto_sense;
@@ -1308,6 +1364,22 @@
 }
 
 /**
+ * ipr_is_gata - Determine if a resource is a generic ATA resource
+ * @res:	resource entry struct
+ *
+ * Return value:
+ * 	1 if GATA / 0 if not GATA
+ **/
+static inline int ipr_is_gata(struct ipr_resource_entry *res)
+{
+	if (!ipr_is_ioa_resource(res) &&
+	    IPR_RES_SUBTYPE(res) == IPR_SUBTYPE_GENERIC_ATA)
+		return 1;
+	else
+		return 0;
+}
+
+/**
  * ipr_is_naca_model - Determine if a resource is using NACA queueing model
  * @res:	resource entry struct
  *
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 058f094..0a9dbc5 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -108,8 +108,8 @@
 {
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 
-	crypto_digest_digest(tcp_conn->tx_tfm, &buf->sg, 1, crc);
-	buf->sg.length += sizeof(uint32_t);
+	crypto_hash_digest(&tcp_conn->tx_hash, &buf->sg, buf->sg.length, crc);
+	buf->sg.length = tcp_conn->hdr_size;
 }
 
 static inline int
@@ -281,7 +281,6 @@
 {
 	struct iscsi_data *hdr;
 	struct scsi_cmnd *sc = ctask->sc;
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 
 	hdr = &r2t->dtask.hdr;
 	memset(hdr, 0, sizeof(struct iscsi_data));
@@ -336,10 +335,12 @@
 			sg_count += sg->length;
 		}
 		BUG_ON(r2t->sg == NULL);
-	} else
-		iscsi_buf_init_iov(&tcp_ctask->sendbuf,
+	} else {
+		iscsi_buf_init_iov(&r2t->sendbuf,
 			    (char*)sc->request_buffer + r2t->data_offset,
 			    r2t->data_count);
+		r2t->sg = NULL;
+	}
 }
 
 /**
@@ -358,8 +359,11 @@
 	int r2tsn = be32_to_cpu(rhdr->r2tsn);
 	int rc;
 
-	if (tcp_conn->in.datalen)
+	if (tcp_conn->in.datalen) {
+		printk(KERN_ERR "iscsi_tcp: invalid R2t with datalen %d\n",
+		       tcp_conn->in.datalen);
 		return ISCSI_ERR_DATALEN;
+	}
 
 	if (tcp_ctask->exp_r2tsn && tcp_ctask->exp_r2tsn != r2tsn)
 		return ISCSI_ERR_R2TSN;
@@ -385,15 +389,23 @@
 
 	r2t->exp_statsn = rhdr->statsn;
 	r2t->data_length = be32_to_cpu(rhdr->data_length);
-	if (r2t->data_length == 0 ||
-	    r2t->data_length > session->max_burst) {
+	if (r2t->data_length == 0) {
+		printk(KERN_ERR "iscsi_tcp: invalid R2T with zero data len\n");
 		spin_unlock(&session->lock);
 		return ISCSI_ERR_DATALEN;
 	}
 
+	if (r2t->data_length > session->max_burst)
+		debug_scsi("invalid R2T with data len %u and max burst %u."
+			   "Attempting to execute request.\n",
+			    r2t->data_length, session->max_burst);
+
 	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
 	if (r2t->data_offset + r2t->data_length > ctask->total_length) {
 		spin_unlock(&session->lock);
+		printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at "
+		       "offset %u and total length %d\n", r2t->data_length,
+		       r2t->data_offset, ctask->total_length);
 		return ISCSI_ERR_DATALEN;
 	}
 
@@ -456,7 +468,8 @@
 
 		sg_init_one(&sg, (u8 *)hdr,
 			    sizeof(struct iscsi_hdr) + ahslen);
-		crypto_digest_digest(tcp_conn->rx_tfm, &sg, 1, (u8 *)&cdgst);
+		crypto_hash_digest(&tcp_conn->rx_hash, &sg, sg.length,
+				   (u8 *)&cdgst);
 		rdgst = *(uint32_t*)((char*)hdr + sizeof(struct iscsi_hdr) +
 				     ahslen);
 		if (cdgst != rdgst) {
@@ -492,7 +505,6 @@
 			goto copy_hdr;
 
 		spin_lock(&session->lock);
-		iscsi_tcp_cleanup_ctask(conn, tcp_conn->in.ctask);
 		rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
 		spin_unlock(&session->lock);
 		break;
@@ -637,10 +649,9 @@
  *	byte counters.
  **/
 static inline int
-iscsi_tcp_copy(struct iscsi_conn *conn)
+iscsi_tcp_copy(struct iscsi_conn *conn, int buf_size)
 {
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	int buf_size = tcp_conn->in.datalen;
 	int buf_left = buf_size - tcp_conn->data_copied;
 	int size = min(tcp_conn->in.copy, buf_left);
 	int rc;
@@ -665,15 +676,15 @@
 }
 
 static inline void
-partial_sg_digest_update(struct iscsi_tcp_conn *tcp_conn,
-			 struct scatterlist *sg, int offset, int length)
+partial_sg_digest_update(struct hash_desc *desc, struct scatterlist *sg,
+			 int offset, int length)
 {
 	struct scatterlist temp;
 
 	memcpy(&temp, sg, sizeof(struct scatterlist));
 	temp.offset = offset;
 	temp.length = length;
-	crypto_digest_update(tcp_conn->data_rx_tfm, &temp, 1);
+	crypto_hash_update(desc, &temp, length);
 }
 
 static void
@@ -682,7 +693,7 @@
 	struct scatterlist tmp;
 
 	sg_init_one(&tmp, buf, len);
-	crypto_digest_update(tcp_conn->data_rx_tfm, &tmp, 1);
+	crypto_hash_update(&tcp_conn->rx_hash, &tmp, len);
 }
 
 static int iscsi_scsi_data_in(struct iscsi_conn *conn)
@@ -736,11 +747,12 @@
 		if (!rc) {
 			if (conn->datadgst_en) {
 				if (!offset)
-					crypto_digest_update(
-							tcp_conn->data_rx_tfm,
+					crypto_hash_update(
+							&tcp_conn->rx_hash,
 							&sg[i], 1);
 				else
-					partial_sg_digest_update(tcp_conn,
+					partial_sg_digest_update(
+							&tcp_conn->rx_hash,
 							&sg[i],
 							sg[i].offset + offset,
 							sg[i].length - offset);
@@ -754,8 +766,10 @@
 				/*
 				 * data-in is complete, but buffer not...
 				 */
-				partial_sg_digest_update(tcp_conn, &sg[i],
-						sg[i].offset, sg[i].length-rc);
+				partial_sg_digest_update(&tcp_conn->rx_hash,
+							 &sg[i],
+							 sg[i].offset,
+							 sg[i].length-rc);
 			rc = 0;
 			break;
 		}
@@ -772,7 +786,6 @@
 			   (long)sc, sc->result, ctask->itt,
 			   tcp_conn->in.hdr->flags);
 		spin_lock(&conn->session->lock);
-		iscsi_tcp_cleanup_ctask(conn, ctask);
 		__iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
 		spin_unlock(&conn->session->lock);
 	}
@@ -792,9 +805,6 @@
 		rc = iscsi_scsi_data_in(conn);
 		break;
 	case ISCSI_OP_SCSI_CMD_RSP:
-		spin_lock(&conn->session->lock);
-		iscsi_tcp_cleanup_ctask(conn, tcp_conn->in.ctask);
-		spin_unlock(&conn->session->lock);
 	case ISCSI_OP_TEXT_RSP:
 	case ISCSI_OP_LOGIN_RSP:
 	case ISCSI_OP_ASYNC_EVENT:
@@ -803,7 +813,7 @@
 		 * Collect data segment to the connection's data
 		 * placeholder
 		 */
-		if (iscsi_tcp_copy(conn)) {
+		if (iscsi_tcp_copy(conn, tcp_conn->in.datalen)) {
 			rc = -EAGAIN;
 			goto exit;
 		}
@@ -876,10 +886,8 @@
 		 */
 		rc = iscsi_tcp_hdr_recv(conn);
 		if (!rc && tcp_conn->in.datalen) {
-			if (conn->datadgst_en) {
-				BUG_ON(!tcp_conn->data_rx_tfm);
-				crypto_digest_init(tcp_conn->data_rx_tfm);
-			}
+			if (conn->datadgst_en)
+				crypto_hash_init(&tcp_conn->rx_hash);
 			tcp_conn->in_progress = IN_PROGRESS_DATA_RECV;
 		} else if (rc) {
 			iscsi_conn_failure(conn, rc);
@@ -892,10 +900,15 @@
 
 		debug_tcp("extra data_recv offset %d copy %d\n",
 			  tcp_conn->in.offset, tcp_conn->in.copy);
-		skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
-				&recv_digest, 4);
-		tcp_conn->in.offset += 4;
-		tcp_conn->in.copy -= 4;
+		rc = iscsi_tcp_copy(conn, sizeof(uint32_t));
+		if (rc) {
+			if (rc == -EAGAIN)
+				goto again;
+			iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+			return 0;
+		}
+
+		memcpy(&recv_digest, conn->data, sizeof(uint32_t));
 		if (recv_digest != tcp_conn->in.datadgst) {
 			debug_tcp("iscsi_tcp: data digest error!"
 				  "0x%x != 0x%x\n", recv_digest,
@@ -931,13 +944,14 @@
 					  tcp_conn->in.padding);
 				memset(pad, 0, tcp_conn->in.padding);
 				sg_init_one(&sg, pad, tcp_conn->in.padding);
-				crypto_digest_update(tcp_conn->data_rx_tfm,
-						     &sg, 1);
+				crypto_hash_update(&tcp_conn->rx_hash,
+						   &sg, sg.length);
 			}
-			crypto_digest_final(tcp_conn->data_rx_tfm,
-					    (u8 *) & tcp_conn->in.datadgst);
+			crypto_hash_final(&tcp_conn->rx_hash,
+					  (u8 *) &tcp_conn->in.datadgst);
 			debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
 			tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
+			tcp_conn->data_copied = 0;
 		} else
 			tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
 	}
@@ -1177,37 +1191,12 @@
 
 static inline void
 iscsi_data_digest_init(struct iscsi_tcp_conn *tcp_conn,
-		      struct iscsi_cmd_task *ctask)
+		      struct iscsi_tcp_cmd_task *tcp_ctask)
 {
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-
-	BUG_ON(!tcp_conn->data_tx_tfm);
-	crypto_digest_init(tcp_conn->data_tx_tfm);
+	crypto_hash_init(&tcp_conn->tx_hash);
 	tcp_ctask->digest_count = 4;
 }
 
-static int
-iscsi_digest_final_send(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
-			struct iscsi_buf *buf, uint32_t *digest, int final)
-{
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	int rc = 0;
-	int sent = 0;
-
-	if (final)
-		crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest);
-
-	iscsi_buf_init_iov(buf, (char*)digest, 4);
-	rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent);
-	if (rc) {
-		tcp_ctask->datadigest = *digest;
-		tcp_ctask->xmstate |= XMSTATE_DATA_DIGEST;
-	} else
-		tcp_ctask->digest_count = 4;
-	return rc;
-}
-
 /**
  * iscsi_solicit_data_cont - initialize next Data-Out
  * @conn: iscsi connection
@@ -1225,7 +1214,6 @@
 iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 			struct iscsi_r2t_info *r2t, int left)
 {
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 	struct iscsi_data *hdr;
 	struct scsi_cmnd *sc = ctask->sc;
 	int new_offset;
@@ -1254,27 +1242,30 @@
 	iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr,
 			   sizeof(struct iscsi_hdr));
 
-	if (sc->use_sg && !iscsi_buf_left(&r2t->sendbuf)) {
-		BUG_ON(tcp_ctask->bad_sg == r2t->sg);
+	if (iscsi_buf_left(&r2t->sendbuf))
+		return;
+
+	if (sc->use_sg) {
 		iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg);
 		r2t->sg += 1;
-	} else
-		iscsi_buf_init_iov(&tcp_ctask->sendbuf,
+	} else {
+		iscsi_buf_init_iov(&r2t->sendbuf,
 			    (char*)sc->request_buffer + new_offset,
 			    r2t->data_count);
+		r2t->sg = NULL;
+	}
 }
 
-static void
-iscsi_unsolicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static void iscsi_set_padding(struct iscsi_tcp_cmd_task *tcp_ctask,
+			      unsigned long len)
 {
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_data_task *dtask;
+	tcp_ctask->pad_count = len & (ISCSI_PAD_LEN - 1);
+	if (!tcp_ctask->pad_count)
+		return;
 
-	dtask = tcp_ctask->dtask = &tcp_ctask->unsol_dtask;
-	iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr,
-				      tcp_ctask->r2t_data_count);
-	iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr,
-			   sizeof(struct iscsi_hdr));
+	tcp_ctask->pad_count = ISCSI_PAD_LEN - tcp_ctask->pad_count;
+	debug_scsi("write padding %d bytes\n", tcp_ctask->pad_count);
+	tcp_ctask->xmstate |= XMSTATE_W_PAD;
 }
 
 /**
@@ -1302,38 +1293,20 @@
 		if (sc->use_sg) {
 			struct scatterlist *sg = sc->request_buffer;
 
-			iscsi_buf_init_sg(&tcp_ctask->sendbuf,
-					  &sg[tcp_ctask->sg_count++]);
-			tcp_ctask->sg = sg;
+			iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg);
+			tcp_ctask->sg = sg + 1;
 			tcp_ctask->bad_sg = sg + sc->use_sg;
-		} else
+		} else {
 			iscsi_buf_init_iov(&tcp_ctask->sendbuf,
 					   sc->request_buffer,
 					   sc->request_bufflen);
-
-		if (ctask->imm_count)
-			tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
-
-		tcp_ctask->pad_count = ctask->total_length & (ISCSI_PAD_LEN-1);
-		if (tcp_ctask->pad_count) {
-			tcp_ctask->pad_count = ISCSI_PAD_LEN -
-							tcp_ctask->pad_count;
-			debug_scsi("write padding %d bytes\n",
-				   tcp_ctask->pad_count);
-			tcp_ctask->xmstate |= XMSTATE_W_PAD;
+			tcp_ctask->sg = NULL;
+			tcp_ctask->bad_sg = NULL;
 		}
-
-		if (ctask->unsol_count)
-			tcp_ctask->xmstate |= XMSTATE_UNS_HDR |
-						XMSTATE_UNS_INIT;
-		tcp_ctask->r2t_data_count = ctask->total_length -
-				    ctask->imm_count -
-				    ctask->unsol_count;
-
-		debug_scsi("cmd [itt 0x%x total %d imm %d imm_data %d "
-			   "r2t_data %d]\n",
+		debug_scsi("cmd [itt 0x%x total %d imm_data %d "
+			   "unsol count %d, unsol offset %d]\n",
 			   ctask->itt, ctask->total_length, ctask->imm_count,
-			   ctask->unsol_count, tcp_ctask->r2t_data_count);
+			   ctask->unsol_count, ctask->unsol_offset);
 	} else
 		tcp_ctask->xmstate = XMSTATE_R_HDR;
 
@@ -1415,8 +1388,8 @@
 }
 
 static inline int
-handle_xmstate_r_hdr(struct iscsi_conn *conn,
-		     struct iscsi_tcp_cmd_task *tcp_ctask)
+iscsi_send_read_hdr(struct iscsi_conn *conn,
+		    struct iscsi_tcp_cmd_task *tcp_ctask)
 {
 	int rc;
 
@@ -1434,7 +1407,7 @@
 }
 
 static inline int
-handle_xmstate_w_hdr(struct iscsi_conn *conn,
+iscsi_send_write_hdr(struct iscsi_conn *conn,
 		     struct iscsi_cmd_task *ctask)
 {
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
@@ -1445,85 +1418,126 @@
 		iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
 				 (u8*)tcp_ctask->hdrext);
 	rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count);
-	if (rc)
-		tcp_ctask->xmstate |= XMSTATE_W_HDR;
-	return rc;
-}
-
-static inline int
-handle_xmstate_data_digest(struct iscsi_conn *conn,
-			   struct iscsi_cmd_task *ctask)
-{
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	int rc;
-
-	tcp_ctask->xmstate &= ~XMSTATE_DATA_DIGEST;
-	debug_tcp("resent data digest 0x%x\n", tcp_ctask->datadigest);
-	rc = iscsi_digest_final_send(conn, ctask, &tcp_ctask->immbuf,
-				    &tcp_ctask->datadigest, 0);
 	if (rc) {
-		tcp_ctask->xmstate |= XMSTATE_DATA_DIGEST;
-		debug_tcp("resent data digest 0x%x fail!\n",
-			  tcp_ctask->datadigest);
+		tcp_ctask->xmstate |= XMSTATE_W_HDR;
+		return rc;
 	}
 
-	return rc;
-}
+	if (ctask->imm_count) {
+		tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
+		iscsi_set_padding(tcp_ctask, ctask->imm_count);
 
-static inline int
-handle_xmstate_imm_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
-{
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	int rc;
-
-	BUG_ON(!ctask->imm_count);
-	tcp_ctask->xmstate &= ~XMSTATE_IMM_DATA;
-
-	if (conn->datadgst_en) {
-		iscsi_data_digest_init(tcp_conn, ctask);
-		tcp_ctask->immdigest = 0;
-	}
-
-	for (;;) {
-		rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf,
-				   &ctask->imm_count, &tcp_ctask->sent);
-		if (rc) {
-			tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
-			if (conn->datadgst_en) {
-				crypto_digest_final(tcp_conn->data_tx_tfm,
-						(u8*)&tcp_ctask->immdigest);
-				debug_tcp("tx imm sendpage fail 0x%x\n",
-					  tcp_ctask->datadigest);
-			}
-			return rc;
+		if (ctask->conn->datadgst_en) {
+			iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
+			tcp_ctask->immdigest = 0;
 		}
-		if (conn->datadgst_en)
-			crypto_digest_update(tcp_conn->data_tx_tfm,
-					     &tcp_ctask->sendbuf.sg, 1);
-
-		if (!ctask->imm_count)
-			break;
-		iscsi_buf_init_sg(&tcp_ctask->sendbuf,
-				  &tcp_ctask->sg[tcp_ctask->sg_count++]);
 	}
 
-	if (conn->datadgst_en && !(tcp_ctask->xmstate & XMSTATE_W_PAD)) {
-		rc = iscsi_digest_final_send(conn, ctask, &tcp_ctask->immbuf,
-				            &tcp_ctask->immdigest, 1);
-		if (rc) {
-			debug_tcp("sending imm digest 0x%x fail!\n",
-				  tcp_ctask->immdigest);
-			return rc;
-		}
-		debug_tcp("sending imm digest 0x%x\n", tcp_ctask->immdigest);
-	}
-
+	if (ctask->unsol_count)
+		tcp_ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT;
 	return 0;
 }
 
-static inline int
-handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static int
+iscsi_send_padding(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+{
+	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	int sent = 0, rc;
+
+	if (tcp_ctask->xmstate & XMSTATE_W_PAD) {
+		iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad,
+				   tcp_ctask->pad_count);
+		if (conn->datadgst_en)
+			crypto_hash_update(&tcp_conn->tx_hash,
+					   &tcp_ctask->sendbuf.sg,
+					   tcp_ctask->sendbuf.sg.length);
+	} else if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_PAD))
+		return 0;
+
+	tcp_ctask->xmstate &= ~XMSTATE_W_PAD;
+	tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_PAD;
+	debug_scsi("sending %d pad bytes for itt 0x%x\n",
+		   tcp_ctask->pad_count, ctask->itt);
+	rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, &tcp_ctask->pad_count,
+			   &sent);
+	if (rc) {
+		debug_scsi("padding send failed %d\n", rc);
+		tcp_ctask->xmstate |= XMSTATE_W_RESEND_PAD;
+	}
+	return rc;
+}
+
+static int
+iscsi_send_digest(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
+			struct iscsi_buf *buf, uint32_t *digest)
+{
+	struct iscsi_tcp_cmd_task *tcp_ctask;
+	struct iscsi_tcp_conn *tcp_conn;
+	int rc, sent = 0;
+
+	if (!conn->datadgst_en)
+		return 0;
+
+	tcp_ctask = ctask->dd_data;
+	tcp_conn = conn->dd_data;
+
+	if (!(tcp_ctask->xmstate & XMSTATE_W_RESEND_DATA_DIGEST)) {
+		crypto_hash_final(&tcp_conn->tx_hash, (u8*)digest);
+		iscsi_buf_init_iov(buf, (char*)digest, 4);
+	}
+	tcp_ctask->xmstate &= ~XMSTATE_W_RESEND_DATA_DIGEST;
+
+	rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent);
+	if (!rc)
+		debug_scsi("sent digest 0x%x for itt 0x%x\n", *digest,
+			  ctask->itt);
+	else {
+		debug_scsi("sending digest 0x%x failed for itt 0x%x!\n",
+			  *digest, ctask->itt);
+		tcp_ctask->xmstate |= XMSTATE_W_RESEND_DATA_DIGEST;
+	}
+	return rc;
+}
+
+static int
+iscsi_send_data(struct iscsi_cmd_task *ctask, struct iscsi_buf *sendbuf,
+		struct scatterlist **sg, int *sent, int *count,
+		struct iscsi_buf *digestbuf, uint32_t *digest)
+{
+	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+	struct iscsi_conn *conn = ctask->conn;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	int rc, buf_sent, offset;
+
+	while (*count) {
+		buf_sent = 0;
+		offset = sendbuf->sent;
+
+		rc = iscsi_sendpage(conn, sendbuf, count, &buf_sent);
+		*sent = *sent + buf_sent;
+		if (buf_sent && conn->datadgst_en)
+			partial_sg_digest_update(&tcp_conn->tx_hash,
+				&sendbuf->sg, sendbuf->sg.offset + offset,
+				buf_sent);
+		if (!iscsi_buf_left(sendbuf) && *sg != tcp_ctask->bad_sg) {
+			iscsi_buf_init_sg(sendbuf, *sg);
+			*sg = *sg + 1;
+		}
+
+		if (rc)
+			return rc;
+	}
+
+	rc = iscsi_send_padding(conn, ctask);
+	if (rc)
+		return rc;
+
+	return iscsi_send_digest(conn, ctask, digestbuf, digest);
+}
+
+static int
+iscsi_send_unsol_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 	struct iscsi_data_task *dtask;
@@ -1531,12 +1545,17 @@
 
 	tcp_ctask->xmstate |= XMSTATE_UNS_DATA;
 	if (tcp_ctask->xmstate & XMSTATE_UNS_INIT) {
-		iscsi_unsolicit_data_init(conn, ctask);
-		dtask = tcp_ctask->dtask;
+		dtask = &tcp_ctask->unsol_dtask;
+
+		iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr);
+		iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr,
+				   sizeof(struct iscsi_hdr));
 		if (conn->hdrdgst_en)
 			iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
 					(u8*)dtask->hdrext);
+
 		tcp_ctask->xmstate &= ~XMSTATE_UNS_INIT;
+		iscsi_set_padding(tcp_ctask, ctask->data_count);
 	}
 
 	rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->data_count);
@@ -1546,254 +1565,138 @@
 		return rc;
 	}
 
+	if (conn->datadgst_en) {
+		dtask = &tcp_ctask->unsol_dtask;
+		iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
+		dtask->digest = 0;
+	}
+
 	debug_scsi("uns dout [itt 0x%x dlen %d sent %d]\n",
 		   ctask->itt, ctask->unsol_count, tcp_ctask->sent);
 	return 0;
 }
 
-static inline int
-handle_xmstate_uns_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static int
+iscsi_send_unsol_pdu(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_data_task *dtask = tcp_ctask->dtask;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	int rc;
 
-	BUG_ON(!ctask->data_count);
-	tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA;
-
-	if (conn->datadgst_en) {
-		iscsi_data_digest_init(tcp_conn, ctask);
-		dtask->digest = 0;
+	if (tcp_ctask->xmstate & XMSTATE_UNS_HDR) {
+		BUG_ON(!ctask->unsol_count);
+		tcp_ctask->xmstate &= ~XMSTATE_UNS_HDR;
+send_hdr:
+		rc = iscsi_send_unsol_hdr(conn, ctask);
+		if (rc)
+			return rc;
 	}
 
-	for (;;) {
+	if (tcp_ctask->xmstate & XMSTATE_UNS_DATA) {
+		struct iscsi_data_task *dtask = &tcp_ctask->unsol_dtask;
 		int start = tcp_ctask->sent;
 
-		rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf,
-				   &ctask->data_count, &tcp_ctask->sent);
-		if (rc) {
-			ctask->unsol_count -= tcp_ctask->sent - start;
-			tcp_ctask->xmstate |= XMSTATE_UNS_DATA;
-			/* will continue with this ctask later.. */
-			if (conn->datadgst_en) {
-				crypto_digest_final(tcp_conn->data_tx_tfm,
-						(u8 *)&dtask->digest);
-				debug_tcp("tx uns data fail 0x%x\n",
-					  dtask->digest);
-			}
-			return rc;
-		}
-
-		BUG_ON(tcp_ctask->sent > ctask->total_length);
+		rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg,
+				     &tcp_ctask->sent, &ctask->data_count,
+				     &dtask->digestbuf, &dtask->digest);
 		ctask->unsol_count -= tcp_ctask->sent - start;
-
-		/*
-		 * XXX:we may run here with un-initial sendbuf.
-		 * so pass it
-		 */
-		if (conn->datadgst_en && tcp_ctask->sent - start > 0)
-			crypto_digest_update(tcp_conn->data_tx_tfm,
-					     &tcp_ctask->sendbuf.sg, 1);
-
-		if (!ctask->data_count)
-			break;
-		iscsi_buf_init_sg(&tcp_ctask->sendbuf,
-				  &tcp_ctask->sg[tcp_ctask->sg_count++]);
-	}
-	BUG_ON(ctask->unsol_count < 0);
-
-	/*
-	 * Done with the Data-Out. Next, check if we need
-	 * to send another unsolicited Data-Out.
-	 */
-	if (ctask->unsol_count) {
-		if (conn->datadgst_en) {
-			rc = iscsi_digest_final_send(conn, ctask,
-						    &dtask->digestbuf,
-						    &dtask->digest, 1);
-			if (rc) {
-				debug_tcp("send uns digest 0x%x fail\n",
-					  dtask->digest);
-				return rc;
-			}
-			debug_tcp("sending uns digest 0x%x, more uns\n",
-				  dtask->digest);
-		}
-		tcp_ctask->xmstate |= XMSTATE_UNS_INIT;
-		return 1;
-	}
-
-	if (conn->datadgst_en && !(tcp_ctask->xmstate & XMSTATE_W_PAD)) {
-		rc = iscsi_digest_final_send(conn, ctask,
-					    &dtask->digestbuf,
-					    &dtask->digest, 1);
-		if (rc) {
-			debug_tcp("send last uns digest 0x%x fail\n",
-				   dtask->digest);
+		if (rc)
 			return rc;
+		tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA;
+		/*
+		 * Done with the Data-Out. Next, check if we need
+		 * to send another unsolicited Data-Out.
+		 */
+		if (ctask->unsol_count) {
+			debug_scsi("sending more uns\n");
+			tcp_ctask->xmstate |= XMSTATE_UNS_INIT;
+			goto send_hdr;
 		}
-		debug_tcp("sending uns digest 0x%x\n",dtask->digest);
 	}
-
 	return 0;
 }
 
-static inline int
-handle_xmstate_sol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static int iscsi_send_sol_pdu(struct iscsi_conn *conn,
+			      struct iscsi_cmd_task *ctask)
 {
-	struct iscsi_session *session = conn->session;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_r2t_info *r2t = tcp_ctask->r2t;
-	struct iscsi_data_task *dtask = &r2t->dtask;
+	struct iscsi_session *session = conn->session;
+	struct iscsi_r2t_info *r2t;
+	struct iscsi_data_task *dtask;
 	int left, rc;
 
-	tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
-	tcp_ctask->dtask = dtask;
-
-	if (conn->datadgst_en) {
-		iscsi_data_digest_init(tcp_conn, ctask);
-		dtask->digest = 0;
-	}
-solicit_again:
-	/*
-	 * send Data-Out within this R2T sequence.
-	 */
-	if (!r2t->data_count)
-		goto data_out_done;
-
-	rc = iscsi_sendpage(conn, &r2t->sendbuf, &r2t->data_count, &r2t->sent);
-	if (rc) {
-		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-		/* will continue with this ctask later.. */
-		if (conn->datadgst_en) {
-			crypto_digest_final(tcp_conn->data_tx_tfm,
-					  (u8 *)&dtask->digest);
-			debug_tcp("r2t data send fail 0x%x\n", dtask->digest);
-		}
-		return rc;
-	}
-
-	BUG_ON(r2t->data_count < 0);
-	if (conn->datadgst_en)
-		crypto_digest_update(tcp_conn->data_tx_tfm, &r2t->sendbuf.sg,
-				     1);
-
-	if (r2t->data_count) {
-		BUG_ON(ctask->sc->use_sg == 0);
-		if (!iscsi_buf_left(&r2t->sendbuf)) {
-			BUG_ON(tcp_ctask->bad_sg == r2t->sg);
-			iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg);
-			r2t->sg += 1;
-		}
-		goto solicit_again;
-	}
-
-data_out_done:
-	/*
-	 * Done with this Data-Out. Next, check if we have
-	 * to send another Data-Out for this R2T.
-	 */
-	BUG_ON(r2t->data_length - r2t->sent < 0);
-	left = r2t->data_length - r2t->sent;
-	if (left) {
-		if (conn->datadgst_en) {
-			rc = iscsi_digest_final_send(conn, ctask,
-						    &dtask->digestbuf,
-						    &dtask->digest, 1);
-			if (rc) {
-				debug_tcp("send r2t data digest 0x%x"
-					  "fail\n", dtask->digest);
-				return rc;
-			}
-			debug_tcp("r2t data send digest 0x%x\n",
-				  dtask->digest);
-		}
-		iscsi_solicit_data_cont(conn, ctask, r2t, left);
-		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+	if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
 		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
-		return 1;
-	}
+		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+		if (!tcp_ctask->r2t)
+			__kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
+				    sizeof(void*));
+send_hdr:
+		r2t = tcp_ctask->r2t;
+		dtask = &r2t->dtask;
 
-	/*
-	 * Done with this R2T. Check if there are more
-	 * outstanding R2Ts ready to be processed.
-	 */
-	BUG_ON(tcp_ctask->r2t_data_count - r2t->data_length < 0);
-	if (conn->datadgst_en) {
-		rc = iscsi_digest_final_send(conn, ctask, &dtask->digestbuf,
-					    &dtask->digest, 1);
+		if (conn->hdrdgst_en)
+			iscsi_hdr_digest(conn, &r2t->headbuf,
+					(u8*)dtask->hdrext);
+		rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
 		if (rc) {
-			debug_tcp("send last r2t data digest 0x%x"
-				  "fail\n", dtask->digest);
+			tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
+			tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
 			return rc;
 		}
-		debug_tcp("r2t done dout digest 0x%x\n", dtask->digest);
-	}
 
-	tcp_ctask->r2t_data_count -= r2t->data_length;
-	tcp_ctask->r2t = NULL;
-	spin_lock_bh(&session->lock);
-	__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
-	spin_unlock_bh(&session->lock);
-	if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
-		tcp_ctask->r2t = r2t;
-		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
-		return 1;
-	}
-
-	return 0;
-}
-
-static inline int
-handle_xmstate_w_pad(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
-{
-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	struct iscsi_data_task *dtask = tcp_ctask->dtask;
-	int sent = 0, rc;
-
-	tcp_ctask->xmstate &= ~XMSTATE_W_PAD;
-	iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad,
-			    tcp_ctask->pad_count);
-	rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, &tcp_ctask->pad_count,
-			   &sent);
-	if (rc) {
-		tcp_ctask->xmstate |= XMSTATE_W_PAD;
-		return rc;
-	}
-
-	if (conn->datadgst_en) {
-		crypto_digest_update(tcp_conn->data_tx_tfm,
-				     &tcp_ctask->sendbuf.sg, 1);
-		/* imm data? */
-		if (!dtask) {
-			rc = iscsi_digest_final_send(conn, ctask,
-						    &tcp_ctask->immbuf,
-						    &tcp_ctask->immdigest, 1);
-			if (rc) {
-				debug_tcp("send padding digest 0x%x"
-					  "fail!\n", tcp_ctask->immdigest);
-				return rc;
-			}
-			debug_tcp("done with padding, digest 0x%x\n",
-				  tcp_ctask->datadigest);
-		} else {
-			rc = iscsi_digest_final_send(conn, ctask,
-						    &dtask->digestbuf,
-						    &dtask->digest, 1);
-			if (rc) {
-				debug_tcp("send padding digest 0x%x"
-				          "fail\n", dtask->digest);
-				return rc;
-			}
-			debug_tcp("done with padding, digest 0x%x\n",
-				  dtask->digest);
+		if (conn->datadgst_en) {
+			iscsi_data_digest_init(conn->dd_data, tcp_ctask);
+			dtask->digest = 0;
 		}
+
+		iscsi_set_padding(tcp_ctask, r2t->data_count);
+		debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n",
+			r2t->solicit_datasn - 1, ctask->itt, r2t->data_count,
+			r2t->sent);
 	}
 
+	if (tcp_ctask->xmstate & XMSTATE_SOL_DATA) {
+		r2t = tcp_ctask->r2t;
+		dtask = &r2t->dtask;
+
+		rc = iscsi_send_data(ctask, &r2t->sendbuf, &r2t->sg,
+				     &r2t->sent, &r2t->data_count,
+				     &dtask->digestbuf, &dtask->digest);
+		if (rc)
+			return rc;
+		tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
+
+		/*
+		 * Done with this Data-Out. Next, check if we have
+		 * to send another Data-Out for this R2T.
+		 */
+		BUG_ON(r2t->data_length - r2t->sent < 0);
+		left = r2t->data_length - r2t->sent;
+		if (left) {
+			iscsi_solicit_data_cont(conn, ctask, r2t, left);
+			tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+			tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+			goto send_hdr;
+		}
+
+		/*
+		 * Done with this R2T. Check if there are more
+		 * outstanding R2Ts ready to be processed.
+		 */
+		spin_lock_bh(&session->lock);
+		tcp_ctask->r2t = NULL;
+		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
+			    sizeof(void*));
+		if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t,
+				sizeof(void*))) {
+			tcp_ctask->r2t = r2t;
+			tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+			tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+			spin_unlock_bh(&session->lock);
+			goto send_hdr;
+		}
+		spin_unlock_bh(&session->lock);
+	}
 	return 0;
 }
 
@@ -1813,85 +1716,30 @@
 		return rc;
 
 	if (tcp_ctask->xmstate & XMSTATE_R_HDR)
-		return handle_xmstate_r_hdr(conn, tcp_ctask);
+		return iscsi_send_read_hdr(conn, tcp_ctask);
 
 	if (tcp_ctask->xmstate & XMSTATE_W_HDR) {
-		rc = handle_xmstate_w_hdr(conn, ctask);
-		if (rc)
-			return rc;
-	}
-
-	/* XXX: for data digest xmit recover */
-	if (tcp_ctask->xmstate & XMSTATE_DATA_DIGEST) {
-		rc = handle_xmstate_data_digest(conn, ctask);
+		rc = iscsi_send_write_hdr(conn, ctask);
 		if (rc)
 			return rc;
 	}
 
 	if (tcp_ctask->xmstate & XMSTATE_IMM_DATA) {
-		rc = handle_xmstate_imm_data(conn, ctask);
+		rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg,
+				     &tcp_ctask->sent, &ctask->imm_count,
+				     &tcp_ctask->immbuf, &tcp_ctask->immdigest);
 		if (rc)
 			return rc;
+		tcp_ctask->xmstate &= ~XMSTATE_IMM_DATA;
 	}
 
-	if (tcp_ctask->xmstate & XMSTATE_UNS_HDR) {
-		BUG_ON(!ctask->unsol_count);
-		tcp_ctask->xmstate &= ~XMSTATE_UNS_HDR;
-unsolicit_head_again:
-		rc = handle_xmstate_uns_hdr(conn, ctask);
-		if (rc)
-			return rc;
-	}
+	rc = iscsi_send_unsol_pdu(conn, ctask);
+	if (rc)
+		return rc;
 
-	if (tcp_ctask->xmstate & XMSTATE_UNS_DATA) {
-		rc = handle_xmstate_uns_data(conn, ctask);
-		if (rc == 1)
-			goto unsolicit_head_again;
-		else if (rc)
-			return rc;
-		goto done;
-	}
-
-	if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
-		struct iscsi_r2t_info *r2t;
-
-		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
-		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-		if (!tcp_ctask->r2t)
-			__kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
-				    sizeof(void*));
-solicit_head_again:
-		r2t = tcp_ctask->r2t;
-		if (conn->hdrdgst_en)
-			iscsi_hdr_digest(conn, &r2t->headbuf,
-					(u8*)r2t->dtask.hdrext);
-		rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
-		if (rc) {
-			tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
-			tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
-			return rc;
-		}
-
-		debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n",
-			r2t->solicit_datasn - 1, ctask->itt, r2t->data_count,
-			r2t->sent);
-	}
-
-	if (tcp_ctask->xmstate & XMSTATE_SOL_DATA) {
-		rc = handle_xmstate_sol_data(conn, ctask);
-		if (rc == 1)
-			goto solicit_head_again;
-		if (rc)
-			return rc;
-	}
-
-done:
-	/*
-	 * Last thing to check is whether we need to send write
-	 * padding. Note that we check for xmstate equality, not just the bit.
-	 */
-	if (tcp_ctask->xmstate == XMSTATE_W_PAD)
-		rc = handle_xmstate_w_pad(conn, ctask);
+	rc = iscsi_send_sol_pdu(conn, ctask);
+	if (rc)
+		return rc;
 
 	return rc;
 }
@@ -1923,8 +1771,24 @@
 	/* initial operational parameters */
 	tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
 
+	tcp_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
+						  CRYPTO_ALG_ASYNC);
+	tcp_conn->tx_hash.flags = 0;
+	if (!tcp_conn->tx_hash.tfm)
+		goto free_tcp_conn;
+
+	tcp_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
+						  CRYPTO_ALG_ASYNC);
+	tcp_conn->rx_hash.flags = 0;
+	if (!tcp_conn->rx_hash.tfm)
+		goto free_tx_tfm;
+
 	return cls_conn;
 
+free_tx_tfm:
+	crypto_free_hash(tcp_conn->tx_hash.tfm);
+free_tcp_conn:
+	kfree(tcp_conn);
 tcp_conn_alloc_fail:
 	iscsi_conn_teardown(cls_conn);
 	return NULL;
@@ -1962,14 +1826,10 @@
 
 	/* now free tcp_conn */
 	if (digest) {
-		if (tcp_conn->tx_tfm)
-			crypto_free_tfm(tcp_conn->tx_tfm);
-		if (tcp_conn->rx_tfm)
-			crypto_free_tfm(tcp_conn->rx_tfm);
-		if (tcp_conn->data_tx_tfm)
-			crypto_free_tfm(tcp_conn->data_tx_tfm);
-		if (tcp_conn->data_rx_tfm)
-			crypto_free_tfm(tcp_conn->data_rx_tfm);
+		if (tcp_conn->tx_hash.tfm)
+			crypto_free_hash(tcp_conn->tx_hash.tfm);
+		if (tcp_conn->rx_hash.tfm)
+			crypto_free_hash(tcp_conn->rx_hash.tfm);
 	}
 
 	kfree(tcp_conn);
@@ -1979,9 +1839,11 @@
 iscsi_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 
 	iscsi_conn_stop(cls_conn, flag);
 	iscsi_tcp_release_conn(conn);
+	tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
 }
 
 static int
@@ -2127,48 +1989,11 @@
 	case ISCSI_PARAM_HDRDGST_EN:
 		iscsi_set_param(cls_conn, param, buf, buflen);
 		tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
-		if (conn->hdrdgst_en) {
+		if (conn->hdrdgst_en)
 			tcp_conn->hdr_size += sizeof(__u32);
-			if (!tcp_conn->tx_tfm)
-				tcp_conn->tx_tfm = crypto_alloc_tfm("crc32c",
-								    0);
-			if (!tcp_conn->tx_tfm)
-				return -ENOMEM;
-			if (!tcp_conn->rx_tfm)
-				tcp_conn->rx_tfm = crypto_alloc_tfm("crc32c",
-								    0);
-			if (!tcp_conn->rx_tfm) {
-				crypto_free_tfm(tcp_conn->tx_tfm);
-				return -ENOMEM;
-			}
-		} else {
-			if (tcp_conn->tx_tfm)
-				crypto_free_tfm(tcp_conn->tx_tfm);
-			if (tcp_conn->rx_tfm)
-				crypto_free_tfm(tcp_conn->rx_tfm);
-		}
 		break;
 	case ISCSI_PARAM_DATADGST_EN:
 		iscsi_set_param(cls_conn, param, buf, buflen);
-		if (conn->datadgst_en) {
-			if (!tcp_conn->data_tx_tfm)
-				tcp_conn->data_tx_tfm =
-				    crypto_alloc_tfm("crc32c", 0);
-			if (!tcp_conn->data_tx_tfm)
-				return -ENOMEM;
-			if (!tcp_conn->data_rx_tfm)
-				tcp_conn->data_rx_tfm =
-				    crypto_alloc_tfm("crc32c", 0);
-			if (!tcp_conn->data_rx_tfm) {
-				crypto_free_tfm(tcp_conn->data_tx_tfm);
-				return -ENOMEM;
-			}
-		} else {
-			if (tcp_conn->data_tx_tfm)
-				crypto_free_tfm(tcp_conn->data_tx_tfm);
-			if (tcp_conn->data_rx_tfm)
-				crypto_free_tfm(tcp_conn->data_rx_tfm);
-		}
 		tcp_conn->sendpage = conn->datadgst_en ?
 			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
 		break;
diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h
index 6a4ee70..3273683 100644
--- a/drivers/scsi/iscsi_tcp.h
+++ b/drivers/scsi/iscsi_tcp.h
@@ -31,26 +31,25 @@
 #define IN_PROGRESS_DDIGEST_RECV	0x3
 
 /* xmit state machine */
-#define	XMSTATE_IDLE			0x0
-#define	XMSTATE_R_HDR			0x1
-#define	XMSTATE_W_HDR			0x2
-#define	XMSTATE_IMM_HDR			0x4
-#define	XMSTATE_IMM_DATA		0x8
-#define	XMSTATE_UNS_INIT		0x10
-#define	XMSTATE_UNS_HDR			0x20
-#define	XMSTATE_UNS_DATA		0x40
-#define	XMSTATE_SOL_HDR			0x80
-#define	XMSTATE_SOL_DATA		0x100
-#define	XMSTATE_W_PAD			0x200
-#define XMSTATE_DATA_DIGEST		0x400
+#define XMSTATE_IDLE			0x0
+#define XMSTATE_R_HDR			0x1
+#define XMSTATE_W_HDR			0x2
+#define XMSTATE_IMM_HDR			0x4
+#define XMSTATE_IMM_DATA		0x8
+#define XMSTATE_UNS_INIT		0x10
+#define XMSTATE_UNS_HDR			0x20
+#define XMSTATE_UNS_DATA		0x40
+#define XMSTATE_SOL_HDR			0x80
+#define XMSTATE_SOL_DATA		0x100
+#define XMSTATE_W_PAD			0x200
+#define XMSTATE_W_RESEND_PAD		0x400
+#define XMSTATE_W_RESEND_DATA_DIGEST	0x800
 
-#define ISCSI_CONN_RCVBUF_MIN		262144
-#define ISCSI_CONN_SNDBUF_MIN		262144
 #define ISCSI_PAD_LEN			4
-#define ISCSI_R2T_MAX			16
 #define ISCSI_SG_TABLESIZE		SG_ALL
 #define ISCSI_TCP_MAX_CMD_LEN		16
 
+struct crypto_hash;
 struct socket;
 
 /* Socket connection recieve helper */
@@ -84,9 +83,6 @@
 	/* iSCSI connection-wide sequencing */
 	int			hdr_size;	/* PDU header size */
 
-	struct crypto_tfm	*rx_tfm;	/* CRC32C (Rx) */
-	struct crypto_tfm	*data_rx_tfm;	/* CRC32C (Rx) for data */
-
 	/* control data */
 	struct iscsi_tcp_recv	in;		/* TCP receive context */
 	int			in_progress;	/* connection state machine */
@@ -96,9 +92,9 @@
 	void			(*old_state_change)(struct sock *);
 	void			(*old_write_space)(struct sock *);
 
-	/* xmit */
-	struct crypto_tfm	*tx_tfm;	/* CRC32C (Tx) */
-	struct crypto_tfm	*data_tx_tfm;	/* CRC32C (Tx) for data */
+	/* data and header digests */
+	struct hash_desc	tx_hash;	/* CRC32C (Tx) */
+	struct hash_desc	rx_hash;	/* CRC32C (Rx) */
 
 	/* MIB custom statistics */
 	uint32_t		sendpage_failures_cnt;
@@ -157,19 +153,15 @@
 	struct scatterlist	*bad_sg;		/* assert statement */
 	int			sg_count;		/* SG's to process  */
 	uint32_t		exp_r2tsn;
-	int			r2t_data_count;		/* R2T Data-Out bytes */
 	int			data_offset;
 	struct iscsi_r2t_info	*r2t;			/* in progress R2T    */
 	struct iscsi_queue	r2tpool;
 	struct kfifo		*r2tqueue;
 	struct iscsi_r2t_info	**r2ts;
-	uint32_t		datadigest;		/* for recover digest */
 	int			digest_count;
 	uint32_t		immdigest;		/* for imm data */
 	struct iscsi_buf	immbuf;			/* for imm data digest */
-	struct iscsi_data_task	*dtask;		/* data task in progress*/
 	struct iscsi_data_task	unsol_dtask;	/* unsol data task */
-	int			digest_offset;	/* for partial buff digest */
 };
 
 #endif /* ISCSI_H */
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 5884cd2..c542d0e 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -68,8 +68,7 @@
 EXPORT_SYMBOL_GPL(iscsi_check_assign_cmdsn);
 
 void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask,
-				   struct iscsi_data *hdr,
-				   int transport_data_cnt)
+				   struct iscsi_data *hdr)
 {
 	struct iscsi_conn *conn = ctask->conn;
 
@@ -82,14 +81,12 @@
 
 	hdr->itt = ctask->hdr->itt;
 	hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
-
-	hdr->offset = cpu_to_be32(ctask->total_length -
-				  transport_data_cnt -
-				  ctask->unsol_count);
+	hdr->offset = cpu_to_be32(ctask->unsol_offset);
 
 	if (ctask->unsol_count > conn->max_xmit_dlength) {
 		hton24(hdr->dlength, conn->max_xmit_dlength);
 		ctask->data_count = conn->max_xmit_dlength;
+		ctask->unsol_offset += ctask->data_count;
 		hdr->flags = 0;
 	} else {
 		hton24(hdr->dlength, ctask->unsol_count);
@@ -125,6 +122,7 @@
         memcpy(hdr->cdb, sc->cmnd, sc->cmd_len);
         memset(&hdr->cdb[sc->cmd_len], 0, MAX_COMMAND_SIZE - sc->cmd_len);
 
+	ctask->data_count = 0;
 	if (sc->sc_data_direction == DMA_TO_DEVICE) {
 		hdr->flags |= ISCSI_FLAG_CMD_WRITE;
 		/*
@@ -143,6 +141,7 @@
 		 */
 		ctask->imm_count = 0;
 		ctask->unsol_count = 0;
+		ctask->unsol_offset = 0;
 		ctask->unsol_datasn = 0;
 
 		if (session->imm_data_en) {
@@ -156,9 +155,12 @@
 		} else
 			zero_data(ctask->hdr->dlength);
 
-		if (!session->initial_r2t_en)
+		if (!session->initial_r2t_en) {
 			ctask->unsol_count = min(session->first_burst,
 				ctask->total_length) - ctask->imm_count;
+			ctask->unsol_offset = ctask->imm_count;
+		}
+
 		if (!ctask->unsol_count)
 			/* No unsolicit Data-Out's */
 			ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL;
@@ -177,25 +179,51 @@
 
 /**
  * iscsi_complete_command - return command back to scsi-ml
- * @session: iscsi session
  * @ctask: iscsi cmd task
  *
  * Must be called with session lock.
  * This function returns the scsi command to scsi-ml and returns
  * the cmd task to the pool of available cmd tasks.
  */
-static void iscsi_complete_command(struct iscsi_session *session,
-				   struct iscsi_cmd_task *ctask)
+static void iscsi_complete_command(struct iscsi_cmd_task *ctask)
 {
+	struct iscsi_session *session = ctask->conn->session;
 	struct scsi_cmnd *sc = ctask->sc;
 
 	ctask->state = ISCSI_TASK_COMPLETED;
 	ctask->sc = NULL;
+	/* SCSI eh reuses commands to verify us */
+	sc->SCp.ptr = NULL;
 	list_del_init(&ctask->running);
 	__kfifo_put(session->cmdpool.queue, (void*)&ctask, sizeof(void*));
 	sc->scsi_done(sc);
 }
 
+static void __iscsi_get_ctask(struct iscsi_cmd_task *ctask)
+{
+	atomic_inc(&ctask->refcount);
+}
+
+static void iscsi_get_ctask(struct iscsi_cmd_task *ctask)
+{
+	spin_lock_bh(&ctask->conn->session->lock);
+	__iscsi_get_ctask(ctask);
+	spin_unlock_bh(&ctask->conn->session->lock);
+}
+
+static void __iscsi_put_ctask(struct iscsi_cmd_task *ctask)
+{
+	if (atomic_dec_and_test(&ctask->refcount))
+		iscsi_complete_command(ctask);
+}
+
+static void iscsi_put_ctask(struct iscsi_cmd_task *ctask)
+{
+	spin_lock_bh(&ctask->conn->session->lock);
+	__iscsi_put_ctask(ctask);
+	spin_unlock_bh(&ctask->conn->session->lock);
+}
+
 /**
  * iscsi_cmd_rsp - SCSI Command Response processing
  * @conn: iscsi connection
@@ -272,7 +300,7 @@
 		   (long)sc, sc->result, ctask->itt);
 	conn->scsirsp_pdus_cnt++;
 
-	iscsi_complete_command(conn->session, ctask);
+	__iscsi_put_ctask(ctask);
 	return rc;
 }
 
@@ -295,6 +323,30 @@
 	wake_up(&conn->ehwait);
 }
 
+static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+			       char *data, int datalen)
+{
+	struct iscsi_reject *reject = (struct iscsi_reject *)hdr;
+	struct iscsi_hdr rejected_pdu;
+	uint32_t itt;
+
+	conn->exp_statsn = be32_to_cpu(reject->statsn) + 1;
+
+	if (reject->reason == ISCSI_REASON_DATA_DIGEST_ERROR) {
+		if (ntoh24(reject->dlength) > datalen)
+			return ISCSI_ERR_PROTO;
+
+		if (ntoh24(reject->dlength) >= sizeof(struct iscsi_hdr)) {
+			memcpy(&rejected_pdu, data, sizeof(struct iscsi_hdr));
+			itt = rejected_pdu.itt & ISCSI_ITT_MASK;
+			printk(KERN_ERR "itt 0x%x had pdu (op 0x%x) rejected "
+				"due to DataDigest error.\n", itt,
+				rejected_pdu.opcode);
+		}
+	}
+	return 0;
+}
+
 /**
  * __iscsi_complete_pdu - complete pdu
  * @conn: iscsi conn
@@ -336,7 +388,7 @@
 			BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
 			if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
 				conn->scsirsp_pdus_cnt++;
-				iscsi_complete_command(session, ctask);
+				__iscsi_put_ctask(ctask);
 			}
 			break;
 		case ISCSI_OP_R2T:
@@ -406,6 +458,11 @@
 			break;
 		}
 	} else if (itt == ISCSI_RESERVED_TAG) {
+		rc = iscsi_check_assign_cmdsn(session,
+					     (struct iscsi_nopin*)hdr);
+		if (rc)
+			goto done;
+
 		switch(opcode) {
 		case ISCSI_OP_NOOP_IN:
 			if (datalen) {
@@ -413,11 +470,6 @@
 				break;
 			}
 
-			rc = iscsi_check_assign_cmdsn(session,
-						 (struct iscsi_nopin*)hdr);
-			if (rc)
-				break;
-
 			if (hdr->ttt == ISCSI_RESERVED_TAG)
 				break;
 
@@ -425,7 +477,8 @@
 				rc = ISCSI_ERR_CONN_FAILED;
 			break;
 		case ISCSI_OP_REJECT:
-			/* we need sth like iscsi_reject_rsp()*/
+			rc = iscsi_handle_reject(conn, hdr, data, datalen);
+			break;
 		case ISCSI_OP_ASYNC_EVENT:
 			conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
 			/* we need sth like iscsi_async_event_rsp() */
@@ -561,7 +614,9 @@
 	BUG_ON(conn->ctask && conn->mtask);
 
 	if (conn->ctask) {
+		iscsi_get_ctask(conn->ctask);
 		rc = tt->xmit_cmd_task(conn, conn->ctask);
+		iscsi_put_ctask(conn->ctask);
 		if (rc)
 			goto again;
 		/* done with this in-progress ctask */
@@ -602,12 +657,19 @@
 					 struct iscsi_cmd_task, running);
 		conn->ctask->state = ISCSI_TASK_RUNNING;
 		list_move_tail(conn->xmitqueue.next, &conn->run_list);
+		__iscsi_get_ctask(conn->ctask);
 		spin_unlock_bh(&conn->session->lock);
 
 		rc = tt->xmit_cmd_task(conn, conn->ctask);
 		if (rc)
 			goto again;
+
 		spin_lock_bh(&conn->session->lock);
+		__iscsi_put_ctask(conn->ctask);
+		if (rc) {
+			spin_unlock_bh(&conn->session->lock);
+			goto again;
+		}
 	}
 	spin_unlock_bh(&conn->session->lock);
 	/* done with this ctask */
@@ -657,6 +719,7 @@
 	FAILURE_SESSION_FAILED,
 	FAILURE_SESSION_FREED,
 	FAILURE_WINDOW_CLOSED,
+	FAILURE_OOM,
 	FAILURE_SESSION_TERMINATE,
 	FAILURE_SESSION_IN_RECOVERY,
 	FAILURE_SESSION_RECOVERY_TIMEOUT,
@@ -672,6 +735,7 @@
 
 	sc->scsi_done = done;
 	sc->result = 0;
+	sc->SCp.ptr = NULL;
 
 	host = sc->device->host;
 	session = iscsi_hostdata(host->hostdata);
@@ -715,10 +779,15 @@
 
 	conn = session->leadconn;
 
-	__kfifo_get(session->cmdpool.queue, (void*)&ctask, sizeof(void*));
+	if (!__kfifo_get(session->cmdpool.queue, (void*)&ctask,
+			 sizeof(void*))) {
+		reason = FAILURE_OOM;
+		goto reject;
+	}
 	sc->SCp.phase = session->age;
 	sc->SCp.ptr = (char *)ctask;
 
+	atomic_set(&ctask->refcount, 1);
 	ctask->state = ISCSI_TASK_PENDING;
 	ctask->mtask = NULL;
 	ctask->conn = conn;
@@ -731,9 +800,10 @@
 
 	list_add_tail(&ctask->running, &conn->xmitqueue);
 	debug_scsi(
-	       "ctask enq [%s cid %d sc %lx itt 0x%x len %d cmdsn %d win %d]\n",
+	       "ctask enq [%s cid %d sc %p cdb 0x%x itt 0x%x len %d cmdsn %d "
+		"win %d]\n",
 		sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
-		conn->id, (long)sc, ctask->itt, sc->request_bufflen,
+		conn->id, sc, sc->cmnd[0], ctask->itt, sc->request_bufflen,
 		session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
 	spin_unlock(&session->lock);
 
@@ -1061,16 +1131,30 @@
 
 	sc->result = err;
 	sc->resid = sc->request_bufflen;
-	iscsi_complete_command(conn->session, ctask);
+	/* release ref from queuecommand */
+	__iscsi_put_ctask(ctask);
 }
 
 int iscsi_eh_abort(struct scsi_cmnd *sc)
 {
-	struct iscsi_cmd_task *ctask = (struct iscsi_cmd_task *)sc->SCp.ptr;
-	struct iscsi_conn *conn = ctask->conn;
-	struct iscsi_session *session = conn->session;
+	struct iscsi_cmd_task *ctask;
+	struct iscsi_conn *conn;
+	struct iscsi_session *session;
 	int rc;
 
+	/*
+	 * if session was ISCSI_STATE_IN_RECOVERY then we may not have
+	 * got the command.
+	 */
+	if (!sc->SCp.ptr) {
+		debug_scsi("sc never reached iscsi layer or it completed.\n");
+		return SUCCESS;
+	}
+
+	ctask = (struct iscsi_cmd_task *)sc->SCp.ptr;
+	conn = ctask->conn;
+	session = conn->session;
+
 	conn->eh_abort_cnt++;
 	debug_scsi("aborting [sc %p itt 0x%x]\n", sc, ctask->itt);
 
@@ -1520,11 +1604,19 @@
 	struct iscsi_conn *conn = cls_conn->dd_data;
 	struct iscsi_session *session = conn->session;
 
-	if (session == NULL) {
+	if (!session) {
 		printk(KERN_ERR "iscsi: can't start unbound connection\n");
 		return -EPERM;
 	}
 
+	if ((session->imm_data_en || !session->initial_r2t_en) &&
+	     session->first_burst > session->max_burst) {
+		printk("iscsi: invalid burst lengths: "
+		       "first_burst %d max_burst %d\n",
+		       session->first_burst, session->max_burst);
+		return -EINVAL;
+	}
+
 	spin_lock_bh(&session->lock);
 	conn->c_stage = ISCSI_CONN_STARTED;
 	session->state = ISCSI_STATE_LOGGED_IN;
diff --git a/drivers/scsi/libsas/Kconfig b/drivers/scsi/libsas/Kconfig
new file mode 100644
index 0000000..aafdc92f
--- /dev/null
+++ b/drivers/scsi/libsas/Kconfig
@@ -0,0 +1,39 @@
+#
+# Kernel configuration file for the SAS Class
+#
+# Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+# Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+#
+# This file is licensed under GPLv2.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+#
+
+config SCSI_SAS_LIBSAS
+	tristate "SAS Domain Transport Attributes"
+	depends on SCSI
+	select SCSI_SAS_ATTRS
+	help
+	  This provides transport specific helpers for SAS drivers which
+	  use the domain device construct (like the aic94xx).
+
+config SCSI_SAS_LIBSAS_DEBUG
+	bool "Compile the SAS Domain Transport Attributes in debug mode"
+	default y
+	depends on SCSI_SAS_LIBSAS
+	help
+		Compiles the SAS Layer in debug mode.  In debug mode, the
+		SAS Layer prints diagnostic and debug messages.
diff --git a/drivers/scsi/libsas/Makefile b/drivers/scsi/libsas/Makefile
new file mode 100644
index 0000000..44d972a
--- /dev/null
+++ b/drivers/scsi/libsas/Makefile
@@ -0,0 +1,36 @@
+#
+# Kernel Makefile for the libsas helpers
+#
+# Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+# Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+#
+# This file is licensed under GPLv2.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; version 2 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+
+ifeq ($(CONFIG_SCSI_SAS_LIBSAS_DEBUG),y)
+	EXTRA_CFLAGS += -DSAS_DEBUG
+endif
+
+obj-$(CONFIG_SCSI_SAS_LIBSAS) += libsas.o
+libsas-y +=  sas_init.o     \
+		sas_phy.o      \
+		sas_port.o     \
+		sas_event.o    \
+		sas_dump.o     \
+		sas_discover.o \
+		sas_expander.o \
+		sas_scsi_host.o
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
new file mode 100644
index 0000000..d977bd4
--- /dev/null
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -0,0 +1,749 @@
+/*
+ * Serial Attached SCSI (SAS) Discover process
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_eh.h>
+#include "sas_internal.h"
+
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+
+/* ---------- Basic task processing for discovery purposes ---------- */
+
+/* Initialise the list heads of @dev; the child list used depends on dev_type. */
+void sas_init_dev(struct domain_device *dev)
+{
+	INIT_LIST_HEAD(&dev->siblings);
+	INIT_LIST_HEAD(&dev->dev_list_node);
+	switch (dev->dev_type) {
+	case EDGE_DEV:
+	case FANOUT_DEV:
+		/* expander types use the ex_dev member */
+		INIT_LIST_HEAD(&dev->ex_dev.children);
+		break;
+	case SATA_DEV:
+	case SATA_PM:
+	case SATA_PM_PORT:
+		INIT_LIST_HEAD(&dev->sata_dev.children);
+		break;
+	default:	/* SAS_END_DEV and everything else: no child list */
+		break;
+	}
+}
+
+/* Timer callback: mark a task that is not done as aborted, then wake the waiter. */
+static void sas_task_timedout(unsigned long _task)
+{
+	struct sas_task *t = (struct sas_task *) _task;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&t->task_state_lock, irq_flags);
+	if (!(t->task_state_flags & SAS_TASK_STATE_DONE))
+		t->task_state_flags |= SAS_TASK_STATE_ABORTED;
+	spin_unlock_irqrestore(&t->task_state_lock, irq_flags);
+	complete(&t->completion);
+}
+
+/* task_done hook: complete the task only if del_timer() stopped a live timer. */
+static void sas_disc_task_done(struct sas_task *task)
+{
+	if (del_timer(&task->timer))
+		complete(&task->completion);
+}
+
+#define SAS_DEV_TIMEOUT 10
+
+/**
+ * sas_execute_task -- Basic task processing for discovery
+ * @task: the task to be executed
+ * @buffer: pointer to buffer to do I/O
+ * @size: size of @buffer
+ * @pci_dma_dir: PCI_DMA_...
+ *
+ * Returns 0 on success, -ENOMEM if the scatterlist cannot be allocated, the
+ * LLDD's error code if the task cannot be queued, and -ETASK otherwise.
+ */
+static int sas_execute_task(struct sas_task *task, void *buffer, int size,
+			    int pci_dma_dir)
+{
+	int res = 0;
+	struct scatterlist *scatter = NULL;
+	struct task_status_struct *ts = &task->task_status;
+	int num_scatter = 0, retries;
+	struct sas_internal *i =
+		to_sas_internal(task->dev->port->ha->core.shost->transportt);
+
+	if (pci_dma_dir != PCI_DMA_NONE) {
+		scatter = kzalloc(sizeof(*scatter), GFP_KERNEL);
+		if (!scatter)
+			return -ENOMEM;	/* nothing to clean up yet */
+
+		sg_init_one(scatter, buffer, size);
+		num_scatter = 1;
+	}
+
+	task->task_proto = task->dev->tproto;
+	task->scatter = scatter;
+	task->num_scatter = num_scatter;
+	task->total_xfer_len = size;
+	task->data_dir = pci_dma_dir;
+	task->task_done = sas_disc_task_done;
+
+	for (retries = 0; retries < 5; retries++) {
+		task->task_state_flags = SAS_TASK_STATE_PENDING;
+		init_completion(&task->completion);
+
+		task->timer.data = (unsigned long) task;
+		task->timer.function = sas_task_timedout;
+		task->timer.expires = jiffies + SAS_DEV_TIMEOUT*HZ;
+		add_timer(&task->timer);
+
+		res = i->dft->lldd_execute_task(task, 1, GFP_KERNEL);
+		if (res) {
+			del_timer(&task->timer);
+			SAS_DPRINTK("executing SAS discovery task failed:%d\n",
+				    res);
+			goto ex_err;
+		}
+		wait_for_completion(&task->completion);
+		res = -ETASK;
+		if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+			int res2;
+			SAS_DPRINTK("task aborted, flags:0x%x\n",
+				    task->task_state_flags);
+			res2 = i->dft->lldd_abort_task(task);
+			SAS_DPRINTK("came back from abort task\n");
+			if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+				if (res2 == TMF_RESP_FUNC_COMPLETE)
+					continue; /* Retry the task */
+				else
+					goto ex_err;
+			}
+		}
+		if (task->task_status.stat == SAM_BUSY ||
+			   task->task_status.stat == SAM_TASK_SET_FULL ||
+			   task->task_status.stat == SAS_QUEUE_FULL) {
+			SAS_DPRINTK("task: q busy, sleeping...\n");
+			schedule_timeout_interruptible(HZ);
+		} else if (task->task_status.stat == SAM_CHECK_COND) {
+			struct scsi_sense_hdr shdr;
+
+			if (!scsi_normalize_sense(ts->buf, ts->buf_valid_size,
+						  &shdr)) {
+				SAS_DPRINTK("couldn't normalize sense\n");
+				continue;
+			}
+			if ((shdr.sense_key == 6 && shdr.asc == 0x29) ||
+			    (shdr.sense_key == 2 && shdr.asc == 4 &&
+			     shdr.ascq == 1)) {
+				SAS_DPRINTK("device %016llx LUN: %016llx "
+					    "powering up or not ready yet, "
+					    "sleeping...\n",
+					    SAS_ADDR(task->dev->sas_addr),
+					    SAS_ADDR(task->ssp_task.LUN));
+
+				schedule_timeout_interruptible(5*HZ);
+			} else if (shdr.sense_key == 1) {
+				res = 0;
+				break;
+			} else if (shdr.sense_key == 5) {
+				break;
+			} else {
+				SAS_DPRINTK("dev %016llx LUN: %016llx "
+					    "sense key:0x%x ASC:0x%x ASCQ:0x%x"
+					    "\n",
+					    SAS_ADDR(task->dev->sas_addr),
+					    SAS_ADDR(task->ssp_task.LUN),
+					    shdr.sense_key,
+					    shdr.asc, shdr.ascq);
+			}
+		} else if (task->task_status.resp != SAS_TASK_COMPLETE ||
+			   task->task_status.stat != SAM_GOOD) {
+			SAS_DPRINTK("task finished with resp:0x%x, "
+				    "stat:0x%x\n",
+				    task->task_status.resp,
+				    task->task_status.stat);
+			goto ex_err;
+		} else {
+			res = 0;
+			break;
+		}
+	}
+ex_err:
+	kfree(scatter);	/* still NULL (a no-op) when no transfer was requested */
+	return res;
+}
+
+/* ---------- Domain device discovery ---------- */
+
+/**
+ * sas_get_port_device -- Discover devices which caused port creation
+ * @port: pointer to struct asd_sas_port of interest
+ *
+ * Devices directly attached to a HA port have no parent; that is how we
+ * know they are (domain) "root" devices.  All other devices should have
+ * their "parent" pointer set as soon as they are discovered.  Returns 0
+ * on success, -ENOMEM, or -ENODEV (no phy in the port, or no rphy set up).
+ */
+static int sas_get_port_device(struct asd_sas_port *port)
+{
+	unsigned long flags;
+	struct asd_sas_phy *phy;
+	struct sas_rphy *rphy;
+	struct domain_device *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&port->phy_list_lock, flags);
+	if (list_empty(&port->phy_list)) {
+		spin_unlock_irqrestore(&port->phy_list_lock, flags);
+		kfree(dev);
+		return -ENODEV;	/* the port has no phys */
+	}
+	phy = container_of(port->phy_list.next, struct asd_sas_phy, port_phy_el);
+	spin_lock(&phy->frame_rcvd_lock);
+	memcpy(dev->frame_rcvd, phy->frame_rcvd, min(sizeof(dev->frame_rcvd),
+					     (size_t)phy->frame_rcvd_size));
+	spin_unlock(&phy->frame_rcvd_lock);
+	spin_unlock_irqrestore(&port->phy_list_lock, flags);
+
+	if (dev->frame_rcvd[0] == 0x34 && port->oob_mode == SATA_OOB_MODE) {
+		struct dev_to_host_fis *fis =
+			(struct dev_to_host_fis *) dev->frame_rcvd;
+		if (fis->interrupt_reason == 1 && fis->lbal == 1 &&
+		    fis->byte_count_low==0x69 && fis->byte_count_high == 0x96
+		    && (fis->device & ~0x10) == 0)
+			dev->dev_type = SATA_PM;
+		else
+			dev->dev_type = SATA_DEV;
+		dev->tproto = SATA_PROTO;
+	} else {
+		struct sas_identify_frame *id =
+			(struct sas_identify_frame *) dev->frame_rcvd;
+		dev->dev_type = id->dev_type;
+		dev->iproto = id->initiator_bits;
+		dev->tproto = id->target_bits;
+	}
+
+	sas_init_dev(dev);
+
+	switch (dev->dev_type) {
+	case SAS_END_DEV:
+		rphy = sas_end_device_alloc(port->port);
+		break;
+	case EDGE_DEV:
+		rphy = sas_expander_alloc(port->port,
+					  SAS_EDGE_EXPANDER_DEVICE);
+		break;
+	case FANOUT_DEV:
+		rphy = sas_expander_alloc(port->port,
+					  SAS_FANOUT_EXPANDER_DEVICE);
+		break;
+	case SATA_DEV:	/* no rphy is allocated for SATA here: error path */
+	default:
+		printk("ERROR: Unidentified device type %d\n", dev->dev_type);
+		rphy = NULL;
+		break;
+	}
+
+	if (!rphy) {	/* allocation failed or the type is not handled above */
+		kfree(dev);
+		return -ENODEV;
+	}
+	rphy->identify.phy_identifier = phy->phy->identify.phy_identifier;
+	memcpy(dev->sas_addr, port->attached_sas_addr, SAS_ADDR_SIZE);
+	sas_fill_in_rphy(dev, rphy);
+	sas_hash_addr(dev->hashed_sas_addr, dev->sas_addr);
+	port->port_dev = dev;
+	dev->port = port;
+	dev->linkrate = port->linkrate;
+	dev->min_linkrate = port->linkrate;
+	dev->max_linkrate = port->linkrate;
+	dev->pathways = port->num_phys;
+	memset(port->disc.fanout_sas_addr, 0, SAS_ADDR_SIZE);
+	memset(port->disc.eeds_a, 0, SAS_ADDR_SIZE);
+	memset(port->disc.eeds_b, 0, SAS_ADDR_SIZE);
+	port->disc.max_level = 0;
+
+	dev->rphy = rphy;
+	spin_lock(&port->dev_list_lock);
+	list_add_tail(&dev->dev_list_node, &port->dev_list);
+	spin_unlock(&port->dev_list_lock);
+
+	return 0;
+}
+
+/* ---------- Discover and Revalidate ---------- */
+
+/* ---------- SATA ---------- */
+/* Set sata_dev.command_set from the FIS in dev->frame_rcvd, if it matches. */
+static void sas_get_ata_command_set(struct domain_device *dev)
+{
+	struct dev_to_host_fis *fis =
+		(struct dev_to_host_fis *) dev->frame_rcvd;
+
+	if ((fis->sector_count == 1 && /* ATA */
+	     fis->lbal         == 1 &&
+	     fis->lbam         == 0 &&
+	     fis->lbah         == 0 &&
+	     fis->device       == 0)
+	    ||
+	    (fis->sector_count == 0 && /* CE-ATA (mATA) */
+	     fis->lbal         == 0 &&
+	     fis->lbam         == 0xCE &&
+	     fis->lbah         == 0xAA &&
+	     (fis->device & ~0x10) == 0))
+
+		dev->sata_dev.command_set = ATA_COMMAND_SET;
+
+	else if ((fis->interrupt_reason == 1 &&	/* ATAPI */
+		  fis->lbal             == 1 &&
+		  fis->byte_count_low   == 0x14 &&
+		  fis->byte_count_high  == 0xEB &&
+		  (fis->device & ~0x10) == 0))
+
+		dev->sata_dev.command_set = ATAPI_COMMAND_SET;
+
+	else if ((fis->sector_count == 1 && /* SEMB */
+		  fis->lbal         == 1 &&
+		  fis->lbam         == 0x3C &&
+		  fis->lbah         == 0xC3 &&
+		  fis->device       == 0)
+		||
+		 (fis->interrupt_reason == 1 &&	/* SATA PM */
+		  fis->lbal             == 1 &&
+		  fis->byte_count_low   == 0x69 &&
+		  fis->byte_count_high  == 0x96 &&
+		  (fis->device & ~0x10) == 0))
+
+		/* Treat it as a superset?  SEMB and PM are mapped to ATAPI. */
+		dev->sata_dev.command_set = ATAPI_COMMAND_SET;
+}
+
+/**
+ * sas_issue_ata_cmd -- send one ATA command to a device during discovery
+ * @dev: the device to send the command to
+ * @command: value for the FIS command register
+ * @features: value for the FIS features register
+ * @buffer: pointer to buffer to do I/O
+ * @size: size of @buffer
+ * @pci_dma_dir: PCI_DMA_...
+ *
+ * Returns -ENOMEM if no task can be allocated, else sas_execute_task()'s result.
+ */
+static int sas_issue_ata_cmd(struct domain_device *dev, u8 command,
+			     u8 features, void *buffer, int size,
+			     int pci_dma_dir)
+{
+	struct dev_to_host_fis *rcvd_fis =
+		(struct dev_to_host_fis *) &dev->frame_rcvd[0];
+	struct sas_task *t = sas_alloc_task(GFP_KERNEL);
+	int rc;
+
+	if (!t)
+		return -ENOMEM;
+
+	t->dev = dev;
+	t->ata_task.retry_count = 1;
+
+	/* the device register is taken over from the FIS we received */
+	t->ata_task.fis.command = command;
+	t->ata_task.fis.features = features;
+	t->ata_task.fis.device = rcvd_fis->device;
+
+	rc = sas_execute_task(t, buffer, size, pci_dma_dir);
+	sas_free_task(t);
+
+	return rc;
+}
+
+/* Copy a parentless device's SAS address into its port and the port's phys. */
+static void sas_sata_propagate_sas_addr(struct domain_device *dev)
+{
+	struct asd_sas_port *owner = dev->port;
+	struct asd_sas_phy *member;
+	unsigned long irq_flags;
+
+	BUG_ON(dev->parent);	/* only valid for devices without a parent */
+	memcpy(owner->attached_sas_addr, dev->sas_addr, SAS_ADDR_SIZE);
+	spin_lock_irqsave(&owner->phy_list_lock, irq_flags);
+	list_for_each_entry(member, &owner->phy_list, port_phy_el)
+		memcpy(member->attached_sas_addr, dev->sas_addr, SAS_ADDR_SIZE);
+	spin_unlock_irqrestore(&owner->phy_list_lock, irq_flags);
+}
+
+#define ATA_IDENTIFY_DEV         0xEC
+#define ATA_IDENTIFY_PACKET_DEV  0xA1
+#define ATA_SET_FEATURES         0xEF
+#define ATA_FEATURE_PUP_STBY_SPIN_UP 0x07
+
+/**
+ * sas_discover_sata_dev -- discover a STP/SATA device (SATA_DEV)
+ * @dev: STP/SATA device of interest (ATA/ATAPI)
+ *
+ * The LLDD has already been notified of this device, so that we can
+ * send FISes to it.  Here we fetch IDENTIFY DEVICE (or IDENTIFY PACKET
+ * DEVICE for ATAPI) so that the LLDD can fine-tune its performance.
+ * Returns 0 on success, -ENOMEM or the error of a failed IDENTIFY.
+ */
+static int sas_discover_sata_dev(struct domain_device *dev)
+{
+	int     res;
+	__le16  *identify_x;
+	u8      command;
+
+	identify_x = kzalloc(512, GFP_KERNEL);
+	if (!identify_x)
+		return -ENOMEM;
+
+	if (dev->sata_dev.command_set == ATA_COMMAND_SET) {
+		dev->sata_dev.identify_device = identify_x;
+		command = ATA_IDENTIFY_DEV;
+	} else {
+		dev->sata_dev.identify_packet_device = identify_x;
+		command = ATA_IDENTIFY_PACKET_DEV;
+	}
+
+	res = sas_issue_ata_cmd(dev, command, 0, identify_x, 512,
+				PCI_DMA_FROMDEVICE);
+	if (res)
+		goto out_err;
+
+	/* lives on the media? */
+	if (le16_to_cpu(identify_x[0]) & 4) {
+		/* incomplete response: spin up if word 83 (CPU order) has bit 6 */
+		SAS_DPRINTK("sending SET FEATURE/PUP_STBY_SPIN_UP to "
+			    "dev %llx\n", SAS_ADDR(dev->sas_addr));
+		if (!(le16_to_cpu(identify_x[83]) & (1<<6)))
+			goto cont1;
+		res = sas_issue_ata_cmd(dev, ATA_SET_FEATURES,
+					ATA_FEATURE_PUP_STBY_SPIN_UP,
+					NULL, 0, PCI_DMA_NONE);
+		if (res)
+			goto cont1;
+
+		schedule_timeout_interruptible(5*HZ); /* More time? */
+		res = sas_issue_ata_cmd(dev, command, 0, identify_x, 512,
+					PCI_DMA_FROMDEVICE);
+		if (res)
+			goto out_err;
+	}
+cont1:
+	/* Get WWN */
+	if (dev->port->oob_mode != SATA_OOB_MODE) {
+		memcpy(dev->sas_addr, dev->sata_dev.rps_resp.rps.stp_sas_addr,
+		       SAS_ADDR_SIZE);
+	} else if (dev->sata_dev.command_set == ATA_COMMAND_SET &&
+		   (le16_to_cpu(dev->sata_dev.identify_device[108]) & 0xF000)
+		   == 0x5000) {
+		int i;
+
+		for (i = 0; i < 4; i++) {
+			dev->sas_addr[2*i] =
+	     (le16_to_cpu(dev->sata_dev.identify_device[108+i]) & 0xFF00) >> 8;
+			dev->sas_addr[2*i+1] =
+	      le16_to_cpu(dev->sata_dev.identify_device[108+i]) & 0x00FF;
+		}
+	}
+	sas_hash_addr(dev->hashed_sas_addr, dev->sas_addr);
+	if (!dev->parent)
+		sas_sata_propagate_sas_addr(dev);
+
+	/* XXX Hint: register this SATA device with SATL.
+	   When this returns, dev->sata_dev->lu is alive and
+	   present.
+	sas_satl_register_dev(dev);
+	*/
+	return 0;
+out_err:
+	dev->sata_dev.identify_packet_device = NULL;
+	dev->sata_dev.identify_device = NULL;
+	kfree(identify_x);
+	return res;
+}
+
+static int sas_discover_sata_pm(struct domain_device *dev)
+{
+	return -ENODEV;	/* no port multiplier discovery is implemented */
+}
+
+/* Tell the LLDD about @dev; returns the optional lldd_dev_found() hook's result. */
+int sas_notify_lldd_dev_found(struct domain_device *dev)
+{
+	struct sas_ha_struct *ha = dev->port->ha;
+	struct sas_internal *sas_i = to_sas_internal(ha->core.shost->transportt);
+	int rc;
+
+	if (!sas_i->dft->lldd_dev_found)
+		return 0;
+
+	rc = sas_i->dft->lldd_dev_found(dev);
+	if (rc)
+		printk("sas: driver on pcidev %s cannot handle "
+		       "device %llx, error:%d\n",
+		       pci_name(ha->pcidev),
+		       SAS_ADDR(dev->sas_addr), rc);
+	return rc;
+}
+
+
+/* Tell the LLDD that @dev is gone, if it provides an lldd_dev_gone() hook. */
+void sas_notify_lldd_dev_gone(struct domain_device *dev)
+{
+	struct Scsi_Host *host = dev->port->ha->core.shost;
+	struct sas_internal *sas_i = to_sas_internal(host->transportt);
+
+	if (sas_i->dft->lldd_dev_gone)
+		sas_i->dft->lldd_dev_gone(dev);
+}
+
+/* ---------- Common/dispatchers ---------- */
+
+/**
+ * sas_discover_sata -- discover an STP/SATA domain device
+ * @dev: pointer to struct domain_device of interest
+ *
+ * The LLDD is notified of the device first, so that frames can be sent
+ * to it, and the discover function matching the device type is then
+ * called.  When that is done the LLDD is told that the device is gone
+ * and, if discovery succeeded, that it has been found again: the
+ * second time around the fields the driver looks at are filled in, so
+ * it can fine-tune its parameters for the device.  Devices directly
+ * attached to a HA port have no parent.  All other devices do, and
+ * must have their "parent" pointer set before this function is called.
+ *
+ * Returns 0 on success or the error reported by the failing step.
+ */
+int sas_discover_sata(struct domain_device *dev)
+{
+	int err;
+
+	sas_get_ata_command_set(dev);
+
+	err = sas_notify_lldd_dev_found(dev);
+	if (err)
+		return err;
+
+	/* other device types are left alone and count as success */
+	if (dev->dev_type == SATA_DEV)
+		err = sas_discover_sata_dev(dev);
+	else if (dev->dev_type == SATA_PM)
+		err = sas_discover_sata_pm(dev);
+
+	/*
+	 * Drop the first registration; on success register again so the
+	 * LLDD sees the data gathered above.  The result of this second
+	 * notification is not propagated to the caller.
+	 */
+	sas_notify_lldd_dev_gone(dev);
+	if (!err)
+		sas_notify_lldd_dev_found(dev);
+
+	return err;
+}
+
+/**
+ * sas_discover_end_dev -- discover an end device (SSP, etc)
+ * @dev: pointer to domain device of interest
+ *
+ * See comment in sas_discover_sata().  Returns 0 on success, else the error.
+ */
+int sas_discover_end_dev(struct domain_device *dev)
+{
+	int err = sas_notify_lldd_dev_found(dev);
+
+	if (err)
+		return err;
+
+	err = sas_rphy_add(dev->rphy);
+
+	/* whether or not the rphy was added, the first registration ends */
+	sas_notify_lldd_dev_gone(dev);
+	if (err)
+		return err;
+
+	/*
+	 * Register again so that the LLDD gets the end device port
+	 * attributes which will have been scanned in sas_rphy_add().
+	 * A failure of this second notification is not reported.
+	 */
+	sas_notify_lldd_dev_found(dev);
+
+	return 0;
+}
+
+/* ---------- Device registration and unregistration ---------- */
+
+static inline void sas_unregister_common_dev(struct domain_device *dev)
+{
+	sas_notify_lldd_dev_gone(dev);	/* the LLDD is told first */
+	if (dev->parent)
+		list_del_init(&dev->siblings);	/* unlink from its siblings */
+	else
+		dev->port->port_dev = NULL;	/* parentless: clear the port's dev */
+	list_del_init(&dev->dev_list_node);	/* leave the port's dev_list */
+}
+
+/* Tear down @dev: its rphy, an expander's ex_phy array, then the common part. */
+void sas_unregister_dev(struct domain_device *dev)
+{
+	if (dev->rphy) {
+		sas_remove_children(&dev->rphy->dev);
+		sas_rphy_delete(dev->rphy);
+		dev->rphy = NULL;
+	}
+	if (dev->dev_type == EDGE_DEV || dev->dev_type == FANOUT_DEV) {
+		kfree(dev->ex_dev.ex_phy);	/* everything else should be gone */
+		dev->ex_dev.ex_phy = NULL;
+	}
+	sas_unregister_common_dev(dev);
+}
+
+/* Unregister every device of @port, walking dev_list from tail to head. */
+void sas_unregister_domain_devices(struct asd_sas_port *port)
+{
+	struct domain_device *cur, *prev;
+
+	list_for_each_entry_safe_reverse(cur, prev, &port->dev_list,
+					 dev_list_node)
+		sas_unregister_dev(cur);
+	port->port->rphy = NULL;
+}
+
+/* ---------- Discovery and Revalidation ---------- */
+
+/**
+ * sas_discover_domain -- discover the domain
+ * @data: the struct asd_sas_port leading to the domain of interest
+ *
+ * NOTE: this process _must_ quit (return) as soon as any connection
+ * errors are encountered.  Connection recovery is done elsewhere.
+ * Discover process only interrogates devices in order to discover the
+ * domain.
+ */
+static void sas_discover_domain(void *data)
+{
+	int error = 0;
+	struct asd_sas_port *port = data;
+
+	sas_begin_event(DISCE_DISCOVER_DOMAIN, &port->disc.disc_event_lock,
+			&port->disc.pending);
+
+	/* nothing to do if the port device exists or cannot be created */
+	if (port->port_dev || sas_get_port_device(port))
+		return;
+
+	SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id,
+		    current->pid);
+
+	switch (port->port_dev->dev_type) {
+	case SAS_END_DEV:
+		error = sas_discover_end_dev(port->port_dev);
+		break;
+	case EDGE_DEV:
+	case FANOUT_DEV:
+		error = sas_discover_root_expander(port->port_dev);
+		break;
+	case SATA_DEV:
+	case SATA_PM:
+		error = sas_discover_sata(port->port_dev);
+		break;
+	default:
+		SAS_DPRINTK("unhandled device %d\n", port->port_dev->dev_type);
+		break;
+	}
+
+	if (error) {
+		/* sas_get_port_device() put it on dev_list: unlink, then free */
+		spin_lock(&port->dev_list_lock);
+		list_del_init(&port->port_dev->dev_list_node);
+		spin_unlock(&port->dev_list_lock);
+		kfree(port->port_dev); /* not kobject_register-ed yet */
+		port->port_dev = NULL;
+	}
+
+	SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id,
+		    current->pid, error);
+}
+
+/* Work function for DISCE_REVALIDATE_DOMAIN; @data is the struct asd_sas_port. */
+static void sas_revalidate_domain(void *data)
+{
+	struct asd_sas_port *port = data;
+	struct sas_discovery *disc = &port->disc;
+	int rc = 0;
+
+	sas_begin_event(DISCE_REVALIDATE_DOMAIN, &disc->disc_event_lock,
+			&disc->pending);
+	SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id,
+		    current->pid);
+	if (port->port_dev)
+		rc = sas_ex_revalidate_domain(port->port_dev);
+	SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n",
+		    port->id, current->pid, rc);
+}
+
+/* ---------- Events ---------- */
+
+/*
+ * Queue discovery event @ev on @port; NULL @port is ignored.  Always returns 0.
+ */
+int sas_discover_event(struct asd_sas_port *port, enum discover_event ev)
+{
+	if (port) {
+		struct sas_discovery *d = &port->disc;
+
+		BUG_ON(ev >= DISC_NUM_EVENTS);
+		sas_queue_event(ev, &d->disc_event_lock, &d->pending,
+				&d->disc_work[ev], port->ha->core.shost);
+	}
+
+	return 0;
+}
+
+/**
+ * sas_init_disc -- initialize the discovery struct in the port
+ * @disc: the discovery struct to initialize
+ * @port: port handed to the discovery work functions as their argument
+ *
+ * Called when the ports are being initialized.
+ */
+void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port)
+{
+	static void (*handlers[DISC_NUM_EVENTS])(void *) = {
+		[DISCE_DISCOVER_DOMAIN] = sas_discover_domain,
+		[DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain,
+	};
+	int ev;
+
+	disc->pending = 0;
+	spin_lock_init(&disc->disc_event_lock);
+	for (ev = 0; ev < DISC_NUM_EVENTS; ev++)
+		INIT_WORK(&disc->disc_work[ev], handlers[ev], port);
+}
diff --git a/drivers/scsi/libsas/sas_dump.c b/drivers/scsi/libsas/sas_dump.c
new file mode 100644
index 0000000..f1246d2
--- /dev/null
+++ b/drivers/scsi/libsas/sas_dump.c
@@ -0,0 +1,76 @@
+/*
+ * Serial Attached SCSI (SAS) Dump/Debugging routines
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include "sas_dump.h"
+
+#ifdef SAS_DEBUG
+
+static const char *sas_hae_str[] = {	/* indexed by enum ha_event */
+	[0] = "HAE_RESET",
+};
+
+static const char *sas_porte_str[] = {	/* indexed by enum port_event */
+	[0] = "PORTE_BYTES_DMAED",
+	[1] = "PORTE_BROADCAST_RCVD",
+	[2] = "PORTE_LINK_RESET_ERR",
+	[3] = "PORTE_TIMER_EVENT",
+	[4] = "PORTE_HARD_RESET",
+};
+
+static const char *sas_phye_str[] = {	/* indexed by enum phy_event */
+	[0] = "PHYE_LOSS_OF_SIGNAL",
+	[1] = "PHYE_OOB_DONE",
+	[2] = "PHYE_OOB_ERROR",
+	[3] = "PHYE_SPINUP_HOLD",
+};
+
+void sas_dprint_porte(int phyid, enum port_event pe) /* name a port event */
+{
+	SAS_DPRINTK("phy%d: port event: %s\n", phyid, sas_porte_str[pe]);
+}
+void sas_dprint_phye(int phyid, enum phy_event pe) /* name a phy event */
+{
+	SAS_DPRINTK("phy%d: phy event: %s\n", phyid, sas_phye_str[pe]);
+}
+
+void sas_dprint_hae(struct sas_ha_struct *sas_ha, enum ha_event he)
+{
+	SAS_DPRINTK("ha %s: %s event\n", pci_name(sas_ha->pcidev),
+		    sas_hae_str[he]);	/* @he is not range-checked here */
+}
+
+void sas_dump_port(struct asd_sas_port *port)	/* print @port's fields */
+{
+	SAS_DPRINTK("port%d: class:0x%x\n", port->id, port->class);
+	SAS_DPRINTK("port%d: sas_addr:%llx\n", port->id,
+		    SAS_ADDR(port->sas_addr));
+	SAS_DPRINTK("port%d: attached_sas_addr:%llx\n", port->id,
+		    SAS_ADDR(port->attached_sas_addr));
+	SAS_DPRINTK("port%d: iproto:0x%x\n", port->id, port->iproto);
+	SAS_DPRINTK("port%d: tproto:0x%x\n", port->id, port->tproto);
+	SAS_DPRINTK("port%d: oob_mode:0x%x\n", port->id, port->oob_mode);
+	SAS_DPRINTK("port%d: num_phys:%d\n", port->id, port->num_phys);
+}
+
+#endif /* SAS_DEBUG */
diff --git a/drivers/scsi/libsas/sas_dump.h b/drivers/scsi/libsas/sas_dump.h
new file mode 100644
index 0000000..47b45d4
--- /dev/null
+++ b/drivers/scsi/libsas/sas_dump.h
@@ -0,0 +1,42 @@
+/*
+ * Serial Attached SCSI (SAS) Dump/Debugging routines header file
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef _SAS_DUMP_H_
+#define _SAS_DUMP_H_	/* guard: the stubs below must not be defined twice */
+
+#include "sas_internal.h"
+
+#ifdef SAS_DEBUG
+void sas_dprint_porte(int phyid, enum port_event pe);
+void sas_dprint_phye(int phyid, enum phy_event pe);
+void sas_dprint_hae(struct sas_ha_struct *sas_ha, enum ha_event he);
+void sas_dump_port(struct asd_sas_port *port);
+#else /* SAS_DEBUG: the helpers compile away to empty inlines */
+static inline void sas_dprint_porte(int phyid, enum port_event pe) { }
+static inline void sas_dprint_phye(int phyid, enum phy_event pe) { }
+static inline void sas_dprint_hae(struct sas_ha_struct *sas_ha,
+				  enum ha_event he) { }
+static inline void sas_dump_port(struct asd_sas_port *port) { }
+#endif /* SAS_DEBUG */
+#endif /* _SAS_DUMP_H_ */
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
new file mode 100644
index 0000000..19110ed
--- /dev/null
+++ b/drivers/scsi/libsas/sas_event.c
@@ -0,0 +1,75 @@
+/*
+ * Serial Attached SCSI (SAS) Event processing
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <scsi/scsi_host.h>
+#include "sas_internal.h"
+#include "sas_dump.h"
+
+/*
+ * Queue host-adapter event @event for @sas_ha.
+ *
+ * The event number is also the index of the work item in
+ * sas_ha->ha_events[]; a value of HA_NUM_EVENTS or above is a BUG.
+ */
+static void notify_ha_event(struct sas_ha_struct *sas_ha, enum ha_event event)
+{
+	struct Scsi_Host *shost;
+
+	BUG_ON(event >= HA_NUM_EVENTS);
+
+	shost = sas_ha->core.shost;
+	sas_queue_event(event, &sas_ha->event_lock, &sas_ha->pending,
+			sas_ha->ha_events + event, shost);
+}
+
+/*
+ * Queue port event @event raised on @phy.
+ *
+ * The pending mask and work item are per-phy; the lock and SCSI host come
+ * from the phy's host adapter.  An out-of-range @event is a BUG.
+ */
+static void notify_port_event(struct asd_sas_phy *phy, enum port_event event)
+{
+	struct sas_ha_struct *sas_ha;
+
+	BUG_ON(event >= PORT_NUM_EVENTS);
+
+	sas_ha = phy->ha;
+	sas_queue_event(event, &sas_ha->event_lock,
+			&phy->port_events_pending,
+			phy->port_events + event, sas_ha->core.shost);
+}
+
+/*
+ * Queue phy event @event raised on @phy.
+ *
+ * Same scheme as the port events: per-phy pending mask and work item,
+ * host-adapter-wide lock.  An out-of-range @event is a BUG.
+ */
+static void notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
+{
+	struct sas_ha_struct *sas_ha;
+
+	BUG_ON(event >= PHY_NUM_EVENTS);
+
+	sas_ha = phy->ha;
+	sas_queue_event(event, &sas_ha->event_lock,
+			&phy->phy_events_pending,
+			phy->phy_events + event, sas_ha->core.shost);
+}
+
+/*
+ * sas_init_events -- prepare event delivery for a host adapter
+ * @sas_ha: host adapter being set up
+ *
+ * Initialises the event lock, binds every HA event work item to its
+ * handler (with @sas_ha as the handler argument) and installs the three
+ * notify_*_event callbacks.  Always returns 0.
+ */
+int sas_init_events(struct sas_ha_struct *sas_ha)
+{
+	/* Handler for each HA event, indexed by event number. */
+	static void (*ha_event_handlers[HA_NUM_EVENTS])(void *) = {
+		[HAE_RESET] = sas_hae_reset,
+	};
+	int ev = 0;
+
+	spin_lock_init(&sas_ha->event_lock);
+
+	while (ev < HA_NUM_EVENTS) {
+		INIT_WORK(&sas_ha->ha_events[ev], ha_event_handlers[ev],
+			  sas_ha);
+		ev++;
+	}
+
+	sas_ha->notify_ha_event = notify_ha_event;
+	sas_ha->notify_port_event = notify_port_event;
+	sas_ha->notify_phy_event = notify_phy_event;
+
+	return 0;
+}
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
new file mode 100644
index 0000000..30b8014
--- /dev/null
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -0,0 +1,1855 @@
+/*
+ * Serial Attached SCSI (SAS) Expander discovery and configuration
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+
+#include "sas_internal.h"
+
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+
+/*
+ * Forward declarations: these static helpers are called further down in
+ * this file before a definition has been seen.
+ */
+static int sas_discover_expander(struct domain_device *dev);
+static int sas_configure_routing(struct domain_device *dev, u8 *sas_addr);
+static int sas_configure_phy(struct domain_device *dev, int phy_id,
+			     u8 *sas_addr, int include);
+static int sas_disable_routing(struct domain_device *dev,  u8 *sas_addr);
+
+#if 0
+/* FIXME: smp needs to migrate into the sas class */
+/* Compiled out: prototypes of the SMP portal read/write handlers. */
+static ssize_t smp_portal_read(struct kobject *, char *, loff_t, size_t);
+static ssize_t smp_portal_write(struct kobject *, char *, loff_t, size_t);
+#endif
+
+/* ---------- SMP task management ---------- */
+
+/*
+ * Timer callback for an SMP task; @_task is the struct sas_task pointer
+ * stored in timer.data.
+ *
+ * If the task has not been marked done yet it is flagged as aborted (under
+ * the task state lock).  The waiter is woken in either case.
+ */
+static void smp_task_timedout(unsigned long _task)
+{
+	struct sas_task *expired = (struct sas_task *) _task;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&expired->task_state_lock, irq_flags);
+	if ((expired->task_state_flags & SAS_TASK_STATE_DONE) == 0)
+		expired->task_state_flags |= SAS_TASK_STATE_ABORTED;
+	spin_unlock_irqrestore(&expired->task_state_lock, irq_flags);
+
+	complete(&expired->completion);
+}
+
+/*
+ * Completion callback for an SMP task.
+ *
+ * Wakes the waiter only if the timeout timer could still be cancelled;
+ * when del_timer() reports the timer was no longer pending, the wake-up
+ * is left to smp_task_timedout().
+ */
+static void smp_task_done(struct sas_task *task)
+{
+	if (del_timer(&task->timer))
+		complete(&task->completion);
+}
+
+/* Give it some long enough timeout. In seconds. */
+#define SMP_TIMEOUT 10
+
+/*
+ * smp_execute_task -- send one SMP request to @dev and wait for the answer
+ * @dev: target device; also supplies the LLDD function table via its port
+ * @req: request frame, @req_size bytes
+ * @resp: buffer for the response frame, @resp_size bytes
+ *
+ * Blocks until the task completes or the SMP_TIMEOUT timer fires.
+ *
+ * Returns 0 only when the task status is SAS_TASK_COMPLETE / SAM_GOOD,
+ * -ENOMEM if no task could be allocated, the LLDD's error code if the
+ * task could not be submitted, and -ETASK in every other case.
+ */
+static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
+			    void *resp, int resp_size)
+{
+	int res;
+	struct sas_task *task = sas_alloc_task(GFP_KERNEL);
+	struct sas_internal *i =
+		to_sas_internal(dev->port->ha->core.shost->transportt);
+
+	if (!task)
+		return -ENOMEM;
+
+	task->dev = dev;
+	task->task_proto = dev->tproto;
+	/* Request and response are each described by a single sg entry. */
+	sg_init_one(&task->smp_task.smp_req, req, req_size);
+	sg_init_one(&task->smp_task.smp_resp, resp, resp_size);
+
+	task->task_done = smp_task_done;
+
+	/*
+	 * Arm the timeout before submitting.
+	 * NOTE(review): assumes sas_alloc_task() has already initialised
+	 * task->timer and task->completion -- confirm.
+	 */
+	task->timer.data = (unsigned long) task;
+	task->timer.function = smp_task_timedout;
+	task->timer.expires = jiffies + SMP_TIMEOUT*HZ;
+	add_timer(&task->timer);
+
+	/* Second argument is presumably the number of tasks -- confirm. */
+	res = i->dft->lldd_execute_task(task, 1, GFP_KERNEL);
+
+	if (res) {
+		/* Never submitted: nobody else will cancel the timer. */
+		del_timer(&task->timer);
+		SAS_DPRINTK("executing SMP task failed:%d\n", res);
+		goto ex_err;
+	}
+
+	/* Woken by smp_task_done() or by smp_task_timedout(). */
+	wait_for_completion(&task->completion);
+	res = -ETASK;
+	if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
+		SAS_DPRINTK("smp task timed out or aborted\n");
+		i->dft->lldd_abort_task(task);
+		/* Only trust task_status if the task became done after all. */
+		if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+			SAS_DPRINTK("SMP task aborted and not done\n");
+			goto ex_err;
+		}
+	}
+	if (task->task_status.resp == SAS_TASK_COMPLETE &&
+	    task->task_status.stat == SAM_GOOD)
+		res = 0;
+	else
+		SAS_DPRINTK("%s: task to dev %016llx response: 0x%x "
+			    "status 0x%x\n", __FUNCTION__,
+			    SAS_ADDR(dev->sas_addr),
+			    task->task_status.resp,
+			    task->task_status.stat);
+ex_err:
+	sas_free_task(task);
+	return res;
+}
+
+/* ---------- Allocations ---------- */
+
+/*
+ * Allocate a zeroed SMP request frame of @size bytes and stamp the frame
+ * type (SMP_REQUEST) into byte 0.  Returns NULL on allocation failure;
+ * the caller frees the buffer with kfree().
+ */
+static inline void *alloc_smp_req(int size)
+{
+	u8 *frame = kzalloc(size, GFP_KERNEL);
+
+	if (!frame)
+		return NULL;
+
+	frame[0] = SMP_REQUEST;
+	return frame;
+}
+
+/*
+ * Allocate a zeroed buffer of @size bytes for an SMP response frame.
+ * Returns NULL on allocation failure; the caller frees it with kfree().
+ */
+static inline void *alloc_smp_resp(int size)
+{
+	void *frame = kzalloc(size, GFP_KERNEL);
+
+	return frame;
+}
+
+/* ---------- Expander configuration ---------- */
+
+/*
+ * sas_set_ex_phy -- record one phy's SMP DISCOVER response
+ * @dev: expander the phy belongs to
+ * @phy_id: index into dev->ex_dev.ex_phy[]
+ * @disc_resp: raw response buffer, interpreted as struct smp_resp
+ *
+ * On first discovery (phy->phy still NULL) a sas_phy object is allocated;
+ * it is registered with sas_phy_add() only if the response result is
+ * SMP_RESP_FUNC_ACC.  For any other result only phy_state is updated and
+ * the function returns before copying anything else.
+ */
+static void sas_set_ex_phy(struct domain_device *dev, int phy_id,
+			   void *disc_resp)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *phy = &ex->ex_phy[phy_id];
+	struct smp_resp *resp = disc_resp;
+	struct discover_resp *dr = &resp->disc;
+	struct sas_rphy *rphy = dev->rphy;
+	/* A phy that already owns a sas_phy is being re-discovered. */
+	int rediscover = (phy->phy != NULL);
+
+	if (!rediscover) {
+		phy->phy = sas_phy_alloc(&rphy->dev, phy_id);
+
+		/* FIXME: error_handling */
+		BUG_ON(!phy->phy);
+	}
+
+	/* Note the default label sits before the FUNC_ACC case on purpose. */
+	switch (resp->result) {
+	case SMP_RESP_PHY_VACANT:
+		phy->phy_state = PHY_VACANT;
+		return;
+	default:
+		phy->phy_state = PHY_NOT_PRESENT;
+		return;
+	case SMP_RESP_FUNC_ACC:
+		phy->phy_state = PHY_EMPTY; /* do not know yet */
+		break;
+	}
+
+	/* Cache the DISCOVER fields in the expander's own phy table ... */
+	phy->phy_id = phy_id;
+	phy->attached_dev_type = dr->attached_dev_type;
+	phy->linkrate = dr->linkrate;
+	phy->attached_sata_host = dr->attached_sata_host;
+	phy->attached_sata_dev  = dr->attached_sata_dev;
+	phy->attached_sata_ps   = dr->attached_sata_ps;
+	/*
+	 * NOTE(review): the << 1 presumably lines the response bits up with
+	 * the SAS_PROTO_* masks tested elsewhere in this file -- confirm.
+	 */
+	phy->attached_iproto = dr->iproto << 1;
+	phy->attached_tproto = dr->tproto << 1;
+	memcpy(phy->attached_sas_addr, dr->attached_sas_addr, SAS_ADDR_SIZE);
+	phy->attached_phy_id = dr->attached_phy_id;
+	phy->phy_change_count = dr->change_count;
+	phy->routing_attr = dr->routing_attr;
+	phy->virtual = dr->virtual;
+	phy->last_da_index = -1;
+
+	/* ... and mirror the relevant ones into the sas_phy object. */
+	phy->phy->identify.initiator_port_protocols = phy->attached_iproto;
+	phy->phy->identify.target_port_protocols = phy->attached_tproto;
+	phy->phy->identify.phy_identifier = phy_id;
+	phy->phy->minimum_linkrate_hw = dr->hmin_linkrate;
+	phy->phy->maximum_linkrate_hw = dr->hmax_linkrate;
+	phy->phy->minimum_linkrate = dr->pmin_linkrate;
+	phy->phy->maximum_linkrate = dr->pmax_linkrate;
+	phy->phy->negotiated_linkrate = phy->linkrate;
+
+	/* Register the sas_phy once, on first discovery only. */
+	if (!rediscover)
+		sas_phy_add(phy->phy);
+
+	SAS_DPRINTK("ex %016llx phy%02d:%c attached: %016llx\n",
+		    SAS_ADDR(dev->sas_addr), phy->phy_id,
+		    phy->routing_attr == TABLE_ROUTING ? 'T' :
+		    phy->routing_attr == DIRECT_ROUTING ? 'D' :
+		    phy->routing_attr == SUBTRACTIVE_ROUTING ? 'S' : '?',
+		    SAS_ADDR(phy->attached_sas_addr));
+
+	return;
+}
+
+#define DISCOVER_REQ_SIZE  16
+#define DISCOVER_RESP_SIZE 56
+
+/*
+ * sas_ex_phy_discover -- send SMP DISCOVER to one or all phys of an expander
+ * @dev: the expander domain device
+ * @single: phy number to query; any value outside 0..num_phys-1 (callers
+ *          in this file pass -1) means "query every phy"
+ *
+ * Every successful response is handed to sas_set_ex_phy().  In the
+ * all-phys case the walk stops at the first phy whose request fails.
+ *
+ * Returns 0 on success, -ENOMEM if a buffer could not be allocated, or
+ * the error returned by smp_execute_task().
+ */
+static int sas_ex_phy_discover(struct domain_device *dev, int single)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int  res = 0;
+	u8   *disc_req;
+	u8   *disc_resp;
+
+	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
+	if (!disc_req)
+		return -ENOMEM;
+
+	/*
+	 * Use the response allocator: alloc_smp_req() would stamp
+	 * SMP_REQUEST into byte 0 of what must start as an all-zero buffer.
+	 */
+	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	if (!disc_resp) {
+		kfree(disc_req);
+		return -ENOMEM;
+	}
+
+	disc_req[1] = SMP_DISCOVER;
+
+	if (0 <= single && single < ex->num_phys) {
+		/* Byte 9 of the request selects the phy. */
+		disc_req[9] = single;
+		res = smp_execute_task(dev, disc_req, DISCOVER_REQ_SIZE,
+				       disc_resp, DISCOVER_RESP_SIZE);
+		if (res)
+			goto out_err;
+		sas_set_ex_phy(dev, single, disc_resp);
+	} else {
+		int i;
+
+		/* Both buffers are reused for every phy. */
+		for (i = 0; i < ex->num_phys; i++) {
+			disc_req[9] = i;
+			res = smp_execute_task(dev, disc_req,
+					       DISCOVER_REQ_SIZE, disc_resp,
+					       DISCOVER_RESP_SIZE);
+			if (res)
+				goto out_err;
+			sas_set_ex_phy(dev, i, disc_resp);
+		}
+	}
+out_err:
+	kfree(disc_resp);
+	kfree(disc_req);
+	return res;
+}
+
+/*
+ * sas_expander_discover -- allocate the phy table and discover every phy
+ * @dev: the expander domain device; ex_dev.num_phys must already be set
+ *
+ * Returns 0 on success.  On failure the phy table is freed again,
+ * ex_dev.ex_phy is reset to NULL and the error code is returned.
+ */
+static int sas_expander_discover(struct domain_device *dev)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int res;
+
+	ex->ex_phy = kzalloc(sizeof(*ex->ex_phy)*ex->num_phys, GFP_KERNEL);
+	if (ex->ex_phy == NULL)
+		return -ENOMEM;
+
+	/* -1: discover all phys. */
+	res = sas_ex_phy_discover(dev, -1);
+	if (res == 0)
+		return 0;
+
+	kfree(ex->ex_phy);
+	ex->ex_phy = NULL;
+	return res;
+}
+
+#define MAX_EXPANDER_PHYS 128
+
+/*
+ * Copy the fields of a REPORT GENERAL response into @dev's expander
+ * state.  The two 16-bit counters are converted from big endian, and the
+ * reported phy count is clamped to MAX_EXPANDER_PHYS.
+ */
+static void ex_assign_report_general(struct domain_device *dev,
+					    struct smp_resp *resp)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct report_general_resp *rg = &resp->rg;
+
+	ex->ex_change_count = be16_to_cpu(rg->change_count);
+	ex->max_route_indexes = be16_to_cpu(rg->route_indexes);
+	ex->num_phys = min(rg->num_phys, (u8)MAX_EXPANDER_PHYS);
+	ex->conf_route_table = rg->conf_route_table;
+	ex->configuring = rg->configuring;
+	memcpy(ex->enclosure_logical_id, rg->enclosure_logical_id, 8);
+}
+
+#define RG_REQ_SIZE   8
+#define RG_RESP_SIZE 32
+
+/*
+ * sas_ex_general -- issue SMP REPORT GENERAL to an expander
+ * @dev: the expander domain device
+ *
+ * The request is tried up to five times: while the expander reports that
+ * it is still configuring, sleep for five seconds and ask again.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, the
+ * smp_execute_task() error, or the (positive) SMP result code when the
+ * expander did not accept the function.
+ */
+static int sas_ex_general(struct domain_device *dev)
+{
+	struct smp_resp *rg_resp;
+	u8 *rg_req;
+	int attempt;
+	int res;
+
+	rg_req = alloc_smp_req(RG_REQ_SIZE);
+	if (!rg_req)
+		return -ENOMEM;
+
+	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
+	if (!rg_resp) {
+		kfree(rg_req);
+		return -ENOMEM;
+	}
+
+	rg_req[1] = SMP_REPORT_GENERAL;
+
+	for (attempt = 0; attempt < 5; attempt++) {
+		res = smp_execute_task(dev, rg_req, RG_REQ_SIZE, rg_resp,
+				       RG_RESP_SIZE);
+		if (res) {
+			SAS_DPRINTK("RG to ex %016llx failed:0x%x\n",
+				    SAS_ADDR(dev->sas_addr), res);
+			break;
+		}
+		if (rg_resp->result != SMP_RESP_FUNC_ACC) {
+			SAS_DPRINTK("RG:ex %016llx returned SMP result:0x%x\n",
+				    SAS_ADDR(dev->sas_addr), rg_resp->result);
+			res = rg_resp->result;
+			break;
+		}
+
+		ex_assign_report_general(dev, rg_resp);
+
+		/* Done as soon as the expander is no longer configuring. */
+		if (!dev->ex_dev.configuring)
+			break;
+
+		SAS_DPRINTK("RG: ex %llx self-configuring...\n",
+			    SAS_ADDR(dev->sas_addr));
+		schedule_timeout_interruptible(5*HZ);
+	}
+
+	kfree(rg_req);
+	kfree(rg_resp);
+	return res;
+}
+
+/*
+ * Copy the identification strings out of a raw REPORT MANUFACTURER
+ * INFORMATION response (@_mi_resp) into the expander object behind
+ * @dev->rphy.  The component fields are only taken when bit 0 of
+ * response byte 8 is set.
+ */
+static void ex_assign_manuf_info(struct domain_device *dev, void
+					*_mi_resp)
+{
+	struct sas_expander_device *edev = rphy_to_expander_device(dev->rphy);
+	u8 *frame = _mi_resp;
+
+	memcpy(edev->vendor_id, frame + 12, SAS_EXPANDER_VENDOR_ID_LEN);
+	memcpy(edev->product_id, frame + 20, SAS_EXPANDER_PRODUCT_ID_LEN);
+	memcpy(edev->product_rev, frame + 36, SAS_EXPANDER_PRODUCT_REV_LEN);
+
+	if (!(frame[8] & 1))
+		return;
+
+	memcpy(edev->component_vendor_id, frame + 40,
+	       SAS_EXPANDER_COMPONENT_VENDOR_ID_LEN);
+	/* Bytes 48/49 hold the component id, high byte first. */
+	edev->component_id = frame[48] << 8 | frame[49];
+	edev->component_revision_id = frame[50];
+}
+
+#define MI_REQ_SIZE   8
+#define MI_RESP_SIZE 64
+
+/*
+ * sas_ex_manuf_info -- issue SMP REPORT MANUFACTURER INFORMATION
+ * @dev: the expander domain device
+ *
+ * Returns -ENOMEM on allocation failure or the smp_execute_task() error.
+ * Note that 0 is returned both on success and when the expander answers
+ * with a result other than SMP_RESP_FUNC_ACC; in the latter case the
+ * manufacturer fields are simply left untouched.
+ */
+static int sas_ex_manuf_info(struct domain_device *dev)
+{
+	u8 *request;
+	u8 *response;
+	int res;
+
+	request = alloc_smp_req(MI_REQ_SIZE);
+	if (!request)
+		return -ENOMEM;
+
+	response = alloc_smp_resp(MI_RESP_SIZE);
+	if (!response) {
+		kfree(request);
+		return -ENOMEM;
+	}
+
+	request[1] = SMP_REPORT_MANUF_INFO;
+
+	res = smp_execute_task(dev, request, MI_REQ_SIZE, response,
+			       MI_RESP_SIZE);
+	if (res) {
+		SAS_DPRINTK("MI: ex %016llx failed:0x%x\n",
+			    SAS_ADDR(dev->sas_addr), res);
+	} else if (response[2] != SMP_RESP_FUNC_ACC) {
+		/* Byte 2 of the response is the SMP function result. */
+		SAS_DPRINTK("MI ex %016llx returned SMP result:0x%x\n",
+			    SAS_ADDR(dev->sas_addr), response[2]);
+	} else {
+		ex_assign_manuf_info(dev, response);
+	}
+
+	kfree(request);
+	kfree(response);
+	return res;
+}
+
+#define PC_REQ_SIZE  44
+#define PC_RESP_SIZE 8
+
+/*
+ * sas_smp_phy_control -- issue SMP PHY CONTROL to an expander phy
+ * @dev: the expander domain device
+ * @phy_id: phy to operate on (request byte 9)
+ * @phy_func: operation to perform (request byte 10)
+ * @rates: optional programmed min/max link rates, may be NULL
+ *
+ * Returns -ENOMEM on allocation failure, otherwise whatever
+ * smp_execute_task() returns.  The SMP result byte of the response is
+ * not examined here.
+ */
+int sas_smp_phy_control(struct domain_device *dev, int phy_id,
+			enum phy_func phy_func,
+			struct sas_phy_linkrates *rates)
+{
+	u8 *request;
+	u8 *response;
+	int rc = -ENOMEM;
+
+	request = alloc_smp_req(PC_REQ_SIZE);
+	if (!request)
+		return rc;
+
+	response = alloc_smp_resp(PC_RESP_SIZE);
+	if (!response)
+		goto free_request;
+
+	request[1] = SMP_PHY_CONTROL;
+	request[9] = phy_id;
+	request[10] = phy_func;
+	if (rates != NULL) {
+		/* The rates go into the upper nibble of bytes 32 and 33. */
+		request[32] = rates->minimum_linkrate << 4;
+		request[33] = rates->maximum_linkrate << 4;
+	}
+
+	rc = smp_execute_task(dev, request, PC_REQ_SIZE, response,
+			      PC_RESP_SIZE);
+
+	kfree(response);
+free_request:
+	kfree(request);
+	return rc;
+}
+
+/*
+ * Ask expander @dev to disable phy @phy_id and mark the cached phy entry
+ * as SAS_PHY_DISABLED.  The result of the PHY CONTROL request is ignored,
+ * so the cached state changes even if the request failed.
+ */
+static void sas_ex_disable_phy(struct domain_device *dev, int phy_id)
+{
+	struct ex_phy *target = &dev->ex_dev.ex_phy[phy_id];
+
+	sas_smp_phy_control(dev, phy_id, PHY_FUNC_DISABLE, NULL);
+	target->linkrate = SAS_PHY_DISABLED;
+}
+
+/*
+ * Disable every phy of expander @dev that is attached to @sas_addr.
+ * Phys in state PHY_VACANT or PHY_NOT_PRESENT are never considered.
+ */
+static void sas_ex_disable_port(struct domain_device *dev, u8 *sas_addr)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int phy_id;
+
+	for (phy_id = 0; phy_id < ex->num_phys; phy_id++) {
+		struct ex_phy *cur = &ex->ex_phy[phy_id];
+
+		if (cur->phy_state != PHY_VACANT &&
+		    cur->phy_state != PHY_NOT_PRESENT &&
+		    SAS_ADDR(cur->attached_sas_addr) == SAS_ADDR(sas_addr))
+			sas_ex_disable_phy(dev, phy_id);
+	}
+}
+
+/*
+ * Return 1 if @sas_addr is the address of @port itself or of any device
+ * on the port's dev_list, 0 otherwise.  The list is walked without
+ * taking dev_list_lock here.
+ */
+static int sas_dev_present_in_domain(struct asd_sas_port *port,
+					    u8 *sas_addr)
+{
+	struct domain_device *entry;
+	int found = (SAS_ADDR(port->sas_addr) == SAS_ADDR(sas_addr));
+
+	if (found)
+		return 1;
+
+	list_for_each_entry(entry, &port->dev_list, dev_list_node) {
+		if (SAS_ADDR(entry->sas_addr) == SAS_ADDR(sas_addr)) {
+			found = 1;
+			break;
+		}
+	}
+	return found;
+}
+
+#define RPEL_REQ_SIZE	16
+#define RPEL_RESP_SIZE	32
+
+/*
+ * sas_smp_get_phy_events -- refresh a phy's error counters via SMP
+ * @phy: expander phy whose counters should be read
+ *
+ * Sends REPORT PHY ERROR LOG to the expander owning @phy and, on success,
+ * stores the four counters found at response bytes 12, 16, 20 and 24 in
+ * @phy.  On failure the counters in @phy are left untouched.
+ *
+ * Returns 0 on success, -ENOMEM if a buffer could not be allocated, or
+ * the error returned by smp_execute_task().
+ */
+int sas_smp_get_phy_events(struct sas_phy *phy)
+{
+	int res;
+	struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
+	struct domain_device *dev = sas_find_dev_by_rphy(rphy);
+	u8 *req;
+	u8 *resp;
+
+	req = alloc_smp_req(RPEL_REQ_SIZE);
+	if (!req)
+		return -ENOMEM;
+
+	resp = alloc_smp_resp(RPEL_RESP_SIZE);
+	if (!resp) {
+		kfree(req);
+		return -ENOMEM;
+	}
+
+	req[1] = SMP_REPORT_PHY_ERR_LOG;
+	req[9] = phy->number;
+
+	res = smp_execute_task(dev, req, RPEL_REQ_SIZE,
+			       resp, RPEL_RESP_SIZE);
+
+	/* Only a successful exchange leaves valid counters in resp. */
+	if (res)
+		goto out;
+
+	phy->invalid_dword_count = scsi_to_u32(&resp[12]);
+	phy->running_disparity_error_count = scsi_to_u32(&resp[16]);
+	phy->loss_of_dword_sync_count = scsi_to_u32(&resp[20]);
+	phy->phy_reset_problem_count = scsi_to_u32(&resp[24]);
+
+ out:
+	kfree(resp);
+	kfree(req);
+	return res;
+
+}
+
+#define RPS_REQ_SIZE  16
+#define RPS_RESP_SIZE 60
+
+/*
+ * sas_get_report_phy_sata -- issue SMP REPORT PHY SATA for one phy
+ * @dev: the expander domain device
+ * @phy_id: phy to query (request byte 9)
+ * @rps_resp: caller-provided buffer that receives the response
+ *
+ * Returns 0 on success, -ENOMEM if the request could not be allocated,
+ * or the error returned by smp_execute_task(), so that the caller's
+ * error handling actually sees a failed request.
+ */
+static int sas_get_report_phy_sata(struct domain_device *dev,
+					  int phy_id,
+					  struct smp_resp *rps_resp)
+{
+	int res;
+	u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE);
+
+	if (!rps_req)
+		return -ENOMEM;
+
+	rps_req[1] = SMP_REPORT_PHY_SATA;
+	rps_req[9] = phy_id;
+
+	res = smp_execute_task(dev, rps_req, RPS_REQ_SIZE,
+			       rps_resp, RPS_RESP_SIZE);
+
+	kfree(rps_req);
+	return res;
+}
+
+/*
+ * sas_ex_get_linkrate -- derive a child's link rates and pathway count
+ * @parent: expander the child hangs off
+ * @child: device being set up
+ * @parent_phy: the parent phy on which the child was found
+ *
+ * Every usable parent phy attached to the child's address updates the
+ * child's min/max link rate, counts as one pathway and is added to
+ * @parent_phy's port.  Finally the child's link rate and pathway count
+ * are capped by @parent_phy's link rate and the parent's pathway count.
+ */
+static void sas_ex_get_linkrate(struct domain_device *parent,
+				       struct domain_device *child,
+				       struct ex_phy *parent_phy)
+{
+	struct expander_device *parent_ex = &parent->ex_dev;
+	struct sas_port *port = parent_phy->port;
+	int idx;
+
+	child->pathways = 0;
+
+	for (idx = 0; idx < parent_ex->num_phys; idx++) {
+		struct ex_phy *cand = &parent_ex->ex_phy[idx];
+
+		if (cand->phy_state == PHY_VACANT ||
+		    cand->phy_state == PHY_NOT_PRESENT)
+			continue;
+
+		if (SAS_ADDR(cand->attached_sas_addr) !=
+		    SAS_ADDR(child->sas_addr))
+			continue;
+
+		child->min_linkrate = min(parent->min_linkrate,
+					  cand->linkrate);
+		child->max_linkrate = max(parent->max_linkrate,
+					  cand->linkrate);
+		child->pathways++;
+		sas_port_add_phy(port, cand->phy);
+	}
+	child->linkrate = min(parent_phy->linkrate, child->max_linkrate);
+	child->pathways = min(child->pathways, parent->pathways);
+}
+
+/*
+ * sas_ex_discover_end_dev -- set up the end device attached to a phy
+ * @parent: expander domain device
+ * @phy_id: parent phy the end device is attached to
+ *
+ * Allocates a domain_device for the attached device, creates and adds a
+ * sas_port for the phy, then runs SATA or SSP discovery depending on the
+ * attached target protocols.
+ *
+ * Returns the new child, linked into the parent's children list, or NULL
+ * if the phy reports an attached SATA host / port selector, if the
+ * allocation fails, or if discovery fails.
+ *
+ * NOTE(review): none of the failure paths below releases phy->port, and
+ * the SSP failure path does not free the child (see the FIXME there).
+ */
+static struct domain_device *sas_ex_discover_end_dev(
+	struct domain_device *parent, int phy_id)
+{
+	struct expander_device *parent_ex = &parent->ex_dev;
+	struct ex_phy *phy = &parent_ex->ex_phy[phy_id];
+	struct domain_device *child = NULL;
+	struct sas_rphy *rphy;
+	int res;
+
+	if (phy->attached_sata_host || phy->attached_sata_ps)
+		return NULL;
+
+	child = kzalloc(sizeof(*child), GFP_KERNEL);
+	if (!child)
+		return NULL;
+
+	child->parent = parent;
+	child->port   = parent->port;
+	child->iproto = phy->attached_iproto;
+	memcpy(child->sas_addr, phy->attached_sas_addr, SAS_ADDR_SIZE);
+	sas_hash_addr(child->hashed_sas_addr, child->sas_addr);
+	phy->port = sas_port_alloc(&parent->rphy->dev, phy_id);
+	BUG_ON(!phy->port);
+	/* FIXME: better error handling*/
+	BUG_ON(sas_port_add(phy->port) != 0);
+	sas_ex_get_linkrate(parent, child, phy);
+
+	if ((phy->attached_tproto & SAS_PROTO_STP) || phy->attached_sata_dev) {
+		/* STP target or directly attached SATA device. */
+		child->dev_type = SATA_DEV;
+		if (phy->attached_tproto & SAS_PROTO_STP)
+			child->tproto = phy->attached_tproto;
+		/*
+		 * NOTE(review): this ORs SATA_DEV, the constant stored in
+		 * dev_type just above, into the protocol mask -- confirm
+		 * that this is intended.
+		 */
+		if (phy->attached_sata_dev)
+			child->tproto |= SATA_DEV;
+		res = sas_get_report_phy_sata(parent, phy_id,
+					      &child->sata_dev.rps_resp);
+		if (res) {
+			SAS_DPRINTK("report phy sata to %016llx:0x%x returned "
+				    "0x%x\n", SAS_ADDR(parent->sas_addr),
+				    phy_id, res);
+			/* Child is not on any list yet, so freeing is safe. */
+			kfree(child);
+			return NULL;
+		}
+		/* The FIS from the response serves as the received frame. */
+		memcpy(child->frame_rcvd, &child->sata_dev.rps_resp.rps.fis,
+		       sizeof(struct dev_to_host_fis));
+		sas_init_dev(child);
+		res = sas_discover_sata(child);
+		if (res) {
+			SAS_DPRINTK("sas_discover_sata() for device %16llx at "
+				    "%016llx:0x%x returned 0x%x\n",
+				    SAS_ADDR(child->sas_addr),
+				    SAS_ADDR(parent->sas_addr), phy_id, res);
+			kfree(child);
+			return NULL;
+		}
+	} else if (phy->attached_tproto & SAS_PROTO_SSP) {
+		child->dev_type = SAS_END_DEV;
+		rphy = sas_end_device_alloc(phy->port);
+		/* FIXME: error handling */
+		BUG_ON(!rphy);
+		child->tproto = phy->attached_tproto;
+		sas_init_dev(child);
+
+		child->rphy = rphy;
+		sas_fill_in_rphy(child, rphy);
+
+		/* Only the SSP path links the child into the port dev_list here. */
+		spin_lock(&parent->port->dev_list_lock);
+		list_add_tail(&child->dev_list_node, &parent->port->dev_list);
+		spin_unlock(&parent->port->dev_list_lock);
+
+		res = sas_discover_end_dev(child);
+		if (res) {
+			SAS_DPRINTK("sas_discover_end_dev() for device %16llx "
+				    "at %016llx:0x%x returned 0x%x\n",
+				    SAS_ADDR(child->sas_addr),
+				    SAS_ADDR(parent->sas_addr), phy_id, res);
+			/* FIXME: this kfrees list elements without removing them */
+			//kfree(child);
+			return NULL;
+		}
+	} else {
+		/*
+		 * Only logged: execution falls through, so the child is
+		 * still linked into the children list and returned.
+		 */
+		SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
+			    phy->attached_tproto, SAS_ADDR(parent->sas_addr),
+			    phy_id);
+	}
+
+	list_add_tail(&child->siblings, &parent_ex->children);
+	return child;
+}
+
+/*
+ * sas_ex_discover_expander -- set up and discover an attached expander
+ * @parent: expander domain device
+ * @phy_id: parent phy the child expander is attached to
+ *
+ * Refuses expander-to-expander links on a direct-routing phy.  Otherwise
+ * allocates the child's domain_device, sas_port and expander rphy, fills
+ * them in from the parent phy, links the child into the port's dev_list
+ * and runs sas_discover_expander() on it.
+ *
+ * Returns the new child, linked into the parent's children list, or NULL
+ * on a refused link, allocation failure or discovery failure.
+ *
+ * NOTE(review): on discovery failure the port and the already added rphy
+ * are not torn down here.
+ */
+static struct domain_device *sas_ex_discover_expander(
+	struct domain_device *parent, int phy_id)
+{
+	struct sas_expander_device *parent_ex = rphy_to_expander_device(parent->rphy);
+	struct ex_phy *phy = &parent->ex_dev.ex_phy[phy_id];
+	struct domain_device *child = NULL;
+	struct sas_rphy *rphy;
+	struct sas_expander_device *edev;
+	struct asd_sas_port *port;
+	int res;
+
+	if (phy->routing_attr == DIRECT_ROUTING) {
+		SAS_DPRINTK("ex %016llx:0x%x:D <--> ex %016llx:0x%x is not "
+			    "allowed\n",
+			    SAS_ADDR(parent->sas_addr), phy_id,
+			    SAS_ADDR(phy->attached_sas_addr),
+			    phy->attached_phy_id);
+		return NULL;
+	}
+	child = kzalloc(sizeof(*child), GFP_KERNEL);
+	if (!child)
+		return NULL;
+
+	phy->port = sas_port_alloc(&parent->rphy->dev, phy_id);
+	/* FIXME: better error handling */
+	BUG_ON(!phy->port);
+	BUG_ON(sas_port_add(phy->port) != 0);
+
+
+	switch (phy->attached_dev_type) {
+	case EDGE_DEV:
+		rphy = sas_expander_alloc(phy->port,
+					  SAS_EDGE_EXPANDER_DEVICE);
+		break;
+	case FANOUT_DEV:
+		rphy = sas_expander_alloc(phy->port,
+					  SAS_FANOUT_EXPANDER_DEVICE);
+		break;
+	default:
+		rphy = NULL;	/* shut gcc up */
+		BUG();
+	}
+	/* FIXME: error handling */
+	BUG_ON(!rphy);
+	port = parent->port;
+	child->rphy = rphy;
+	edev = rphy_to_expander_device(rphy);
+	child->dev_type = phy->attached_dev_type;
+	child->parent = parent;
+	child->port = port;
+	child->iproto = phy->attached_iproto;
+	child->tproto = phy->attached_tproto;
+	memcpy(child->sas_addr, phy->attached_sas_addr, SAS_ADDR_SIZE);
+	sas_hash_addr(child->hashed_sas_addr, child->sas_addr);
+	sas_ex_get_linkrate(parent, child, phy);
+	/* The child sits one level below its parent in the expander tree. */
+	edev->level = parent_ex->level + 1;
+	parent->port->disc.max_level = max(parent->port->disc.max_level,
+					   edev->level);
+	sas_init_dev(child);
+	sas_fill_in_rphy(child, rphy);
+	sas_rphy_add(rphy);
+
+	spin_lock(&parent->port->dev_list_lock);
+	list_add_tail(&child->dev_list_node, &parent->port->dev_list);
+	spin_unlock(&parent->port->dev_list_lock);
+
+	res = sas_discover_expander(child);
+	if (res) {
+		/*
+		 * The child is already on the port's dev_list: unlink it
+		 * before freeing, or the list keeps a dangling entry.
+		 */
+		spin_lock(&parent->port->dev_list_lock);
+		list_del(&child->dev_list_node);
+		spin_unlock(&parent->port->dev_list_lock);
+		kfree(child);
+		return NULL;
+	}
+	list_add_tail(&child->siblings, &parent->ex_dev.children);
+	return child;
+}
+
+/*
+ * sas_ex_discover_dev -- discover whatever is attached to one expander phy
+ * @dev: expander domain device
+ * @phy_id: phy to look at
+ *
+ * Works through the checks below in order; most of them end the function
+ * early with 0.  If the phy passes them all, routing is configured for
+ * the attached address and the attached end device or expander is
+ * discovered.
+ *
+ * Returns 0, or the error from re-discovering the phy or from
+ * sas_configure_routing().  A failed child discovery (NULL child) is not
+ * reported as an error.
+ */
+static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *ex_phy = &ex->ex_phy[phy_id];
+	struct domain_device *child = NULL;
+	int res = 0;
+
+	/* Phy state */
+	if (ex_phy->linkrate == SAS_SATA_SPINUP_HOLD) {
+		/*
+		 * Link-reset the phy and re-read it.  A failed link reset
+		 * leaves res at 0 and processing continues with the data
+		 * already cached.
+		 */
+		if (!sas_smp_phy_control(dev, phy_id, PHY_FUNC_LINK_RESET, NULL))
+			res = sas_ex_phy_discover(dev, phy_id);
+		if (res)
+			return res;
+	}
+
+	/* Parent and domain coherency */
+	/* Top-level expander whose phy leads back to the port itself. */
+	if (!dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) ==
+			     SAS_ADDR(dev->port->sas_addr))) {
+		sas_add_parent_port(dev, phy_id);
+		return 0;
+	}
+	/* Phy leads back up to the parent expander. */
+	if (dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) ==
+			    SAS_ADDR(dev->parent->sas_addr))) {
+		sas_add_parent_port(dev, phy_id);
+		if (ex_phy->routing_attr == TABLE_ROUTING)
+			sas_configure_phy(dev, phy_id, dev->port->sas_addr, 1);
+		return 0;
+	}
+
+	/*
+	 * The attached address is already known in this domain: disable all
+	 * phys attached to it.  Note that execution continues afterwards.
+	 */
+	if (sas_dev_present_in_domain(dev->port, ex_phy->attached_sas_addr))
+		sas_ex_disable_port(dev, ex_phy->attached_sas_addr);
+
+	if (ex_phy->attached_dev_type == NO_DEVICE) {
+		if (ex_phy->routing_attr == DIRECT_ROUTING) {
+			/* Nothing attached: clear the address and configure routing with it. */
+			memset(ex_phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
+			sas_configure_routing(dev, ex_phy->attached_sas_addr);
+		}
+		return 0;
+	} else if (ex_phy->linkrate == SAS_LINK_RATE_UNKNOWN)
+		return 0;
+
+	if (ex_phy->attached_dev_type != SAS_END_DEV &&
+	    ex_phy->attached_dev_type != FANOUT_DEV &&
+	    ex_phy->attached_dev_type != EDGE_DEV) {
+		SAS_DPRINTK("unknown device type(0x%x) attached to ex %016llx "
+			    "phy 0x%x\n", ex_phy->attached_dev_type,
+			    SAS_ADDR(dev->sas_addr),
+			    phy_id);
+		return 0;
+	}
+
+	res = sas_configure_routing(dev, ex_phy->attached_sas_addr);
+	if (res) {
+		SAS_DPRINTK("configure routing for dev %016llx "
+			    "reported 0x%x. Forgotten\n",
+			    SAS_ADDR(ex_phy->attached_sas_addr), res);
+		sas_disable_routing(dev, ex_phy->attached_sas_addr);
+		return res;
+	}
+
+	switch (ex_phy->attached_dev_type) {
+	case SAS_END_DEV:
+		child = sas_ex_discover_end_dev(dev, phy_id);
+		break;
+	case FANOUT_DEV:
+		/* A fanout expander address is already recorded: refuse a second one. */
+		if (SAS_ADDR(dev->port->disc.fanout_sas_addr)) {
+			SAS_DPRINTK("second fanout expander %016llx phy 0x%x "
+				    "attached to ex %016llx phy 0x%x\n",
+				    SAS_ADDR(ex_phy->attached_sas_addr),
+				    ex_phy->attached_phy_id,
+				    SAS_ADDR(dev->sas_addr),
+				    phy_id);
+			sas_ex_disable_phy(dev, phy_id);
+			break;
+		} else
+			memcpy(dev->port->disc.fanout_sas_addr,
+			       ex_phy->attached_sas_addr, SAS_ADDR_SIZE);
+		/* fallthrough */
+	case EDGE_DEV:
+		child = sas_ex_discover_expander(dev, phy_id);
+		break;
+	default:
+		break;
+	}
+
+	if (child) {
+		int i;
+
+		/*
+		 * Mark every phy attached to the child's address as
+		 * discovered, not only the one just processed.
+		 */
+		for (i = 0; i < ex->num_phys; i++) {
+			if (ex->ex_phy[i].phy_state == PHY_VACANT ||
+			    ex->ex_phy[i].phy_state == PHY_NOT_PRESENT)
+				continue;
+
+			if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) ==
+			    SAS_ADDR(child->sas_addr))
+				ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED;
+		}
+	}
+
+	return res;
+}
+
+/*
+ * sas_find_sub_addr -- find the expander behind a subtractive phy
+ * @dev: expander to search
+ * @sub_addr: receives the SAS address (SAS_ADDR_SIZE bytes) when found
+ *
+ * Looks for the first usable phy with subtractive routing that has an
+ * edge or fanout expander attached.  Returns 1 and fills @sub_addr if
+ * there is one, otherwise returns 0 and leaves @sub_addr untouched.
+ */
+static int sas_find_sub_addr(struct domain_device *dev, u8 *sub_addr)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int idx;
+
+	for (idx = 0; idx < ex->num_phys; idx++) {
+		struct ex_phy *cur = &ex->ex_phy[idx];
+
+		if (cur->phy_state == PHY_VACANT ||
+		    cur->phy_state == PHY_NOT_PRESENT)
+			continue;
+
+		if (cur->attached_dev_type != EDGE_DEV &&
+		    cur->attached_dev_type != FANOUT_DEV)
+			continue;
+
+		if (cur->routing_attr != SUBTRACTIVE_ROUTING)
+			continue;
+
+		memcpy(sub_addr, cur->attached_sas_addr, SAS_ADDR_SIZE);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * sas_check_level_subtractive_boundary -- check the child expanders of
+ * @dev for a common subtractive boundary
+ *
+ * As long as byte 0 of the reference address is still 0, each child
+ * expander is asked to supply it.  Every later child expander whose own
+ * subtractive address differs from that reference gets the phys attached
+ * to the diverging address disabled.  Always returns 0.
+ */
+static int sas_check_level_subtractive_boundary(struct domain_device *dev)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct domain_device *child;
+	u8 sub_addr[8] = {0, };
+
+	list_for_each_entry(child, &ex->children, siblings) {
+		u8 s2[8];
+
+		if (child->dev_type != EDGE_DEV &&
+		    child->dev_type != FANOUT_DEV)
+			continue;
+
+		if (sub_addr[0] == 0) {
+			/* No reference address yet: try to take this child's. */
+			sas_find_sub_addr(child, sub_addr);
+			continue;
+		}
+
+		if (!sas_find_sub_addr(child, s2))
+			continue;
+
+		if (SAS_ADDR(sub_addr) == SAS_ADDR(s2))
+			continue;
+
+		SAS_DPRINTK("ex %016llx->%016llx-?->%016llx "
+			    "diverges from subtractive "
+			    "boundary %016llx\n",
+			    SAS_ADDR(dev->sas_addr),
+			    SAS_ADDR(child->sas_addr),
+			    SAS_ADDR(s2),
+			    SAS_ADDR(sub_addr));
+
+		sas_ex_disable_port(child, s2);
+	}
+	return 0;
+}
+/**
+ * sas_ex_discover_devices -- discover devices attached to this expander
+ * @dev: pointer to the expander domain device
+ * @single: if you want to do a single phy, else set to -1;
+ *
+ * Configure this expander for use with its devices and register the
+ * devices of this expander.
+ *
+ * Phys that are vacant, not present, already discovered, disabled, in
+ * reset-problem state or behind a SATA port selector are skipped.
+ *
+ * Returns the result of the last sas_ex_discover_dev() call made (0 if
+ * none was made).  The subtractive-boundary check for the child level
+ * only runs when that result is 0.
+ */
+static int sas_ex_discover_devices(struct domain_device *dev, int single)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int i = 0, end = ex->num_phys;
+	int res = 0;
+
+	/* A valid phy number narrows the walk to exactly that phy. */
+	if (0 <= single && single < end) {
+		i = single;
+		end = i+1;
+	}
+
+	for ( ; i < end; i++) {
+		struct ex_phy *ex_phy = &ex->ex_phy[i];
+
+		if (ex_phy->phy_state == PHY_VACANT ||
+		    ex_phy->phy_state == PHY_NOT_PRESENT ||
+		    ex_phy->phy_state == PHY_DEVICE_DISCOVERED)
+			continue;
+
+		switch (ex_phy->linkrate) {
+		case SAS_PHY_DISABLED:
+		case SAS_PHY_RESET_PROBLEM:
+		case SAS_SATA_PORT_SELECTOR:
+			continue;
+		default:
+			res = sas_ex_discover_dev(dev, i);
+			/*
+			 * NOTE(review): this break only leaves the switch,
+			 * not the for loop, so the walk goes on after an
+			 * error and a later phy may overwrite res.
+			 */
+			if (res)
+				break;
+			continue;
+		}
+	}
+
+	if (!res)
+		sas_check_level_subtractive_boundary(dev);
+
+	return res;
+}
+
+/*
+ * sas_check_ex_subtractive_boundary -- all subtractive phys of an edge
+ * expander must lead to the same expander
+ * @dev: expander to check; anything but an EDGE_DEV is accepted as is
+ *
+ * The first subtractive phy with an expander attached defines the
+ * boundary address.  Any later such phy attached to a different address
+ * is disabled.  Always returns 0.
+ */
+static int sas_check_ex_subtractive_boundary(struct domain_device *dev)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	u8  *boundary = NULL;
+	int idx;
+
+	if (dev->dev_type != EDGE_DEV)
+		return 0;
+
+	for (idx = 0; idx < ex->num_phys; idx++) {
+		struct ex_phy *phy = &ex->ex_phy[idx];
+
+		if (phy->phy_state == PHY_VACANT ||
+		    phy->phy_state == PHY_NOT_PRESENT)
+			continue;
+
+		if (phy->attached_dev_type != FANOUT_DEV &&
+		    phy->attached_dev_type != EDGE_DEV)
+			continue;
+
+		if (phy->routing_attr != SUBTRACTIVE_ROUTING)
+			continue;
+
+		if (!boundary) {
+			boundary = &phy->attached_sas_addr[0];
+			continue;
+		}
+
+		if (SAS_ADDR(boundary) == SAS_ADDR(phy->attached_sas_addr))
+			continue;
+
+		SAS_DPRINTK("ex %016llx phy 0x%x "
+			    "diverges(%016llx) on subtractive "
+			    "boundary(%016llx). Disabled\n",
+			    SAS_ADDR(dev->sas_addr), idx,
+			    SAS_ADDR(phy->attached_sas_addr),
+			    SAS_ADDR(boundary));
+		sas_ex_disable_phy(dev, idx);
+	}
+	return 0;
+}
+
+/*
+ * Report an invalid routing-attribute combination on the link between
+ * @child and its parent expander.  Prints both expanders' type, address
+ * and phy id plus one letter per side for the routing attribute
+ * (D = direct, S = subtractive, T = table).
+ */
+static void sas_print_parent_topology_bug(struct domain_device *child,
+						 struct ex_phy *parent_phy,
+						 struct ex_phy *child_phy)
+{
+	/* Letter for each routing attribute. */
+	static const char routing_letter[] = {
+		[DIRECT_ROUTING] = 'D',
+		[SUBTRACTIVE_ROUTING] = 'S',
+		[TABLE_ROUTING] = 'T',
+	};
+	/* Printable name for each expander device type. */
+	static const char *expander_kind[] = {
+		[EDGE_DEV] = "edge",
+		[FANOUT_DEV] = "fanout",
+	};
+	struct domain_device *parent = child->parent;
+	const char *parent_kind = expander_kind[parent->dev_type];
+	const char *child_kind = expander_kind[child->dev_type];
+	char parent_ra = routing_letter[parent_phy->routing_attr];
+	char child_ra = routing_letter[child_phy->routing_attr];
+
+	sas_printk("%s ex %016llx phy 0x%x <--> %s ex %016llx phy 0x%x "
+		   "has %c:%c routing link!\n",
+		   parent_kind, SAS_ADDR(parent->sas_addr),
+		   parent_phy->phy_id,
+		   child_kind, SAS_ADDR(child->sas_addr),
+		   child_phy->phy_id,
+		   parent_ra, child_ra);
+}
+
+/*
+ * sas_check_eeds -- validate a subtractive-to-subtractive link between two
+ * edge expanders
+ * @child: the child expander; its parent is the other end of the link
+ * @parent_phy: parent phy of the link (used for messages only)
+ * @child_phy: child phy of the link (used for messages only)
+ *
+ * The port's discovery state remembers one pair of expander addresses
+ * (disc.eeds_a / disc.eeds_b).  Returns 0 if this link is, or becomes,
+ * that pair; returns -ENODEV if a fanout expander address is recorded for
+ * the port or if a different pair has already been recorded.
+ */
+static int sas_check_eeds(struct domain_device *child,
+				 struct ex_phy *parent_phy,
+				 struct ex_phy *child_phy)
+{
+	int res = 0;
+	struct domain_device *parent = child->parent;
+
+	if (SAS_ADDR(parent->port->disc.fanout_sas_addr) != 0) {
+		/* Rejected whenever a fanout expander address is recorded. */
+		res = -ENODEV;
+		SAS_DPRINTK("edge ex %016llx phy S:0x%x <--> edge ex %016llx "
+			    "phy S:0x%x, while there is a fanout ex %016llx\n",
+			    SAS_ADDR(parent->sas_addr),
+			    parent_phy->phy_id,
+			    SAS_ADDR(child->sas_addr),
+			    child_phy->phy_id,
+			    SAS_ADDR(parent->port->disc.fanout_sas_addr));
+	} else if (SAS_ADDR(parent->port->disc.eeds_a) == 0) {
+		/* First such link seen on this port: remember the pair. */
+		memcpy(parent->port->disc.eeds_a, parent->sas_addr,
+		       SAS_ADDR_SIZE);
+		memcpy(parent->port->disc.eeds_b, child->sas_addr,
+		       SAS_ADDR_SIZE);
+	} else if (((SAS_ADDR(parent->port->disc.eeds_a) ==
+		    SAS_ADDR(parent->sas_addr)) ||
+		   (SAS_ADDR(parent->port->disc.eeds_a) ==
+		    SAS_ADDR(child->sas_addr)))
+		   &&
+		   ((SAS_ADDR(parent->port->disc.eeds_b) ==
+		     SAS_ADDR(parent->sas_addr)) ||
+		    (SAS_ADDR(parent->port->disc.eeds_b) ==
+		     SAS_ADDR(child->sas_addr))))
+		/* Both recorded addresses belong to this link: nothing to do. */
+		;
+	else {
+		res = -ENODEV;
+		SAS_DPRINTK("edge ex %016llx phy 0x%x <--> edge ex %016llx "
+			    "phy 0x%x link forms a third EEDS!\n",
+			    SAS_ADDR(parent->sas_addr),
+			    parent_phy->phy_id,
+			    SAS_ADDR(child->sas_addr),
+			    child_phy->phy_id);
+	}
+
+	return res;
+}
+
+/* Here we spill over 80 columns.  It is intentional.
+ *
+ * sas_check_parent_topology -- validate the routing attributes on every
+ * link between expander @child and its parent expander.
+ *
+ * Accepted combinations (parent phy : child phy):
+ *   edge parent, fanout child:  S:T only
+ *   edge parent, edge child:    S:T, S:S (subject to sas_check_eeds()), T:S;
+ *                               a direct-routing parent phy is not checked
+ *   fanout parent:              T:S only
+ *
+ * Returns 0 if there is no parent, the parent is not an expander, or all
+ * links are acceptable; otherwise -ENODEV.  All links are examined; a
+ * later S:S link overwrites res with the sas_check_eeds() result.
+ */
+static int sas_check_parent_topology(struct domain_device *child)
+{
+	struct expander_device *child_ex = &child->ex_dev;
+	struct expander_device *parent_ex;
+	int i;
+	int res = 0;
+
+	if (!child->parent)
+		return 0;
+
+	if (child->parent->dev_type != EDGE_DEV &&
+	    child->parent->dev_type != FANOUT_DEV)
+		return 0;
+
+	parent_ex = &child->parent->ex_dev;
+
+	for (i = 0; i < parent_ex->num_phys; i++) {
+		struct ex_phy *parent_phy = &parent_ex->ex_phy[i];
+		struct ex_phy *child_phy;
+
+		if (parent_phy->phy_state == PHY_VACANT ||
+		    parent_phy->phy_state == PHY_NOT_PRESENT)
+			continue;
+
+		/* Only parent phys that are attached to this child matter. */
+		if (SAS_ADDR(parent_phy->attached_sas_addr) != SAS_ADDR(child->sas_addr))
+			continue;
+
+		/*
+		 * NOTE(review): attached_phy_id comes from the DISCOVER
+		 * response and is not range-checked against the child's
+		 * num_phys here.
+		 */
+		child_phy = &child_ex->ex_phy[parent_phy->attached_phy_id];
+
+		switch (child->parent->dev_type) {
+		case EDGE_DEV:
+			if (child->dev_type == FANOUT_DEV) {
+				if (parent_phy->routing_attr != SUBTRACTIVE_ROUTING ||
+				    child_phy->routing_attr != TABLE_ROUTING) {
+					sas_print_parent_topology_bug(child, parent_phy, child_phy);
+					res = -ENODEV;
+				}
+			} else if (parent_phy->routing_attr == SUBTRACTIVE_ROUTING) {
+				if (child_phy->routing_attr == SUBTRACTIVE_ROUTING) {
+					res = sas_check_eeds(child, parent_phy, child_phy);
+				} else if (child_phy->routing_attr != TABLE_ROUTING) {
+					sas_print_parent_topology_bug(child, parent_phy, child_phy);
+					res = -ENODEV;
+				}
+			} else if (parent_phy->routing_attr == TABLE_ROUTING &&
+				   child_phy->routing_attr != SUBTRACTIVE_ROUTING) {
+				sas_print_parent_topology_bug(child, parent_phy, child_phy);
+				res = -ENODEV;
+			}
+			break;
+		case FANOUT_DEV:
+			if (parent_phy->routing_attr != TABLE_ROUTING ||
+			    child_phy->routing_attr != SUBTRACTIVE_ROUTING) {
+				sas_print_parent_topology_bug(child, parent_phy, child_phy);
+				res = -ENODEV;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	return res;
+}
+
+#define RRI_REQ_SIZE  16
+#define RRI_RESP_SIZE 44
+
+/*
+ * sas_configure_present -- search the route table of @phy_id for @sas_addr
+ * using REPORT ROUTE INFORMATION.  When the search succeeds, *index is the
+ * entry that matched or a zero-address entry, and *present is 1 only for a
+ * match whose byte-12 flag 0x80 is clear.  Returns a smp_execute_task()
+ * error or SMP result on failure, or -1 when no index qualified.
+ */
+static int sas_configure_present(struct domain_device *dev, int phy_id,
+				 u8 *sas_addr, int *index, int *present)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *phy = &ex->ex_phy[phy_id];
+	u8 *rri_req, *rri_resp;
+	int i, res = 0;
+
+	*present = 0;
+	*index = 0;
+
+	rri_req = alloc_smp_req(RRI_REQ_SIZE);
+	if (!rri_req)
+		return -ENOMEM;
+	rri_resp = alloc_smp_resp(RRI_RESP_SIZE);
+	if (!rri_resp) {
+		kfree(rri_req);
+		return -ENOMEM;
+	}
+	rri_req[1] = SMP_REPORT_ROUTE_INFO;
+	rri_req[9] = phy_id;
+	for (i = 0; i < ex->max_route_indexes ; i++) {
+		*(__be16 *)(rri_req+6) = cpu_to_be16(i);
+		res = smp_execute_task(dev, rri_req, RRI_REQ_SIZE, rri_resp,
+				       RRI_RESP_SIZE);
+		if (res)
+			goto out;
+		res = rri_resp[2];
+		if (res == SMP_RESP_NO_INDEX) {
+			SAS_DPRINTK("overflow of indexes: dev %016llx "
+				    "phy 0x%x index 0x%x\n",
+				    SAS_ADDR(dev->sas_addr), phy_id, i);
+			goto out;
+		} else if (res != SMP_RESP_FUNC_ACC) {
+			SAS_DPRINTK("%s: dev %016llx phy 0x%x index 0x%x "
+				    "result 0x%x\n", __FUNCTION__,
+				    SAS_ADDR(dev->sas_addr), phy_id, i, res);
+			goto out;
+		}
+		if (SAS_ADDR(sas_addr) == 0) {
+			if (SAS_ADDR(rri_resp+16) == 0 &&
+			    phy->last_da_index < i) {
+				phy->last_da_index = i;
+				*index = i;
+				*present = 0;
+				goto out;
+			}
+		} else if (SAS_ADDR(rri_resp+16) == SAS_ADDR(sas_addr)) {
+			*index = i;
+			*present = !(rri_resp[12] & 0x80);
+			goto out;
+		} else if (SAS_ADDR(rri_resp+16) == 0) {
+			*index = i;
+			*present = 0;
+			goto out;
+		}
+	}
+	res = -1;
+out:
+	kfree(rri_req);
+	kfree(rri_resp);
+	return res;
+}
+
+#define CRI_REQ_SIZE  44
+#define CRI_RESP_SIZE  8
+
+/* Send CONFIGURE ROUTE INFORMATION for route @index of @phy_id with
+ * @sas_addr; byte-12 flag 0x80 is set if @sas_addr is zero or !@include.
+ * Returns the smp_execute_task() error, else the SMP result byte.
+ */
+static int sas_configure_set(struct domain_device *dev, int phy_id,
+			     u8 *sas_addr, int index, int include)
+{
+	u8 *cri_req, *cri_resp;
+	int res;
+
+	cri_req = alloc_smp_req(CRI_REQ_SIZE);
+	if (!cri_req)
+		return -ENOMEM;
+	cri_resp = alloc_smp_resp(CRI_RESP_SIZE);
+	if (!cri_resp) {
+		kfree(cri_req);
+		return -ENOMEM;
+	}
+
+	cri_req[1] = SMP_CONF_ROUTE_INFO;
+	*(__be16 *)(cri_req+6) = cpu_to_be16(index);
+	cri_req[9] = phy_id;
+	if (!include || SAS_ADDR(sas_addr) == 0)
+		cri_req[12] |= 0x80;
+	memcpy(cri_req+16, sas_addr, SAS_ADDR_SIZE);
+	res = smp_execute_task(dev, cri_req, CRI_REQ_SIZE, cri_resp,
+			       CRI_RESP_SIZE);
+	if (!res) {
+		res = cri_resp[2];
+		if (res == SMP_RESP_NO_INDEX) {
+			SAS_DPRINTK("overflow of indexes: dev %016llx phy 0x%x "
+				    "index 0x%x\n",
+				    SAS_ADDR(dev->sas_addr), phy_id, index);
+		}
+	}
+	kfree(cri_req);
+	kfree(cri_resp);
+	return res;
+}
+
+/*
+ * sas_configure_phy -- look up @sas_addr in the route table of @phy_id and
+ * rewrite that entry only when (include ^ present) is non-zero.
+ */
+static int sas_configure_phy(struct domain_device *dev, int phy_id,
+				    u8 *sas_addr, int include)
+{
+	int index, present;
+	int res = sas_configure_present(dev, phy_id, sas_addr, &index,
+					&present);
+
+	if (res || !(include ^ present))
+		return res;
+	return sas_configure_set(dev, phy_id, sas_addr, index, include);
+}
+
+/**
+ * sas_configure_parent -- configure routing table of parent
+ * @parent: parent expander
+ * @child: child expander
+ * @sas_addr: SAS port identifier of device directly attached to child
+ * @include: 1 from sas_configure_routing(), 0 from sas_disable_routing()
+ *
+ * Ancestors of @parent are configured first, by recursion.
+ */
+static int sas_configure_parent(struct domain_device *parent,
+				struct domain_device *child,
+				u8 *sas_addr, int include)
+{
+	struct expander_device *ex_parent = &parent->ex_dev;
+	int i, res = 0;
+
+	if (parent->parent) {
+		res = sas_configure_parent(parent->parent, parent, sas_addr,
+					   include);
+		if (res)
+			return res;
+	}
+
+	if (!ex_parent->conf_route_table) {
+		SAS_DPRINTK("ex %016llx has self-configuring routing table\n",
+			    SAS_ADDR(parent->sas_addr));
+		return 0;
+	}
+
+	for (i = 0; i < ex_parent->num_phys; i++) {
+		struct ex_phy *phy = &ex_parent->ex_phy[i];
+
+		if (phy->routing_attr != TABLE_ROUTING ||
+		    SAS_ADDR(phy->attached_sas_addr) != SAS_ADDR(child->sas_addr))
+			continue;
+		res = sas_configure_phy(parent, i, sas_addr, include);
+		if (res)
+			break;
+	}
+	return res;
+}
+
+/**
+ * sas_configure_routing -- configure routing
+ * @dev: expander device
+ * @sas_addr: port identifier of device directly attached to the expander device
+ * Returns 0 when @dev has no parent, else sas_configure_parent()'s result.
+ */
+static int sas_configure_routing(struct domain_device *dev, u8 *sas_addr)
+{
+	return dev->parent ?
+		sas_configure_parent(dev->parent, dev, sas_addr, 1) : 0;
+}
+
+/* Counterpart of sas_configure_routing(): same walk with include == 0. */
+static int sas_disable_routing(struct domain_device *dev, u8 *sas_addr)
+{
+	return dev->parent ?
+		sas_configure_parent(dev->parent, dev, sas_addr, 0) : 0;
+}
+
+#if 0	/* SMP portal attribute setup: compiled out */
+#define SMP_BIN_ATTR_NAME "smp_portal"
+
+static void sas_ex_smp_hook(struct domain_device *dev)
+{
+	struct expander_device *ex_dev = &dev->ex_dev;
+	struct bin_attribute *bin_attr = &ex_dev->smp_bin_attr;
+
+	memset(bin_attr, 0, sizeof(*bin_attr));
+
+	bin_attr->attr.name = SMP_BIN_ATTR_NAME;
+	bin_attr->attr.owner = THIS_MODULE;
+	bin_attr->attr.mode = 0600;	/* owner read/write only */
+
+	bin_attr->size = 0;
+	bin_attr->private = NULL;
+	bin_attr->read = smp_portal_read;
+	bin_attr->write= smp_portal_write;
+	bin_attr->mmap = NULL;
+
+	ex_dev->smp_portal_pid = -1;	/* -1: no request pending from any pid */
+	init_MUTEX(&ex_dev->smp_sema);
+}
+#endif
+
+/**
+ * sas_discover_expander -- expander discovery
+ * @dev: pointer to expander domain device
+ *
+ * Notifies the LLDD of @dev and runs the discovery steps and topology
+ * checks; if one of them fails, the LLDD is told the device is gone again.
+ * See comment in sas_discover_sata().
+ */
+static int sas_discover_expander(struct domain_device *dev)
+{
+	int res = sas_notify_lldd_dev_found(dev);
+
+	if (res)
+		return res;
+
+	res = sas_ex_general(dev);
+	if (!res)
+		res = sas_ex_manuf_info(dev);
+	if (res)
+		goto out_err;
+
+	res = sas_expander_discover(dev);
+	if (res) {
+		SAS_DPRINTK("expander %016llx discovery failed(0x%x)\n",
+			    SAS_ADDR(dev->sas_addr), res);
+		goto out_err;
+	}
+
+	sas_check_ex_subtractive_boundary(dev);
+	res = sas_check_parent_topology(dev);
+	if (!res)
+		return 0;
+
+out_err:
+	sas_notify_lldd_dev_gone(dev);
+	return res;
+}
+
+/* For each expander on @port run sas_ex_discover_devices(): on it when it is
+ * at @level, else on port->port_dev if @level > 0.  Returns the last result.
+ */
+static int sas_ex_level_discovery(struct asd_sas_port *port, const int level)
+{
+	struct domain_device *dev;
+	int res = 0;
+
+	list_for_each_entry(dev, &port->dev_list, dev_list_node) {
+		struct sas_expander_device *ex;
+
+		if (dev->dev_type != EDGE_DEV && dev->dev_type != FANOUT_DEV)
+			continue;
+		ex = rphy_to_expander_device(dev->rphy);
+		if (level == ex->level)
+			res = sas_ex_discover_devices(dev, -1);
+		else if (level > 0)
+			res = sas_ex_discover_devices(port->port_dev, -1);
+	}
+	return res;
+}
+
+/* Repeat level discovery while each pass raises port->disc.max_level. */
+static int sas_ex_bfs_disc(struct asd_sas_port *port)
+{
+	int res, level;
+
+	do {
+		level = port->disc.max_level;
+		res = sas_ex_level_discovery(port, level);
+		mb();
+	} while (port->disc.max_level > level);
+
+	return res;
+}
+
+/* Register the root expander's rphy, discover it, then walk deeper levels. */
+int sas_discover_root_expander(struct domain_device *dev)
+{
+	struct sas_expander_device *ex = rphy_to_expander_device(dev->rphy);
+	int res = sas_rphy_add(dev->rphy);
+
+	if (res)	/* don't discover behind an unregistered rphy */
+		return res;
+	ex->level = dev->port->disc.max_level; /* 0 */
+	res = sas_discover_expander(dev);
+	if (!res)
+		sas_ex_bfs_disc(dev->port);	/* result is not propagated */
+	return res;
+}
+
+/* ---------- Domain revalidation ---------- */
+
+/*
+ * sas_get_phy_discover -- issue SMP DISCOVER for @phy_id of expander @dev
+ * The response is stored in the caller's @disc_resp buffer.  Returns
+ * -ENOMEM, the smp_execute_task() error, or disc_resp->result if that is
+ * not SMP_RESP_FUNC_ACC; otherwise 0.
+ */
+static int sas_get_phy_discover(struct domain_device *dev,
+				int phy_id, struct smp_resp *disc_resp)
+{
+	u8 *disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
+	int res;
+
+	if (!disc_req)
+		return -ENOMEM;
+
+	disc_req[1] = SMP_DISCOVER;
+	disc_req[9] = phy_id;
+	res = smp_execute_task(dev, disc_req, DISCOVER_REQ_SIZE,
+			       disc_resp, DISCOVER_RESP_SIZE);
+	if (!res && disc_resp->result != SMP_RESP_FUNC_ACC)
+		res = disc_resp->result;
+
+	kfree(disc_req);
+	return res;
+}
+
+/* Fetch the DISCOVER change count of @phy_id into *pcc (set on success). */
+static int sas_get_phy_change_count(struct domain_device *dev,
+				    int phy_id, int *pcc)
+{
+	struct smp_resp *disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	int res;
+
+	if (!disc_resp)
+		return -ENOMEM;
+
+	res = sas_get_phy_discover(dev, phy_id, disc_resp);
+	if (res == 0)
+		*pcc = disc_resp->disc.change_count;
+
+	kfree(disc_resp);
+	return res;
+}
+
+/*
+ * Copy the SAS address attached to @phy_id into @attached_sas_addr (8 bytes),
+ * or all zeroes when DISCOVER reports attached device type 0.  The buffer is
+ * left untouched when an error is returned.
+ */
+static int sas_get_phy_attached_sas_addr(struct domain_device *dev,
+					 int phy_id, u8 *attached_sas_addr)
+{
+	struct smp_resp *disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	int res;
+
+	if (!disc_resp)
+		return -ENOMEM;
+	res = sas_get_phy_discover(dev, phy_id, disc_resp);
+	if (res == 0 && disc_resp->disc.attached_dev_type == 0)
+		memset(attached_sas_addr, 0, 8);
+	else if (res == 0)
+		memcpy(attached_sas_addr, disc_resp->disc.attached_sas_addr, 8);
+	kfree(disc_resp);
+	return res;
+}
+
+/* From @from_phy on, find the first phy whose change count differs from the
+ * cached value; refresh the cache and report the phy in *phy_id.
+ */
+static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id,
+			      int from_phy)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	int i, count, res = 0;
+
+	for (i = from_phy; i < ex->num_phys; i++) {
+		count = 0;
+		res = sas_get_phy_change_count(dev, i, &count);
+		if (res)
+			break;
+		if (count != ex->ex_phy[i].phy_change_count) {
+			ex->ex_phy[i].phy_change_count = count;
+			*phy_id = i;
+			break;
+		}
+	}
+	return res;
+}
+
+/* Read the change count from an SMP REPORT GENERAL response into *ecc;
+ * *ecc is written only when the request succeeds.
+ */
+static int sas_get_ex_change_count(struct domain_device *dev, int *ecc)
+{
+	struct smp_resp *rg_resp;
+	u8 *rg_req;
+	int res;
+
+	rg_req = alloc_smp_req(RG_REQ_SIZE);
+	if (!rg_req)
+		return -ENOMEM;
+
+	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
+	if (!rg_resp) {
+		res = -ENOMEM;
+		goto free_req;
+	}
+
+	rg_req[1] = SMP_REPORT_GENERAL;
+	res = smp_execute_task(dev, rg_req, RG_REQ_SIZE, rg_resp,
+			       RG_RESP_SIZE);
+	if (res == 0) {
+		if (rg_resp->result == SMP_RESP_FUNC_ACC)
+			*ecc = be16_to_cpu(rg_resp->rg.change_count);
+		else
+			res = rg_resp->result;
+	}
+	kfree(rg_resp);
+free_req:
+	kfree(rg_req);
+	return res;
+}
+
+/*
+ * sas_find_bcast_dev -- depth-first search, starting at @dev, for an expander
+ * whose REPORT GENERAL change count differs from the cached one.  The first
+ * hit is stored in *src_dev (NULL on entry) and its cached count refreshed.
+ */
+static int sas_find_bcast_dev(struct domain_device *dev,
+			      struct domain_device **src_dev)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct domain_device *ch;
+	int ex_change_count = -1;
+	int res = sas_get_ex_change_count(dev, &ex_change_count);
+
+	if (res)
+		return res;
+	if (ex_change_count != -1 && ex_change_count != ex->ex_change_count) {
+		*src_dev = dev;
+		ex->ex_change_count = ex_change_count;
+		return 0;
+	}
+	list_for_each_entry(ch, &ex->children, siblings) {
+		if (ch->dev_type != EDGE_DEV && ch->dev_type != FANOUT_DEV)
+			continue;
+		res = sas_find_bcast_dev(ch, src_dev);
+		if (*src_dev)	/* was "src_dev": always true, siblings skipped */
+			break;
+	}
+	return res;
+}
+
+/* Unregister everything below expander @dev (recursively), then @dev. */
+static void sas_unregister_ex_tree(struct domain_device *dev)
+{
+	struct domain_device *child, *next;
+
+	list_for_each_entry_safe(child, next, &dev->ex_dev.children, siblings) {
+		if (child->dev_type != EDGE_DEV &&
+		    child->dev_type != FANOUT_DEV)
+			sas_unregister_dev(child);
+		else
+			sas_unregister_ex_tree(child);
+	}
+	sas_unregister_dev(dev);
+}
+
+/* Drop the device behind @phy_id and undo its routing and port membership. */
+static void sas_unregister_devs_sas_addr(struct domain_device *parent,
+					 int phy_id)
+{
+	struct ex_phy *phy = &parent->ex_dev.ex_phy[phy_id];
+	struct domain_device *child, *n;
+
+	list_for_each_entry_safe(child, n, &parent->ex_dev.children, siblings) {
+		if (SAS_ADDR(child->sas_addr) !=
+		    SAS_ADDR(phy->attached_sas_addr))
+			continue;
+		if (child->dev_type == EDGE_DEV ||
+		    child->dev_type == FANOUT_DEV)
+			sas_unregister_ex_tree(child);
+		else
+			sas_unregister_dev(child);
+		break;
+	}
+	sas_disable_routing(parent, phy->attached_sas_addr);
+	memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
+	sas_port_delete_phy(phy->port, phy->phy); /* NOTE(review): needs phy->port set */
+	if (phy->port->num_phys == 0)
+		sas_port_delete(phy->port);
+	phy->port = NULL;
+}
+
+/* Discover devices behind the expanders at @level below @root, recursing
+ * through shallower expanders.  Returns the last result obtained.
+ */
+static int sas_discover_bfs_by_root_level(struct domain_device *root,
+					  const int level)
+{
+	struct domain_device *child;
+	int res = 0;
+
+	list_for_each_entry(child, &root->ex_dev.children, siblings) {
+		struct sas_expander_device *ex;
+
+		if (child->dev_type != EDGE_DEV && child->dev_type != FANOUT_DEV)
+			continue;
+		ex = rphy_to_expander_device(child->rphy);
+		if (ex->level == level)
+			res = sas_ex_discover_devices(child, -1);
+		else if (ex->level < level)
+			res = sas_discover_bfs_by_root_level(child, level);
+	}
+	return res;
+}
+
+/* Discover the subtree rooted at expander @dev: @dev itself, then level by
+ * level up to port->disc.max_level.  Returns the last level's result.
+ */
+static int sas_discover_bfs_by_root(struct domain_device *dev)
+{
+	struct sas_expander_device *ex = rphy_to_expander_device(dev->rphy);
+	int level = ex->level + 1;
+	int res = sas_ex_discover_devices(dev, -1);
+
+	if (res)
+		return res;
+	do {
+		res = sas_discover_bfs_by_root_level(dev, level++);
+		mb();
+	} while (level <= dev->port->disc.max_level);
+	return res;
+}
+
+/* Handle a newly attached device on @phy_id of expander @dev: refresh the
+ * phy, discover the device and, if it is an expander, the subtree below it.
+ */
+static int sas_discover_new(struct domain_device *dev, int phy_id)
+{
+	struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id];
+	struct domain_device *child;
+	int res;
+
+	SAS_DPRINTK("ex %016llx phy%d new device attached\n",
+		    SAS_ADDR(dev->sas_addr), phy_id);
+	res = sas_ex_phy_discover(dev, phy_id);
+	if (!res)
+		res = sas_ex_discover_devices(dev, phy_id);
+	if (res)
+		return res;
+	list_for_each_entry(child, &dev->ex_dev.children, siblings) {
+		if (SAS_ADDR(child->sas_addr) !=
+		    SAS_ADDR(ex_phy->attached_sas_addr))
+			continue;
+		if (child->dev_type == EDGE_DEV || child->dev_type == FANOUT_DEV)
+			res = sas_discover_bfs_by_root(child);
+		break;
+	}
+	return res;
+}
+
+/*
+ * Re-examine @phy_id of expander @dev: unregister what was attached if the
+ * phy is gone, vacant or empty, re-read it if the address is unchanged, else
+ * discover the new device.  Other query errors are returned as they are.
+ */
+static int sas_rediscover_dev(struct domain_device *dev, int phy_id)
+{
+	struct ex_phy *phy = &dev->ex_dev.ex_phy[phy_id];
+	u8 attached_sas_addr[8];
+	int res = sas_get_phy_attached_sas_addr(dev, phy_id, attached_sas_addr);
+
+	switch (res) {
+	case SMP_RESP_NO_PHY:
+	case SMP_RESP_PHY_VACANT:
+		phy->phy_state = res == SMP_RESP_NO_PHY ?
+				 PHY_NOT_PRESENT : PHY_VACANT;
+		sas_unregister_devs_sas_addr(dev, phy_id);
+		return res;
+	default:	/* the address buffer is only filled in on success */
+		if (res)
+			return res;
+	}
+	if (SAS_ADDR(attached_sas_addr) == 0) {
+		phy->phy_state = PHY_EMPTY;
+		sas_unregister_devs_sas_addr(dev, phy_id);
+	} else if (SAS_ADDR(attached_sas_addr) ==
+		   SAS_ADDR(phy->attached_sas_addr)) {
+		SAS_DPRINTK("ex %016llx phy 0x%x broadcast flutter\n",
+			    SAS_ADDR(dev->sas_addr), phy_id);
+		sas_ex_phy_discover(dev, phy_id);
+	} else
+		res = sas_discover_new(dev, phy_id);
+	return res;
+}
+
+/*
+ * React to a change on @phy_id: discover a new device if no address was
+ * recorded for the phy, do nothing if another phy still shares the recorded
+ * address (wide port), otherwise re-examine the phy.
+ */
+static int sas_rediscover(struct domain_device *dev, const int phy_id)
+{
+	struct expander_device *ex = &dev->ex_dev;
+	struct ex_phy *changed_phy = &ex->ex_phy[phy_id];
+	int i;
+
+	SAS_DPRINTK("ex %016llx phy%d originated BROADCAST(CHANGE)\n",
+		    SAS_ADDR(dev->sas_addr), phy_id);
+
+	if (SAS_ADDR(changed_phy->attached_sas_addr) == 0)
+		return sas_discover_new(dev, phy_id);
+	for (i = 0; i < ex->num_phys; i++) {
+		struct ex_phy *phy = &ex->ex_phy[i];
+
+		if (i != phy_id &&
+		    SAS_ADDR(phy->attached_sas_addr) ==
+		    SAS_ADDR(changed_phy->attached_sas_addr)) {
+			SAS_DPRINTK("phy%d part of wide port with "
+				    "phy%d\n", phy_id, i);
+			return 0;
+		}
+	}
+	return sas_rediscover_dev(dev, phy_id);
+}
+
+/**
+ * sas_ex_revalidate_domain -- revalidate the domain
+ * @port_dev: device at which the search for the changed expander starts
+ *
+ * Finds the expander whose change count moved (sas_find_bcast_dev()) and
+ * runs sas_rediscover() on each of its phys reported by
+ * sas_find_bcast_phy().
+ *
+ * NOTE: this process _must_ quit (return) as soon as any connection
+ * errors are encountered.  Connection recovery is done elsewhere.
+ * Discover process only interrogates devices in order to discover the
+ * domain.
+ */
+int sas_ex_revalidate_domain(struct domain_device *port_dev)
+{
+	struct domain_device *dev = NULL;
+	int phy_id, i = 0;
+	int res = sas_find_bcast_dev(port_dev, &dev);
+
+	if (res || !dev)
+		return res;
+
+	/* Walk the phys of the changed expander, resuming after each hit. */
+	do {
+		phy_id = -1;
+		res = sas_find_bcast_phy(dev, &phy_id, i);
+		if (phy_id == -1)
+			break;
+		res = sas_rediscover(dev, phy_id);
+		i = phy_id + 1;
+	} while (i < dev->ex_dev.num_phys);
+	return res;
+}
+
+#if 0
+/* ---------- SMP portal (compiled out) ---------- */
+
+static ssize_t smp_portal_write(struct kobject *kobj, char *buf, loff_t offs,
+				size_t size)
+{
+	struct domain_device *dev = to_dom_device(kobj);
+	struct expander_device *ex = &dev->ex_dev;
+
+	if (offs != 0)
+		return -EFBIG;
+	else if (size == 0)
+		return 0;
+
+	down_interruptible(&ex->smp_sema);	/* NOTE(review): result unchecked */
+	if (ex->smp_req)
+		kfree(ex->smp_req);	/* a new write replaces any stored request */
+	ex->smp_req = kzalloc(size, GFP_USER);
+	if (!ex->smp_req) {
+		up(&ex->smp_sema);
+		return -ENOMEM;
+	}
+	memcpy(ex->smp_req, buf, size);
+	ex->smp_req_size = size;
+	ex->smp_portal_pid = current->pid;	/* only this pid may read the reply */
+	up(&ex->smp_sema);
+
+	return size;
+}
+
+static ssize_t smp_portal_read(struct kobject *kobj, char *buf, loff_t offs,
+			       size_t size)
+{
+	struct domain_device *dev = to_dom_device(kobj);
+	struct expander_device *ex = &dev->ex_dev;
+	u8 *smp_resp;
+	int res = -EINVAL;
+
+	/* XXX: sysfs gives us an offset of 0x10 or 0x8 while in fact
+	 *  it should be 0.
+	 */
+
+	down_interruptible(&ex->smp_sema);	/* NOTE(review): result unchecked */
+	if (!ex->smp_req || ex->smp_portal_pid != current->pid)
+		goto out;
+
+	res = 0;
+	if (size == 0)
+		goto out;
+
+	res = -ENOMEM;
+	smp_resp = alloc_smp_resp(size);
+	if (!smp_resp)
+		goto out;
+	res = smp_execute_task(dev, ex->smp_req, ex->smp_req_size,
+			       smp_resp, size);
+	if (!res) {
+		memcpy(buf, smp_resp, size);
+		res = size;
+	}
+
+	kfree(smp_resp);
+out:	/* reached on every path after the lock: the stored request is dropped */
+	kfree(ex->smp_req);
+	ex->smp_req = NULL;
+	ex->smp_req_size = 0;
+	ex->smp_portal_pid = -1;
+	up(&ex->smp_sema);
+	return res;
+}
+#endif
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
new file mode 100644
index 0000000..c836a23
--- /dev/null
+++ b/drivers/scsi/libsas/sas_init.c
@@ -0,0 +1,267 @@
+/*
+ * Serial Attached SCSI (SAS) Transport Layer initialization
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+
+#include "sas_internal.h"
+
+#include "../scsi_sas_internal.h"
+
+kmem_cache_t *sas_task_cache;
+
+/*------------ SAS addr hash -----------*/
+/*
+ * Hash the 8-byte @sas_addr into the 3-byte @hashed: the address bits, MSB
+ * first, are shifted through a register xored with 0x00DB2777 on feedback.
+ */
+void sas_hash_addr(u8 *hashed, const u8 *sas_addr)
+{
+	const u32 poly = 0x00DB2777;
+	u32 r = 0;
+	int i, b;
+
+	for (i = 0; i < 8; i++) {
+		for (b = 7; b >= 0; b--) {
+			r <<= 1;
+			/* xor when the input bit and bit 24 of r differ */
+			if (((sas_addr[i] >> b) & 1) != ((r >> 24) & 1))
+				r ^= poly;
+		}
+	}
+	hashed[0] = (r >> 16) & 0xFF;
+	hashed[1] = (r >> 8) & 0xFF;
+	hashed[2] = r & 0xFF;
+}
+
+
+/* ---------- HA events ---------- */
+
+/* HAE_RESET handler: just clears the event's pending bit. */
+void sas_hae_reset(void *data)
+{
+	struct sas_ha_struct *sas_ha = data;
+
+	sas_begin_event(HAE_RESET, &sas_ha->event_lock, &sas_ha->pending);
+}
+
+/*
+ * sas_register_ha -- register the phys and ports of @sas_ha and start its
+ * event handling; a failure in any of these is returned.  A queue setup
+ * failure is not fatal: lldd_max_execute_num is forced to 1 ("direct mode")
+ * and 0 is still returned.
+ */
+int sas_register_ha(struct sas_ha_struct *sas_ha)
+{
+	int error;
+
+	spin_lock_init(&sas_ha->phy_port_lock);
+	sas_hash_addr(sas_ha->hashed_sas_addr, sas_ha->sas_addr);
+
+	if (sas_ha->lldd_queue_size == -1)
+		sas_ha->lldd_queue_size = 128; /* Sanity */
+	else if (sas_ha->lldd_queue_size == 0)
+		sas_ha->lldd_queue_size = 1;
+
+	error = sas_register_phys(sas_ha);
+	if (error) {
+		printk(KERN_NOTICE "couldn't register sas phys:%d\n", error);
+		return error;
+	}
+	error = sas_register_ports(sas_ha);
+	if (error) {
+		printk(KERN_NOTICE "couldn't register sas ports:%d\n", error);
+		return error;
+	}
+
+	error = sas_init_events(sas_ha);
+	if (error) {
+		printk(KERN_NOTICE "couldn't start event thread:%d\n", error);
+		sas_unregister_ports(sas_ha);
+		return error;
+	}
+
+	if (sas_ha->lldd_max_execute_num > 1) {
+		error = sas_init_queue(sas_ha);
+		if (error) {
+			printk(KERN_NOTICE "couldn't start queue thread:%d, "
+			       "running in direct mode\n", error);
+			sas_ha->lldd_max_execute_num = 1;
+		}
+	}
+
+	return 0;
+}
+
+/* Shut down the queue if lldd_max_execute_num > 1, then unregister the
+ * ports of @sas_ha.  Always returns 0.
+ */
+int sas_unregister_ha(struct sas_ha_struct *sas_ha)
+{
+	if (sas_ha->lldd_max_execute_num > 1)
+		sas_shutdown_queue(sas_ha);
+	sas_unregister_ports(sas_ha);
+	return 0;
+}
+
+/* Get phy events via SMP for remote phys; local phys yield -EINVAL. */
+static int sas_get_linkerrors(struct sas_phy *phy)
+{
+	/* FIXME: we have no local phy stats gathering at this time */
+	if (scsi_is_sas_phy_local(phy))
+		return -EINVAL;
+
+	return sas_smp_get_phy_events(phy);
+}
+
+/*
+ * Hard- or link-reset @phy: through the LLDD's lldd_control_phy() for a
+ * local phy, else through sas_smp_phy_control() on the phy's parent device.
+ */
+static int sas_phy_reset(struct sas_phy *phy, int hard_reset)
+{
+	enum phy_func reset_type = hard_reset ? PHY_FUNC_HARD_RESET :
+						PHY_FUNC_LINK_RESET;
+	struct Scsi_Host *shost;
+	struct sas_ha_struct *sas_ha;
+	struct sas_internal *i;
+
+	if (!scsi_is_sas_phy_local(phy)) {
+		struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
+		struct domain_device *ddev = sas_find_dev_by_rphy(rphy);
+
+		return sas_smp_phy_control(ddev, phy->number, reset_type, NULL);
+	}
+
+	shost = dev_to_shost(phy->dev.parent);
+	sas_ha = SHOST_TO_SAS_HA(shost);
+	i = to_sas_internal(sas_ha->core.shost->transportt);
+	return i->dft->lldd_control_phy(sas_ha->sas_phy[phy->number],
+					reset_type, NULL);
+}
+
+/*
+ * Apply new min/max link rates to @phy.  A requested minimum above the phy's
+ * maximum, or maximum below its minimum, fails with -EINVAL; non-zero values
+ * outside the *_hw limits are clamped in @rates before being passed on.
+ */
+static int sas_set_phy_speed(struct sas_phy *phy,
+			     struct sas_phy_linkrates *rates)
+{
+	struct sas_ha_struct *sas_ha;
+	struct sas_internal *i;
+
+	if (rates->minimum_linkrate &&
+	    rates->minimum_linkrate > phy->maximum_linkrate)
+		return -EINVAL;
+	if (rates->maximum_linkrate &&
+	    rates->maximum_linkrate < phy->minimum_linkrate)
+		return -EINVAL;
+
+	if (rates->minimum_linkrate &&
+	    rates->minimum_linkrate < phy->minimum_linkrate_hw)
+		rates->minimum_linkrate = phy->minimum_linkrate_hw;
+	if (rates->maximum_linkrate &&
+	    rates->maximum_linkrate > phy->maximum_linkrate_hw)
+		rates->maximum_linkrate = phy->maximum_linkrate_hw;
+
+	if (!scsi_is_sas_phy_local(phy)) {
+		struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
+		struct domain_device *ddev = sas_find_dev_by_rphy(rphy);
+
+		return sas_smp_phy_control(ddev, phy->number,
+					   PHY_FUNC_LINK_RESET, rates);
+	}
+
+	sas_ha = SHOST_TO_SAS_HA(dev_to_shost(phy->dev.parent));
+	i = to_sas_internal(sas_ha->core.shost->transportt);
+	return i->dft->lldd_control_phy(sas_ha->sas_phy[phy->number],
+					PHY_FUNC_SET_LINK_RATE, rates);
+}
+
+static struct sas_function_template sft = {
+	.phy_reset = sas_phy_reset,
+	.set_phy_speed = sas_set_phy_speed,
+	.get_linkerrors = sas_get_linkerrors,
+};	/* passed to sas_attach_transport() */
+
+/*
+ * Attach the SAS transport class and store @dft in its sas_internal.
+ * Returns the transport template, or NULL if sas_attach_transport() failed.
+ */
+struct scsi_transport_template *
+sas_domain_attach_transport(struct sas_domain_function_template *dft)
+{
+	struct scsi_transport_template *stt = sas_attach_transport(&sft);
+
+	if (stt) {
+		to_sas_internal(stt)->dft = dft;
+		stt->create_work_queue = 1;
+		stt->eh_timed_out = sas_scsi_timed_out;
+		stt->eh_strategy_handler = sas_scsi_recover_host;
+	}
+	return stt;
+}
+EXPORT_SYMBOL_GPL(sas_domain_attach_transport);
+
+/* Release a template obtained from sas_domain_attach_transport(). */
+void sas_domain_release_transport(struct scsi_transport_template *stt)
+{
+	sas_release_transport(stt);
+}
+EXPORT_SYMBOL_GPL(sas_domain_release_transport);
+
+/* ---------- SAS Class register/unregister ---------- */
+
+/* Module init: create the "sas_task" slab cache, sized for struct sas_task.
+ */
+static int __init sas_class_init(void)
+{
+	sas_task_cache = kmem_cache_create("sas_task", sizeof(struct sas_task),
+					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+	return sas_task_cache ? 0 : -ENOMEM;
+}
+
+static void __exit sas_class_exit(void)
+{
+	kmem_cache_destroy(sas_task_cache);	/* created in sas_class_init() */
+}
+
+MODULE_AUTHOR("Luben Tuikov <luben_tuikov@adaptec.com>");
+MODULE_DESCRIPTION("SAS Transport Layer");
+MODULE_LICENSE("GPL v2");
+
+module_init(sas_class_init);
+module_exit(sas_class_exit);
+
+EXPORT_SYMBOL_GPL(sas_register_ha);
+EXPORT_SYMBOL_GPL(sas_unregister_ha);
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
new file mode 100644
index 0000000..bffcee4
--- /dev/null
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -0,0 +1,146 @@
+/*
+ * Serial Attached SCSI (SAS) class internal header file
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#ifndef _SAS_INTERNAL_H_
+#define _SAS_INTERNAL_H_
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport_sas.h>
+#include <scsi/libsas.h>
+
+#define sas_printk(fmt, ...) printk(KERN_NOTICE "sas: " fmt, ## __VA_ARGS__)
+
+#ifdef SAS_DEBUG
+#define SAS_DPRINTK(fmt, ...) printk(KERN_NOTICE "sas: " fmt, ## __VA_ARGS__)
+#else	/* !SAS_DEBUG: debug messages expand to nothing */
+#define SAS_DPRINTK(fmt, ...)
+#endif	/* SAS_DEBUG */
+
+void sas_scsi_recover_host(struct Scsi_Host *shost);
+
+int sas_show_class(enum sas_class class, char *buf);
+int sas_show_proto(enum sas_proto proto, char *buf);
+int sas_show_linkrate(enum sas_linkrate linkrate, char *buf);
+int sas_show_oob_mode(enum sas_oob_mode oob_mode, char *buf);
+
+int  sas_register_phys(struct sas_ha_struct *sas_ha);
+void sas_unregister_phys(struct sas_ha_struct *sas_ha);
+
+int  sas_register_ports(struct sas_ha_struct *sas_ha);
+void sas_unregister_ports(struct sas_ha_struct *sas_ha);
+
+enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *);
+
+int  sas_init_queue(struct sas_ha_struct *sas_ha);
+int  sas_init_events(struct sas_ha_struct *sas_ha);
+void sas_shutdown_queue(struct sas_ha_struct *sas_ha);
+
+void sas_deform_port(struct asd_sas_phy *phy);
+
+void sas_porte_bytes_dmaed(void *);
+void sas_porte_broadcast_rcvd(void *);
+void sas_porte_link_reset_err(void *);
+void sas_porte_timer_event(void *);
+void sas_porte_hard_reset(void *);
+
+int sas_notify_lldd_dev_found(struct domain_device *);
+void sas_notify_lldd_dev_gone(struct domain_device *);
+
+int sas_smp_phy_control(struct domain_device *dev, int phy_id,
+			enum phy_func phy_func, struct sas_phy_linkrates *);
+int sas_smp_get_phy_events(struct sas_phy *phy);
+
+struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy);
+
+void sas_hae_reset(void *);
+
+/* Queue @work on @shost unless @event is already marked in @pending. */
+static inline void sas_queue_event(int event, spinlock_t *lock,
+				   unsigned long *pending,
+				   struct work_struct *work,
+				   struct Scsi_Host *shost)
+{
+	unsigned long flags;
+	int was_pending;
+
+	spin_lock_irqsave(lock, flags);
+	was_pending = test_bit(event, pending);
+	__set_bit(event, pending);	/* no-op if it was already set */
+	spin_unlock_irqrestore(lock, flags);
+	if (!was_pending)
+		scsi_queue_work(shost, work);
+}
+
+static inline void sas_begin_event(int event, spinlock_t *lock,
+				   unsigned long *pending)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+	__clear_bit(event, pending);	/* sas_queue_event() may queue it again */
+	spin_unlock_irqrestore(lock, flags);
+}
+
+/*
+ * sas_fill_in_rphy -- copy identify data from @dev into @rphy
+ *
+ * SATA devices are reported as SAS end devices for now; device types other
+ * than end device and edge/fanout expander are reported as SAS_PHY_UNUSED.
+ */
+static inline void sas_fill_in_rphy(struct domain_device *dev,
+				    struct sas_rphy *rphy)
+{
+	rphy->identify.sas_address = SAS_ADDR(dev->sas_addr);
+	rphy->identify.initiator_port_protocols = dev->iproto;
+	rphy->identify.target_port_protocols = dev->tproto;
+
+	/* FIXME: need sata device type */
+	if (dev->dev_type == SATA_DEV || dev->dev_type == SAS_END_DEV)
+		rphy->identify.device_type = SAS_END_DEVICE;
+	else if (dev->dev_type == EDGE_DEV)
+		rphy->identify.device_type = SAS_EDGE_EXPANDER_DEVICE;
+	else if (dev->dev_type == FANOUT_DEV)
+		rphy->identify.device_type = SAS_FANOUT_EXPANDER_DEVICE;
+	else
+		rphy->identify.device_type = SAS_PHY_UNUSED;
+}
+
+/* Add the phy to @dev's parent port, creating that port on first use. */
+static inline void sas_add_parent_port(struct domain_device *dev, int phy_id)
+{
+	struct expander_device *ex = &dev->ex_dev;
+
+	if (!ex->parent_port) {
+		ex->parent_port = sas_port_alloc(&dev->rphy->dev, phy_id);
+		/* FIXME: error handling */
+		BUG_ON(!ex->parent_port);
+		BUG_ON(sas_port_add(ex->parent_port));
+		sas_port_mark_backlink(ex->parent_port);
+	}
+	sas_port_add_phy(ex->parent_port, ex->ex_phy[phy_id].phy);
+}
+
+#endif /* _SAS_INTERNAL_H_ */
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
new file mode 100644
index 0000000..9340cdb
--- /dev/null
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -0,0 +1,158 @@
+/*
+ * Serial Attached SCSI (SAS) Phy class
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include "sas_internal.h"
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+
+/* ---------- Phy events ---------- */
+
+static void sas_phye_loss_of_signal(void *data)
+{
+	struct asd_sas_phy *phy = data;
+
+	sas_begin_event(PHYE_LOSS_OF_SIGNAL, &phy->ha->event_lock,
+			&phy->phy_events_pending);
+	phy->error = 0;
+	sas_deform_port(phy);
+}
+
+static void sas_phye_oob_done(void *data)
+{
+	struct asd_sas_phy *phy = data;
+
+	sas_begin_event(PHYE_OOB_DONE, &phy->ha->event_lock,
+			&phy->phy_events_pending);
+	phy->error = 0;
+}
+
+static void sas_phye_oob_error(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct asd_sas_port *port = phy->port;	/* port on entry, before deform */
+	struct sas_internal *i =
+		to_sas_internal(sas_ha->core.shost->transportt);
+
+	sas_begin_event(PHYE_OOB_ERROR, &phy->ha->event_lock,
+			&phy->phy_events_pending);
+
+	sas_deform_port(phy);
+
+	if (!port && phy->enabled && i->dft->lldd_control_phy) {
+		phy->error++;
+		switch (phy->error) {
+		case 1:
+		case 2:		/* first two errors: hard-reset the phy */
+			i->dft->lldd_control_phy(phy, PHY_FUNC_HARD_RESET,
+						 NULL);
+			break;
+		case 3:
+		default:	/* third error onwards: disable the phy */
+			phy->error = 0;
+			phy->enabled = 0;
+			i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE, NULL);
+			break;
+		}
+	}
+}
+
+static void sas_phye_spinup_hold(void *data)
+{
+	struct asd_sas_phy *phy = data;
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct sas_internal *i =
+		to_sas_internal(sas_ha->core.shost->transportt);
+
+	sas_begin_event(PHYE_SPINUP_HOLD, &phy->ha->event_lock,
+			&phy->phy_events_pending);
+
+	phy->error = 0;
+	i->dft->lldd_control_phy(phy, PHY_FUNC_RELEASE_SPINUP_HOLD, NULL);
+}
+
+/* ---------- Phy class registration ---------- */
+
+int sas_register_phys(struct sas_ha_struct *sas_ha)
+{
+	int i;
+
+	static void (*sas_phy_event_fns[PHY_NUM_EVENTS])(void *) = {
+		[PHYE_LOSS_OF_SIGNAL] = sas_phye_loss_of_signal,
+		[PHYE_OOB_DONE] = sas_phye_oob_done,
+		[PHYE_OOB_ERROR] = sas_phye_oob_error,
+		[PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
+	};
+
+	static void (*sas_port_event_fns[PORT_NUM_EVENTS])(void *) = {
+		[PORTE_BYTES_DMAED] = sas_porte_bytes_dmaed,
+		[PORTE_BROADCAST_RCVD] = sas_porte_broadcast_rcvd,
+		[PORTE_LINK_RESET_ERR] = sas_porte_link_reset_err,
+		[PORTE_TIMER_EVENT] = sas_porte_timer_event,
+		[PORTE_HARD_RESET] = sas_porte_hard_reset,
+	};
+
+	/* Now register the phys. */
+	for (i = 0; i < sas_ha->num_phys; i++) {
+		int k;
+		struct asd_sas_phy *phy = sas_ha->sas_phy[i];
+
+		phy->error = 0;
+		INIT_LIST_HEAD(&phy->port_phy_el);
+		for (k = 0; k < PORT_NUM_EVENTS; k++)
+			INIT_WORK(&phy->port_events[k], sas_port_event_fns[k],
+				  phy);
+
+		for (k = 0; k < PHY_NUM_EVENTS; k++)
+			INIT_WORK(&phy->phy_events[k], sas_phy_event_fns[k],
+				  phy);
+		phy->port = NULL;
+		phy->ha = sas_ha;
+		spin_lock_init(&phy->frame_rcvd_lock);
+		spin_lock_init(&phy->sas_prim_lock);
+		phy->frame_rcvd_size = 0;
+
+		phy->phy = sas_phy_alloc(&sas_ha->core.shost->shost_gendev,
+					 i);
+		if (!phy->phy)
+			return -ENOMEM;
+
+		phy->phy->identify.initiator_port_protocols =
+			phy->iproto;
+		phy->phy->identify.target_port_protocols = phy->tproto;
+		phy->phy->identify.sas_address = SAS_ADDR(sas_ha->sas_addr);
+		phy->phy->identify.phy_identifier = i;
+		phy->phy->minimum_linkrate_hw = SAS_LINK_RATE_UNKNOWN;
+		phy->phy->maximum_linkrate_hw = SAS_LINK_RATE_UNKNOWN;
+		phy->phy->minimum_linkrate = SAS_LINK_RATE_UNKNOWN;
+		phy->phy->maximum_linkrate = SAS_LINK_RATE_UNKNOWN;
+		phy->phy->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN;
+
+		sas_phy_add(phy->phy);
+	}
+
+	return 0;
+}
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
new file mode 100644
index 0000000..253cdcf
--- /dev/null
+++ b/drivers/scsi/libsas/sas_port.c
@@ -0,0 +1,279 @@
+/*
+ * Serial Attached SCSI (SAS) Port class
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include "sas_internal.h"
+
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+
+/**
+ * sas_form_port -- add this phy to a port
+ * @phy: the phy of interest
+ *
+ * Joins the port with the same attached address (wide port) or a free one.
+ * A phy bound to a port with a different attached address is deformed first.
+ */
+static void sas_form_port(struct asd_sas_phy *phy)
+{
+	int i;
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct asd_sas_port *port = phy->port;
+	struct sas_internal *si =
+		to_sas_internal(sas_ha->core.shost->transportt);
+
+	if (port) {
+		if (memcmp(port->attached_sas_addr, phy->attached_sas_addr,
+			   SAS_ADDR_SIZE) != 0)
+			sas_deform_port(phy);	/* stale port: leave it first */
+		else {
+			SAS_DPRINTK("%s: phy%d belongs to port%d already(%d)!\n",
+				    __FUNCTION__, phy->id, phy->port->id,
+				    phy->port->num_phys);
+			return;
+		}
+	}
+
+	/* find a port */
+	spin_lock(&sas_ha->phy_port_lock);
+	for (i = 0; i < sas_ha->num_phys; i++) {
+		port = sas_ha->sas_port[i];
+		spin_lock(&port->phy_list_lock);
+		if (*(u64 *) port->sas_addr &&
+		    memcmp(port->attached_sas_addr,
+			   phy->attached_sas_addr, SAS_ADDR_SIZE) == 0 &&
+		    port->num_phys > 0) {
+			/* wide port */
+			SAS_DPRINTK("phy%d matched wide port%d\n", phy->id,
+				    port->id);
+			break;
+		} else if (*(u64 *) port->sas_addr == 0 && port->num_phys==0) {
+			memcpy(port->sas_addr, phy->sas_addr, SAS_ADDR_SIZE);
+			break;
+		}
+		spin_unlock(&port->phy_list_lock);
+	}
+
+	if (i >= sas_ha->num_phys) {
+		printk(KERN_NOTICE "%s: couldn't find a free port, bug?\n",
+		       __FUNCTION__);
+		spin_unlock(&sas_ha->phy_port_lock);
+		return;
+	}
+
+	/* add the phy to the port; port->phy_list_lock is still held here */
+	list_add_tail(&phy->port_phy_el, &port->phy_list);
+	phy->port = port;
+	port->num_phys++;
+	port->phy_mask |= (1U << phy->id);
+
+	if (!port->phy)
+		port->phy = phy->phy;
+
+	SAS_DPRINTK("phy%d added to port%d, phy_mask:0x%x\n", phy->id,
+		    port->id, port->phy_mask);
+
+	if (*(u64 *)port->attached_sas_addr == 0) {
+		port->class = phy->class;
+		memcpy(port->attached_sas_addr, phy->attached_sas_addr,
+		       SAS_ADDR_SIZE);
+		port->iproto = phy->iproto;
+		port->tproto = phy->tproto;
+		port->oob_mode = phy->oob_mode;
+		port->linkrate = phy->linkrate;
+	} else
+		port->linkrate = max(port->linkrate, phy->linkrate);
+	spin_unlock(&port->phy_list_lock);
+	spin_unlock(&sas_ha->phy_port_lock);
+
+	if (!port->port) {
+		port->port = sas_port_alloc(phy->phy->dev.parent, port->id);
+		BUG_ON(!port->port);
+		sas_port_add(port->port);
+	}
+	sas_port_add_phy(port->port, phy->phy);
+
+	if (port->port_dev)
+		port->port_dev->pathways = port->num_phys;
+
+	/* Tell the LLDD about this port formation. */
+	if (si->dft->lldd_port_formed)
+		si->dft->lldd_port_formed(phy);
+
+	sas_discover_event(phy->port, DISCE_DISCOVER_DOMAIN);
+}
+
+/**
+ * sas_deform_port -- remove this phy from the port it belongs to
+ * @phy: the phy of interest
+ *
+ * This is called when the physical link to the other phy has been
+ * lost (on this phy), in Event thread context. We cannot delay here.
+ */
+void sas_deform_port(struct asd_sas_phy *phy)
+{
+	struct sas_ha_struct *sas_ha = phy->ha;
+	struct asd_sas_port *port = phy->port;
+	struct sas_internal *si =
+		to_sas_internal(sas_ha->core.shost->transportt);
+
+	if (!port)
+		return;		  /* done by a phy event */
+
+	if (port->port_dev)
+		port->port_dev->pathways--;
+
+	if (port->num_phys == 1) {
+		sas_unregister_domain_devices(port);
+		sas_port_delete(port->port);
+		port->port = NULL;
+	} else
+		sas_port_delete_phy(port->port, phy->phy);
+
+
+	if (si->dft->lldd_port_deformed)
+		si->dft->lldd_port_deformed(phy);
+
+	spin_lock(&sas_ha->phy_port_lock);
+	spin_lock(&port->phy_list_lock);
+
+	list_del_init(&phy->port_phy_el);
+	phy->port = NULL;
+	port->num_phys--;
+	port->phy_mask &= ~(1U << phy->id);
+
+	if (port->num_phys == 0) {
+		INIT_LIST_HEAD(&port->phy_list);
+		memset(port->sas_addr, 0, SAS_ADDR_SIZE);
+		memset(port->attached_sas_addr, 0, SAS_ADDR_SIZE);
+		port->class = 0;
+		port->iproto = 0;
+		port->tproto = 0;
+		port->oob_mode = 0;
+		port->phy_mask = 0;
+	}
+	spin_unlock(&port->phy_list_lock);
+	spin_unlock(&sas_ha->phy_port_lock);
+
+	return;
+}
+
+/* ---------- SAS port events ---------- */
+
+void sas_porte_bytes_dmaed(void *data)
+{
+	struct asd_sas_phy *phy = data;
+
+	sas_begin_event(PORTE_BYTES_DMAED, &phy->ha->event_lock,
+			&phy->port_events_pending);
+
+	sas_form_port(phy);
+}
+
+void sas_porte_broadcast_rcvd(void *data)
+{
+	unsigned long flags;
+	u32 prim;
+	struct asd_sas_phy *phy = data;
+
+	sas_begin_event(PORTE_BROADCAST_RCVD, &phy->ha->event_lock,
+			&phy->port_events_pending);
+
+	spin_lock_irqsave(&phy->sas_prim_lock, flags);
+	prim = phy->sas_prim;
+	spin_unlock_irqrestore(&phy->sas_prim_lock, flags);
+
+	SAS_DPRINTK("broadcast received: %d\n", prim);
+	sas_discover_event(phy->port, DISCE_REVALIDATE_DOMAIN);
+}
+
+void sas_porte_link_reset_err(void *data)
+{
+	struct asd_sas_phy *phy = data;
+
+	sas_begin_event(PORTE_LINK_RESET_ERR, &phy->ha->event_lock,
+			&phy->port_events_pending);
+
+	sas_deform_port(phy);
+}
+
+void sas_porte_timer_event(void *data)
+{
+	struct asd_sas_phy *phy = data;
+
+	sas_begin_event(PORTE_TIMER_EVENT, &phy->ha->event_lock,
+			&phy->port_events_pending);
+
+	sas_deform_port(phy);
+}
+
+void sas_porte_hard_reset(void *data)
+{
+	struct asd_sas_phy *phy = data;
+
+	sas_begin_event(PORTE_HARD_RESET, &phy->ha->event_lock,
+			&phy->port_events_pending);
+
+	sas_deform_port(phy);
+}
+
+/* ---------- SAS port registration ---------- */
+
+static void sas_init_port(struct asd_sas_port *port,
+			  struct sas_ha_struct *sas_ha, int i)
+{
+	port->id = i;
+	INIT_LIST_HEAD(&port->dev_list);
+	spin_lock_init(&port->phy_list_lock);
+	INIT_LIST_HEAD(&port->phy_list);
+	port->num_phys = 0;
+	port->phy_mask = 0;
+	port->ha = sas_ha;
+
+	spin_lock_init(&port->dev_list_lock);
+}
+
+int sas_register_ports(struct sas_ha_struct *sas_ha)
+{
+	int i;
+
+	/* initialize the ports and discovery */
+	for (i = 0; i < sas_ha->num_phys; i++) {
+		struct asd_sas_port *port = sas_ha->sas_port[i];
+
+		sas_init_port(port, sas_ha, i);
+		sas_init_disc(&port->disc, port);
+	}
+	return 0;
+}
+
+void sas_unregister_ports(struct sas_ha_struct *sas_ha)
+{
+	int i;
+
+	for (i = 0; i < sas_ha->num_phys; i++)
+		if (sas_ha->sas_phy[i]->port)
+			sas_deform_port(sas_ha->sas_phy[i]);
+
+}
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
new file mode 100644
index 0000000..43e0e4e
--- /dev/null
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -0,0 +1,786 @@
+/*
+ * Serial Attached SCSI (SAS) class SCSI Host glue.
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include "sas_internal.h"
+
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_transport.h>
+#include <scsi/scsi_transport_sas.h>
+#include "../scsi_sas_internal.h"
+
+#include <linux/err.h>
+#include <linux/blkdev.h>
+#include <linux/scatterlist.h>
+
+/* ---------- SCSI Host glue ---------- */
+
+#define TO_SAS_TASK(_scsi_cmd)  ((void *)(_scsi_cmd)->host_scribble)
+#define ASSIGN_SAS_TASK(_sc, _t) do { (_sc)->host_scribble = (void *) _t; } while (0)
+
+static void sas_scsi_task_done(struct sas_task *task)
+{
+	struct task_status_struct *ts = &task->task_status;
+	struct scsi_cmnd *sc = task->uldd_task;
+	unsigned ts_flags = task->task_state_flags;	/* read before free */
+	int hs = 0, stat = 0;	/* host byte, status byte of sc->result */
+
+	if (unlikely(!sc)) {
+		SAS_DPRINTK("task_done called with non existing SCSI cmnd!\n");
+		list_del_init(&task->list);
+		sas_free_task(task);
+		return;
+	}
+
+	if (ts->resp == SAS_TASK_UNDELIVERED) {
+		/* transport error */
+		hs = DID_NO_CONNECT;
+	} else { /* ts->resp == SAS_TASK_COMPLETE */
+		/* task delivered, what happened afterwards? */
+		switch (ts->stat) {
+		case SAS_DEV_NO_RESPONSE:
+		case SAS_INTERRUPTED:
+		case SAS_PHY_DOWN:
+		case SAS_NAK_R_ERR:
+		case SAS_OPEN_TO:
+			hs = DID_NO_CONNECT;
+			break;
+		case SAS_DATA_UNDERRUN:
+			sc->resid = ts->residual;
+			if (sc->request_bufflen - sc->resid < sc->underflow)
+				hs = DID_ERROR;
+			break;
+		case SAS_DATA_OVERRUN:
+			hs = DID_ERROR;
+			break;
+		case SAS_QUEUE_FULL:
+			hs = DID_SOFT_ERROR; /* retry */
+			break;
+		case SAS_DEVICE_UNKNOWN:
+			hs = DID_BAD_TARGET;
+			break;
+		case SAS_SG_ERR:
+			hs = DID_PARITY;
+			break;
+		case SAS_OPEN_REJECT:
+			if (ts->open_rej_reason == SAS_OREJ_RSVD_RETRY)
+				hs = DID_SOFT_ERROR; /* retry */
+			else
+				hs = DID_ERROR;
+			break;
+		case SAS_PROTO_RESPONSE:
+			SAS_DPRINTK("LLDD:%s sent SAS_PROTO_RESP for an SSP "
+				    "task; please report this\n",
+				    task->dev->port->ha->sas_ha_name);
+			break;
+		case SAS_ABORTED_TASK:
+			hs = DID_ABORT;
+			break;
+		case SAM_CHECK_COND:	/* copy sense data, clamped */
+			memcpy(sc->sense_buffer, ts->buf,
+			       min(SCSI_SENSE_BUFFERSIZE, ts->buf_valid_size));
+			stat = SAM_CHECK_COND;
+			break;
+		default:
+			stat = ts->stat;
+			break;
+		}
+	}
+	ASSIGN_SAS_TASK(sc, NULL);
+	sc->result = (hs << 16) | stat;
+	list_del_init(&task->list);
+	sas_free_task(task);
+	/* This is very ugly but this is how SCSI Core works. */
+	if (ts_flags & SAS_TASK_STATE_ABORTED)
+		scsi_finish_command(sc);
+	else
+		sc->scsi_done(sc);
+}
+
+static enum task_attribute sas_scsi_get_task_attr(struct scsi_cmnd *cmd)
+{
+	enum task_attribute ta = TASK_ATTR_SIMPLE;
+	if (cmd->request && blk_rq_tagged(cmd->request)) {
+		if (cmd->device->ordered_tags &&
+		    (cmd->request->flags & REQ_HARDBARRIER))
+			ta = TASK_ATTR_HOQ;
+	}
+	return ta;
+}
+
+static struct sas_task *sas_create_task(struct scsi_cmnd *cmd,
+					       struct domain_device *dev,
+					       unsigned long gfp_flags)
+{
+	struct sas_task *task = sas_alloc_task(gfp_flags);
+	struct scsi_lun lun;
+
+	if (!task)
+		return NULL;
+
+	*(u32 *)cmd->sense_buffer = 0;
+	task->uldd_task = cmd;
+	ASSIGN_SAS_TASK(cmd, task);
+
+	task->dev = dev;
+	task->task_proto = task->dev->tproto; /* BUG_ON(!SSP) */
+
+	task->ssp_task.retry_count = 1;
+	int_to_scsilun(cmd->device->lun, &lun);
+	memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8);
+	task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd);
+	memcpy(task->ssp_task.cdb, cmd->cmnd, 16);
+
+	task->scatter = cmd->request_buffer;
+	task->num_scatter = cmd->use_sg;
+	task->total_xfer_len = cmd->request_bufflen;
+	task->data_dir = cmd->sc_data_direction;
+
+	task->task_done = sas_scsi_task_done;
+
+	return task;
+}
+
+static int sas_queue_up(struct sas_task *task)
+{
+	struct sas_ha_struct *sas_ha = task->dev->port->ha;
+	struct scsi_core *core = &sas_ha->core;
+	unsigned long flags;
+	LIST_HEAD(list);
+
+	spin_lock_irqsave(&core->task_queue_lock, flags);
+	if (sas_ha->lldd_queue_size < core->task_queue_size + 1) {
+		spin_unlock_irqrestore(&core->task_queue_lock, flags);
+		return -SAS_QUEUE_FULL;
+	}
+	list_add_tail(&task->list, &core->task_queue);
+	core->task_queue_size += 1;
+	spin_unlock_irqrestore(&core->task_queue_lock, flags);
+	up(&core->queue_thread_sema);
+
+	return 0;
+}
+
+/**
+ * sas_queuecommand -- Enqueue a command for processing
+ * @parameters: See SCSI Core documentation
+ *
+ * Note: XXX: Remove the host unlock/lock pair when SCSI Core can
+ * call us without holding an IRQ spinlock...
+ */
+int sas_queuecommand(struct scsi_cmnd *cmd,
+		     void (*scsi_done)(struct scsi_cmnd *))
+{
+	int res = 0;
+	struct domain_device *dev = cmd_to_domain_dev(cmd);
+	struct Scsi_Host *host = cmd->device->host;
+	struct sas_internal *i = to_sas_internal(host->transportt);
+
+	spin_unlock_irq(host->host_lock);
+
+	{
+		struct sas_ha_struct *sas_ha = dev->port->ha;
+		struct sas_task *task;
+
+		res = -ENOMEM;
+		task = sas_create_task(cmd, dev, GFP_ATOMIC);
+		if (!task)
+			goto out;
+
+		cmd->scsi_done = scsi_done;
+		/* Queue up, Direct Mode or Task Collector Mode. */
+		if (sas_ha->lldd_max_execute_num < 2)
+			res = i->dft->lldd_execute_task(task, 1, GFP_ATOMIC);
+		else
+			res = sas_queue_up(task);
+
+		/* Examine */
+		if (res) {
+			SAS_DPRINTK("lldd_execute_task returned: %d\n", res);
+			ASSIGN_SAS_TASK(cmd, NULL);
+			sas_free_task(task);
+			if (res == -SAS_QUEUE_FULL) {
+				cmd->result = DID_SOFT_ERROR << 16; /* retry */
+				res = 0;
+				scsi_done(cmd);
+			}
+			goto out;
+		}
+	}
+out:
+	spin_lock_irq(host->host_lock);
+	return res;
+}
+
+static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd)
+{
+	struct scsi_cmnd *cmd, *n;
+
+	list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
+		if (cmd == my_cmd)
+			list_del_init(&cmd->eh_entry);
+	}
+}
+
+static void sas_scsi_clear_queue_I_T(struct list_head *error_q,
+				     struct domain_device *dev)
+{
+	struct scsi_cmnd *cmd, *n;
+
+	list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
+		struct domain_device *x = cmd_to_domain_dev(cmd);
+
+		if (x == dev)
+			list_del_init(&cmd->eh_entry);
+	}
+}
+
+static void sas_scsi_clear_queue_port(struct list_head *error_q,
+				      struct asd_sas_port *port)
+{
+	struct scsi_cmnd *cmd, *n;
+
+	list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
+		struct domain_device *dev = cmd_to_domain_dev(cmd);
+		struct asd_sas_port *x = dev->port;
+
+		if (x == port)
+			list_del_init(&cmd->eh_entry);
+	}
+}
+
+enum task_disposition {
+	TASK_IS_DONE,
+	TASK_IS_ABORTED,
+	TASK_IS_AT_LU,
+	TASK_IS_NOT_AT_LU,
+};
+
+static enum task_disposition sas_scsi_find_task(struct sas_task *task)
+{
+	struct sas_ha_struct *ha = task->dev->port->ha;
+	unsigned long flags;
+	int i, res;
+	struct sas_internal *si =
+		to_sas_internal(task->dev->port->ha->core.shost->transportt);
+
+	if (ha->lldd_max_execute_num > 1) {	/* tasks may sit in task_queue */
+		struct scsi_core *core = &ha->core;
+		struct sas_task *t, *n;
+
+		spin_lock_irqsave(&core->task_queue_lock, flags);
+		list_for_each_entry_safe(t, n, &core->task_queue, list) {
+			if (task == t) {	/* still queued: just unlink it */
+				list_del_init(&t->list);
+				spin_unlock_irqrestore(&core->task_queue_lock,
+						       flags);
+				SAS_DPRINTK("%s: task 0x%p aborted from "
+					    "task_queue\n",
+					    __FUNCTION__, task);
+				return TASK_IS_ABORTED;
+			}
+		}
+		spin_unlock_irqrestore(&core->task_queue_lock, flags);
+	}
+
+	for (i = 0; i < 5; i++) {	/* up to five abort attempts */
+		SAS_DPRINTK("%s: aborting task 0x%p\n", __FUNCTION__, task);
+		res = si->dft->lldd_abort_task(task);
+
+		spin_lock_irqsave(&task->task_state_lock, flags);
+		if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+			spin_unlock_irqrestore(&task->task_state_lock, flags);
+			SAS_DPRINTK("%s: task 0x%p is done\n", __FUNCTION__,
+				    task);
+			return TASK_IS_DONE;
+		}
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+		if (res == TMF_RESP_FUNC_COMPLETE) {
+			SAS_DPRINTK("%s: task 0x%p is aborted\n",
+				    __FUNCTION__, task);
+			return TASK_IS_ABORTED;
+		} else if (si->dft->lldd_query_task) {
+			SAS_DPRINTK("%s: querying task 0x%p\n",
+				    __FUNCTION__, task);
+			res = si->dft->lldd_query_task(task);
+			if (res == TMF_RESP_FUNC_SUCC) {
+				SAS_DPRINTK("%s: task 0x%p at LU\n",
+					    __FUNCTION__, task);
+				return TASK_IS_AT_LU;
+			} else if (res == TMF_RESP_FUNC_COMPLETE) {
+				SAS_DPRINTK("%s: task 0x%p not at LU\n",
+					    __FUNCTION__, task);
+				return TASK_IS_NOT_AT_LU;
+			}
+		}
+	}
+	return res;	/* NOTE(review): TMF code, not a task_disposition */
+}
+
+static int sas_recover_lu(struct domain_device *dev, struct scsi_cmnd *cmd)
+{
+	int res = TMF_RESP_FUNC_FAILED;
+	struct scsi_lun lun;
+	struct sas_internal *i =
+		to_sas_internal(dev->port->ha->core.shost->transportt);
+
+	int_to_scsilun(cmd->device->lun, &lun);
+
+	SAS_DPRINTK("eh: device %llx LUN %x has the task\n",
+		    SAS_ADDR(dev->sas_addr),
+		    cmd->device->lun);
+
+	if (i->dft->lldd_abort_task_set)
+		res = i->dft->lldd_abort_task_set(dev, lun.scsi_lun);
+
+	if (res == TMF_RESP_FUNC_FAILED) {
+		if (i->dft->lldd_clear_task_set)
+			res = i->dft->lldd_clear_task_set(dev, lun.scsi_lun);
+	}
+
+	if (res == TMF_RESP_FUNC_FAILED) {
+		if (i->dft->lldd_lu_reset)
+			res = i->dft->lldd_lu_reset(dev, lun.scsi_lun);
+	}
+
+	return res;
+}
+
+static int sas_recover_I_T(struct domain_device *dev)
+{
+	int res = TMF_RESP_FUNC_FAILED;
+	struct sas_internal *i =
+		to_sas_internal(dev->port->ha->core.shost->transportt);
+
+	SAS_DPRINTK("I_T nexus reset for dev %016llx\n",
+		    SAS_ADDR(dev->sas_addr));
+
+	if (i->dft->lldd_I_T_nexus_reset)
+		res = i->dft->lldd_I_T_nexus_reset(dev);
+
+	return res;
+}
+
+void sas_scsi_recover_host(struct Scsi_Host *shost)
+{
+	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
+	unsigned long flags;
+	LIST_HEAD(error_q);
+	struct scsi_cmnd *cmd, *n;
+	enum task_disposition res = TASK_IS_DONE;
+	int tmf_resp;
+	struct sas_internal *i = to_sas_internal(shost->transportt);
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	list_splice_init(&shost->eh_cmd_q, &error_q);
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	SAS_DPRINTK("Enter %s\n", __FUNCTION__);
+
+	/* All tasks on this list were marked SAS_TASK_STATE_ABORTED
+	 * by sas_scsi_timed_out() callback.
+	 */
+Again:
+	SAS_DPRINTK("going over list...\n");
+	list_for_each_entry_safe(cmd, n, &error_q, eh_entry) {
+		struct sas_task *task = TO_SAS_TASK(cmd);
+
+		SAS_DPRINTK("trying to find task 0x%p\n", task);
+		list_del_init(&cmd->eh_entry);
+		res = sas_scsi_find_task(task);
+
+		cmd->eh_eflags = 0;
+		shost->host_failed--;
+
+		switch (res) {
+		case TASK_IS_DONE:
+			SAS_DPRINTK("%s: task 0x%p is done\n", __FUNCTION__,
+				    task);
+			task->task_done(task);
+			continue;
+		case TASK_IS_ABORTED:
+			SAS_DPRINTK("%s: task 0x%p is aborted\n",
+				    __FUNCTION__, task);
+			task->task_done(task);
+			continue;
+		case TASK_IS_AT_LU:
+			SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
+			tmf_resp = sas_recover_lu(task->dev, cmd);
+			if (tmf_resp == TMF_RESP_FUNC_COMPLETE) {
+				SAS_DPRINTK("dev %016llx LU %x is "
+					    "recovered\n",
+					    SAS_ADDR(task->dev->sas_addr),
+					    cmd->device->lun);
+				task->task_done(task);
+				sas_scsi_clear_queue_lu(&error_q, cmd);
+				goto Again;
+			}
+			/* fallthrough */
+		case TASK_IS_NOT_AT_LU:
+			SAS_DPRINTK("task 0x%p is not at LU: I_T recover\n",
+				    task);
+			tmf_resp = sas_recover_I_T(task->dev);
+			if (tmf_resp == TMF_RESP_FUNC_COMPLETE) {
+				SAS_DPRINTK("I_T %016llx recovered\n",
+					    SAS_ADDR(task->dev->sas_addr));
+				sas_scsi_clear_queue_I_T(&error_q, task->dev);
+				task->task_done(task);	/* frees task */
+				goto Again;
+			}
+			/* Hammer time :-) */
+			if (i->dft->lldd_clear_nexus_port) {
+				struct asd_sas_port *port = task->dev->port;
+				SAS_DPRINTK("clearing nexus for port:%d\n",
+					    port->id);
+				res = i->dft->lldd_clear_nexus_port(port);
+				if (res == TMF_RESP_FUNC_COMPLETE) {
+					SAS_DPRINTK("clear nexus port:%d "
+						    "succeeded\n", port->id);
+					task->task_done(task);
+					sas_scsi_clear_queue_port(&error_q,
+								  port);
+					goto Again;
+				}
+			}
+			if (i->dft->lldd_clear_nexus_ha) {
+				SAS_DPRINTK("clear nexus ha\n");
+				res = i->dft->lldd_clear_nexus_ha(ha);
+				if (res == TMF_RESP_FUNC_COMPLETE) {
+					SAS_DPRINTK("clear nexus ha "
+						    "succeeded\n");
+					task->task_done(task);
+					goto clear_q;	/* finish the rest */
+				}
+			}
+			/* If we are here -- this means that no amount
+			 * of effort could recover from errors.  Quite
+			 * possibly the HA just disappeared.
+			 */
+			SAS_DPRINTK("error from  device %llx, LUN %x "
+				    "couldn't be recovered in any way\n",
+				    SAS_ADDR(task->dev->sas_addr),
+				    cmd->device->lun);
+
+			task->task_done(task);
+			goto clear_q;
+		}
+	}
+out:
+	SAS_DPRINTK("--- Exit %s\n", __FUNCTION__);
+	return;
+clear_q:
+	SAS_DPRINTK("--- Exit %s -- clear_q\n", __FUNCTION__);
+	list_for_each_entry_safe(cmd, n, &error_q, eh_entry) {
+		struct sas_task *task = TO_SAS_TASK(cmd);
+		list_del_init(&cmd->eh_entry);
+		task->task_done(task);
+	}
+}
+
+enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
+{
+	struct sas_task *task = TO_SAS_TASK(cmd);
+	unsigned long flags;
+
+	if (!task) {
+		SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n",
+			    cmd, task);
+		return EH_HANDLED;
+	}
+
+	spin_lock_irqsave(&task->task_state_lock, flags);
+	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
+		spin_unlock_irqrestore(&task->task_state_lock, flags);
+		SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n",
+			    cmd, task);
+		return EH_HANDLED;
+	}
+	task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+	spin_unlock_irqrestore(&task->task_state_lock, flags);
+
+	SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n",
+		    cmd, task);
+
+	return EH_NOT_HANDLED;
+}
+
+struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy)
+{
+	struct Scsi_Host *shost = dev_to_shost(rphy->dev.parent);
+	struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
+	struct domain_device *found_dev = NULL;
+	int i;
+
+	spin_lock(&ha->phy_port_lock);
+	for (i = 0; i < ha->num_phys; i++) {
+		struct asd_sas_port *port = ha->sas_port[i];
+		struct domain_device *dev;
+
+		spin_lock(&port->dev_list_lock);
+		list_for_each_entry(dev, &port->dev_list, dev_list_node) {
+			if (rphy == dev->rphy) {
+				found_dev = dev;
+				spin_unlock(&port->dev_list_lock);
+				goto found;
+			}
+		}
+		spin_unlock(&port->dev_list_lock);
+	}
+ found:
+	spin_unlock(&ha->phy_port_lock);
+
+	return found_dev;
+}
+
+static inline struct domain_device *sas_find_target(struct scsi_target *starget)
+{
+	struct sas_rphy *rphy = dev_to_rphy(starget->dev.parent);
+
+	return sas_find_dev_by_rphy(rphy);
+}
+
+int sas_target_alloc(struct scsi_target *starget)
+{
+	struct domain_device *found_dev = sas_find_target(starget);
+
+	if (!found_dev)
+		return -ENODEV;
+
+	starget->hostdata = found_dev;
+	return 0;
+}
+
+#define SAS_DEF_QD 32
+#define SAS_MAX_QD 64
+
+int sas_slave_configure(struct scsi_device *scsi_dev)
+{
+	struct domain_device *dev = sdev_to_domain_dev(scsi_dev);
+	struct sas_ha_struct *sas_ha;
+
+	BUG_ON(dev->rphy->identify.device_type != SAS_END_DEVICE);
+
+	sas_ha = dev->port->ha;
+
+	sas_read_port_mode_page(scsi_dev);
+
+	if (scsi_dev->tagged_supported) {
+		scsi_set_tag_type(scsi_dev, MSG_SIMPLE_TAG);
+		scsi_activate_tcq(scsi_dev, SAS_DEF_QD);
+	} else {
+		SAS_DPRINTK("device %llx, LUN %x doesn't support "
+			    "TCQ\n", SAS_ADDR(dev->sas_addr),
+			    scsi_dev->lun);
+		scsi_dev->tagged_supported = 0;
+		scsi_set_tag_type(scsi_dev, 0);
+		scsi_deactivate_tcq(scsi_dev, 1);
+	}
+
+	return 0;
+}
+
+void sas_slave_destroy(struct scsi_device *scsi_dev)
+{
+}
+
+int sas_change_queue_depth(struct scsi_device *scsi_dev, int new_depth)
+{
+	int res = min(new_depth, SAS_MAX_QD);
+
+	if (scsi_dev->tagged_supported)
+		scsi_adjust_queue_depth(scsi_dev, scsi_get_tag_type(scsi_dev),
+					res);
+	else {
+		struct domain_device *dev = sdev_to_domain_dev(scsi_dev);
+		sas_printk("device %llx LUN %x queue depth changed to 1\n",
+			   SAS_ADDR(dev->sas_addr),
+			   scsi_dev->lun);
+		scsi_adjust_queue_depth(scsi_dev, 0, 1);
+		res = 1;
+	}
+
+	return res;
+}
+
+int sas_change_queue_type(struct scsi_device *scsi_dev, int qt)
+{
+	if (!scsi_dev->tagged_supported)
+		return 0;
+
+	scsi_deactivate_tcq(scsi_dev, 1);
+
+	scsi_set_tag_type(scsi_dev, qt);
+	scsi_activate_tcq(scsi_dev, scsi_dev->queue_depth);
+
+	return qt;
+}
+
+int sas_bios_param(struct scsi_device *scsi_dev,
+			  struct block_device *bdev,
+			  sector_t capacity, int *hsc)
+{
+	hsc[0] = 255;
+	hsc[1] = 63;
+	sector_div(capacity, 255*63);
+	hsc[2] = capacity;
+
+	return 0;
+}
+
+/* ---------- Task Collector Thread implementation ---------- */
+
+static void sas_queue(struct sas_ha_struct *sas_ha)
+{
+	struct scsi_core *core = &sas_ha->core;
+	unsigned long flags;
+	LIST_HEAD(q);
+	int can_queue;
+	int res;
+	struct sas_internal *i = to_sas_internal(core->shost->transportt);
+
+	spin_lock_irqsave(&core->task_queue_lock, flags);
+	while (!core->queue_thread_kill &&
+	       !list_empty(&core->task_queue)) {
+
+		can_queue = sas_ha->lldd_queue_size - core->task_queue_size;
+		if (can_queue >= 0) {
+			can_queue = core->task_queue_size;
+			list_splice_init(&core->task_queue, &q);
+		} else {
+			struct list_head *a, *n;
+
+			can_queue = sas_ha->lldd_queue_size;
+			list_for_each_safe(a, n, &core->task_queue) {
+				list_move_tail(a, &q);
+				if (--can_queue == 0)
+					break;
+			}
+			can_queue = sas_ha->lldd_queue_size;
+		}
+		core->task_queue_size -= can_queue;
+		spin_unlock_irqrestore(&core->task_queue_lock, flags);
+		{
+			struct sas_task *task = list_entry(q.next,
+							   struct sas_task,
+							   list);
+			list_del_init(&q);
+			res = i->dft->lldd_execute_task(task, can_queue,
+							GFP_KERNEL);
+			if (unlikely(res))
+				__list_add(&q, task->list.prev, &task->list);
+		}
+		spin_lock_irqsave(&core->task_queue_lock, flags);
+		if (res) {
+			list_splice_init(&q, &core->task_queue); /*at head*/
+			core->task_queue_size += can_queue;
+		}
+	}
+	spin_unlock_irqrestore(&core->task_queue_lock, flags);
+}
+
+static DECLARE_COMPLETION(queue_th_comp);
+
+/**
+ * sas_queue_thread -- The Task Collector thread
+ * @_sas_ha: pointer to struct sas_ha
+ */
+static int sas_queue_thread(void *_sas_ha)
+{
+	struct sas_ha_struct *sas_ha = _sas_ha;
+	struct scsi_core *core = &sas_ha->core;
+
+	daemonize("sas_queue_%d", core->shost->host_no);
+	current->flags |= PF_NOFREEZE;
+
+	complete(&queue_th_comp);
+
+	while (1) {
+		down_interruptible(&core->queue_thread_sema);
+		sas_queue(sas_ha);
+		if (core->queue_thread_kill)
+			break;
+	}
+
+	complete(&queue_th_comp);
+
+	return 0;
+}
+
+int sas_init_queue(struct sas_ha_struct *sas_ha)
+{
+	int res;
+	struct scsi_core *core = &sas_ha->core;
+
+	spin_lock_init(&core->task_queue_lock);
+	core->task_queue_size = 0;
+	INIT_LIST_HEAD(&core->task_queue);
+	init_MUTEX_LOCKED(&core->queue_thread_sema);
+
+	res = kernel_thread(sas_queue_thread, sas_ha, 0);
+	if (res >= 0)
+		wait_for_completion(&queue_th_comp);
+
+	return res < 0 ? res : 0;
+}
+
+void sas_shutdown_queue(struct sas_ha_struct *sas_ha)
+{
+	unsigned long flags;
+	struct scsi_core *core = &sas_ha->core;
+	struct sas_task *task, *n;
+
+	init_completion(&queue_th_comp);
+	core->queue_thread_kill = 1;
+	up(&core->queue_thread_sema);
+	wait_for_completion(&queue_th_comp);
+
+	if (!list_empty(&core->task_queue))
+		SAS_DPRINTK("HA: %llx: scsi core task queue is NOT empty!?\n",
+			    SAS_ADDR(sas_ha->sas_addr));
+
+	spin_lock_irqsave(&core->task_queue_lock, flags);
+	list_for_each_entry_safe(task, n, &core->task_queue, list) {
+		struct scsi_cmnd *cmd = task->uldd_task;
+
+		list_del_init(&task->list);
+
+		ASSIGN_SAS_TASK(cmd, NULL);
+		sas_free_task(task);
+		cmd->result = DID_ABORT << 16;
+		cmd->scsi_done(cmd);
+	}
+	spin_unlock_irqrestore(&core->task_queue_lock, flags);
+}
+
+EXPORT_SYMBOL_GPL(sas_queuecommand);
+EXPORT_SYMBOL_GPL(sas_target_alloc);
+EXPORT_SYMBOL_GPL(sas_slave_configure);
+EXPORT_SYMBOL_GPL(sas_slave_destroy);
+EXPORT_SYMBOL_GPL(sas_change_queue_depth);
+EXPORT_SYMBOL_GPL(sas_change_queue_type);
+EXPORT_SYMBOL_GPL(sas_bios_param);
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index d44f9aa..3f7f5f8 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -285,6 +285,7 @@
 	uint32_t cfg_log_verbose;
 	uint32_t cfg_lun_queue_depth;
 	uint32_t cfg_nodev_tmo;
+	uint32_t cfg_devloss_tmo;
 	uint32_t cfg_hba_queue_depth;
 	uint32_t cfg_fcp_class;
 	uint32_t cfg_use_adisc;
@@ -302,6 +303,9 @@
 	uint32_t cfg_poll_tmo;
 	uint32_t cfg_sg_seg_cnt;
 	uint32_t cfg_sg_dma_buf_size;
+	uint64_t cfg_soft_wwpn;
+
+	uint32_t dev_loss_tmo_changed;
 
 	lpfc_vpd_t vpd;		/* vital product data */
 
@@ -351,6 +355,8 @@
 #define VPD_PORT            0x8         /* valid vpd port data */
 #define VPD_MASK            0xf         /* mask for any vpd data */
 
+	uint8_t soft_wwpn_enable;
+
 	struct timer_list fcp_poll_timer;
 	struct timer_list els_tmofunc;
 
@@ -391,3 +397,5 @@
 	struct list_head list;
 	uint32_t data;
 };
+
+#define FC_REG_DUMP_EVENT	0x10	/* Register for Dump events */
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index d384c16..9496e87 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -39,6 +39,9 @@
 #include "lpfc_compat.h"
 #include "lpfc_crtn.h"
 
+#define LPFC_DEF_DEVLOSS_TMO 30
+#define LPFC_MIN_DEVLOSS_TMO 1
+#define LPFC_MAX_DEVLOSS_TMO 255
 
 static void
 lpfc_jedec_to_ascii(int incr, char hdw[])
@@ -548,6 +551,119 @@
 			 lpfc_board_mode_show, lpfc_board_mode_store);
 static CLASS_DEVICE_ATTR(issue_reset, S_IWUSR, NULL, lpfc_issue_reset);
 
+
+static char *lpfc_soft_wwpn_key = "C99G71SL8032A";
+
+static ssize_t
+lpfc_soft_wwpn_enable_store(struct class_device *cdev, const char *buf,
+				size_t count)
+{
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+	unsigned int cnt = count;
+
+	/*
+	 * We're doing a simple sanity check for soft_wwpn setting.
+	 * We require that the user write a specific key to enable
+	 * the soft_wwpn attribute to be settable. Once the attribute
+	 * is written, the enable key resets. If further updates are
+	 * desired, the key must be written again to re-enable the
+	 * attribute.
+	 *
+	 * The "key" is not secret - it is a hardcoded string shown
+	 * here. The intent is to protect against the random user or
+	 * application that is just writing attributes.
+	 */
+
+	/* count may include a LF at end of string; never index an empty buf */
+	if (cnt && buf[cnt-1] == '\n')
+		cnt--;
+
+	if ((cnt != strlen(lpfc_soft_wwpn_key)) ||
+	    (strncmp(buf, lpfc_soft_wwpn_key, strlen(lpfc_soft_wwpn_key)) != 0))
+		return -EINVAL;
+
+	phba->soft_wwpn_enable = 1;
+	return count;
+}
+static CLASS_DEVICE_ATTR(lpfc_soft_wwpn_enable, S_IWUSR, NULL,
+				lpfc_soft_wwpn_enable_store);
+
+/* sysfs "show" handler: print the configured soft WWPN as a hex string. */
+static ssize_t lpfc_soft_wwpn_show(struct class_device *cdev, char *buf)
+{
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+	return snprintf(buf, PAGE_SIZE, "0x%llx\n",
+			(unsigned long long)phba->cfg_soft_wwpn);
+}
+/* Store a new soft WWPN (16 hex digits, optional x/0x prefix), then reinit. */
+static ssize_t
+lpfc_soft_wwpn_store(struct class_device *cdev, const char *buf, size_t count)
+{
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+	struct completion online_compl;
+	int stat1=0, stat2=0;
+	unsigned int i, j, cnt=count;
+	u8 wwpn[8];
+
+	/* count may include a LF at end of string; never index an empty buf */
+	if (cnt && buf[cnt-1] == '\n')
+		cnt--;
+
+	if (!phba->soft_wwpn_enable || (cnt < 16) || (cnt > 18) ||
+	    ((cnt == 17) && (*buf++ != 'x')) ||
+	    ((cnt == 18) && ((*buf++ != '0') || (*buf++ != 'x'))))
+		return -EINVAL;
+
+	/* One-shot enable: consumed even if the hex digits below are invalid */
+	phba->soft_wwpn_enable = 0;
+	memset(wwpn, 0, sizeof(wwpn));
+
+	/* Validate and store the new name */
+	for (i=0, j=0; i < 16; i++) {
+		if ((*buf >= 'a') && (*buf <= 'f'))
+			j = ((j << 4) | ((*buf++ -'a') + 10));
+		else if ((*buf >= 'A') && (*buf <= 'F'))
+			j = ((j << 4) | ((*buf++ -'A') + 10));
+		else if ((*buf >= '0') && (*buf <= '9'))
+			j = ((j << 4) | (*buf++ -'0'));
+		else
+			return -EINVAL;
+		if (i % 2) {
+			wwpn[i/2] = j & 0xff;
+			j = 0;
+		}
+	}
+	phba->cfg_soft_wwpn = wwn_to_u64(wwpn);
+	fc_host_port_name(host) = phba->cfg_soft_wwpn;
+
+	dev_printk(KERN_NOTICE, &phba->pcidev->dev,
+		   "lpfc%d: Reinitializing to use soft_wwpn\n", phba->brd_no);
+
+	init_completion(&online_compl);
+	lpfc_workq_post_event(phba, &stat1, &online_compl, LPFC_EVT_OFFLINE);
+	wait_for_completion(&online_compl);
+	if (stat1)
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0463 lpfc_soft_wwpn attribute set failed to reinit "
+			"adapter - %d\n", phba->brd_no, stat1);
+
+	init_completion(&online_compl);
+	lpfc_workq_post_event(phba, &stat2, &online_compl, LPFC_EVT_ONLINE);
+	wait_for_completion(&online_compl);
+	if (stat2)
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0464 lpfc_soft_wwpn attribute set failed to reinit "
+			"adapter - %d\n", phba->brd_no, stat2);
+
+	return (stat1 || stat2) ? -EIO : count;
+}
+static CLASS_DEVICE_ATTR(lpfc_soft_wwpn, S_IRUGO | S_IWUSR,
+			 lpfc_soft_wwpn_show, lpfc_soft_wwpn_store);
+
+
 static int lpfc_poll = 0;
 module_param(lpfc_poll, int, 0);
 MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:"
@@ -559,6 +675,123 @@
 			 lpfc_poll_show, lpfc_poll_store);
 
 /*
+# lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear
+# until the timer expires. Value range is [1,255]. Default value is 30.
+*/
+static int lpfc_nodev_tmo = LPFC_DEF_DEVLOSS_TMO;
+static int lpfc_devloss_tmo = LPFC_DEF_DEVLOSS_TMO;
+module_param(lpfc_nodev_tmo, int, 0);
+MODULE_PARM_DESC(lpfc_nodev_tmo,
+		 "Seconds driver will hold I/O waiting "
+		 "for a device to come back");
+static ssize_t
+lpfc_nodev_tmo_show(struct class_device *cdev, char *buf)
+{
+	struct Scsi_Host *host = class_to_shost(cdev);
+	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+
+	/* nodev_tmo reads back the devloss_tmo setting */
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			phba->cfg_devloss_tmo);
+}
+
+static int
+lpfc_nodev_tmo_init(struct lpfc_hba *phba, int val)
+{
+	static int warned;
+	if (phba->cfg_devloss_tmo !=  LPFC_DEF_DEVLOSS_TMO) {
+		phba->cfg_nodev_tmo = phba->cfg_devloss_tmo;
+		if (!warned && val != LPFC_DEF_DEVLOSS_TMO) {
+			warned = 1;
+			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+					"%d:0402 Ignoring nodev_tmo module "
+					"parameter because devloss_tmo is"
+					" set.\n",
+					phba->brd_no);
+		}
+		return 0;
+	}
+
+	if (val >= LPFC_MIN_DEVLOSS_TMO && val <= LPFC_MAX_DEVLOSS_TMO) {
+		phba->cfg_nodev_tmo = val;
+		phba->cfg_devloss_tmo = val;
+		return 0;
+	}
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0400 lpfc_nodev_tmo attribute cannot be set to %d, "
+			"allowed range is [%d, %d]\n",
+			phba->brd_no, val,
+			LPFC_MIN_DEVLOSS_TMO, LPFC_MAX_DEVLOSS_TMO);
+	phba->cfg_nodev_tmo = LPFC_DEF_DEVLOSS_TMO;
+	return -EINVAL;
+}
+
+static int
+lpfc_nodev_tmo_set(struct lpfc_hba *phba, int val)
+{
+	if (phba->dev_loss_tmo_changed ||
+		(lpfc_devloss_tmo != LPFC_DEF_DEVLOSS_TMO)) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+				"%d:0401 Ignoring change to nodev_tmo "
+				"because devloss_tmo is set.\n",
+				phba->brd_no);
+		return 0;
+	}
+
+	if (val >= LPFC_MIN_DEVLOSS_TMO && val <= LPFC_MAX_DEVLOSS_TMO) {
+		phba->cfg_nodev_tmo = val;
+		phba->cfg_devloss_tmo = val;
+		return 0;
+	}
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0403 lpfc_nodev_tmo attribute cannot be set to %d, "
+			"allowed range is [%d, %d]\n",
+			phba->brd_no, val, LPFC_MIN_DEVLOSS_TMO,
+			LPFC_MAX_DEVLOSS_TMO);
+	return -EINVAL;
+}
+
+lpfc_param_store(nodev_tmo)
+
+static CLASS_DEVICE_ATTR(lpfc_nodev_tmo, S_IRUGO | S_IWUSR,
+			 lpfc_nodev_tmo_show, lpfc_nodev_tmo_store);
+
+/*
+# lpfc_devloss_tmo: If set, it will hold all I/O errors on devices that
+# disappear until the timer expires. Value range is [1,255]. Default
+# value is 30.
+*/
+module_param(lpfc_devloss_tmo, int, 0);
+MODULE_PARM_DESC(lpfc_devloss_tmo,
+		 "Seconds driver will hold I/O waiting "
+		 "for a device to come back");
+lpfc_param_init(devloss_tmo, LPFC_DEF_DEVLOSS_TMO,
+		LPFC_MIN_DEVLOSS_TMO, LPFC_MAX_DEVLOSS_TMO)
+lpfc_param_show(devloss_tmo)
+static int
+lpfc_devloss_tmo_set(struct lpfc_hba *phba, int val)
+{
+	if (val >= LPFC_MIN_DEVLOSS_TMO && val <= LPFC_MAX_DEVLOSS_TMO) {
+		phba->cfg_nodev_tmo = val;
+		phba->cfg_devloss_tmo = val;
+		phba->dev_loss_tmo_changed = 1;
+		return 0;
+	}
+
+	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0404 lpfc_devloss_tmo attribute cannot be set to"
+			" %d, allowed range is [%d, %d]\n",
+			phba->brd_no, val, LPFC_MIN_DEVLOSS_TMO,
+			LPFC_MAX_DEVLOSS_TMO);
+	return -EINVAL;
+}
+
+lpfc_param_store(devloss_tmo)
+static CLASS_DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR,
+	lpfc_devloss_tmo_show, lpfc_devloss_tmo_store);
+
+/*
 # lpfc_log_verbose: Only turn this flag on if you are willing to risk being
 # deluged with LOTS of information.
 # You can set a bit mask to record specific types of verbose messages:
@@ -617,14 +850,6 @@
 	     "Start scanning for devices from highest ALPA to lowest");
 
 /*
-# lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear
-# until the timer expires. Value range is [0,255]. Default value is 30.
-# NOTE: this MUST be less then the SCSI Layer command timeout - 1.
-*/
-LPFC_ATTR_RW(nodev_tmo, 30, 0, 255,
-	     "Seconds driver will hold I/O waiting for a device to come back");
-
-/*
 # lpfc_topology:  link topology for init link
 #            0x0  = attempt loop mode then point-to-point
 #            0x01 = internal loopback mode
@@ -720,6 +945,7 @@
 LPFC_ATTR_RW(poll_tmo, 10, 1, 255,
 	     "Milliseconds driver will wait between polling FCP ring");
 
+
 struct class_device_attribute *lpfc_host_attrs[] = {
 	&class_device_attr_info,
 	&class_device_attr_serialnum,
@@ -737,6 +963,7 @@
 	&class_device_attr_lpfc_lun_queue_depth,
 	&class_device_attr_lpfc_hba_queue_depth,
 	&class_device_attr_lpfc_nodev_tmo,
+	&class_device_attr_lpfc_devloss_tmo,
 	&class_device_attr_lpfc_fcp_class,
 	&class_device_attr_lpfc_use_adisc,
 	&class_device_attr_lpfc_ack0,
@@ -754,6 +981,8 @@
 	&class_device_attr_issue_reset,
 	&class_device_attr_lpfc_poll,
 	&class_device_attr_lpfc_poll_tmo,
+	&class_device_attr_lpfc_soft_wwpn,
+	&class_device_attr_lpfc_soft_wwpn_enable,
 	NULL,
 };
 
@@ -1204,6 +1433,15 @@
 	fc_host_fabric_name(shost) = node_name;
 }
 
+static void
+lpfc_get_host_symbolic_name (struct Scsi_Host *shost)
+{
+	struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
+
+	spin_lock_irq(shost->host_lock);
+	lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost));
+	spin_unlock_irq(shost->host_lock);
+}
 
 static struct fc_host_statistics *
 lpfc_get_stats(struct Scsi_Host *shost)
@@ -1441,27 +1679,12 @@
 }
 
 static void
-lpfc_get_rport_loss_tmo(struct fc_rport *rport)
-{
-	/*
-	 * Return the driver's global value for device loss timeout plus
-	 * five seconds to allow the driver's nodev timer to run.
-	 */
-	rport->dev_loss_tmo = lpfc_nodev_tmo + 5;
-}
-
-static void
 lpfc_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout)
 {
-	/*
-	 * The driver doesn't have a per-target timeout setting.  Set
-	 * this value globally. lpfc_nodev_tmo should be greater then 0.
-	 */
 	if (timeout)
-		lpfc_nodev_tmo = timeout;
+		rport->dev_loss_tmo = timeout;
 	else
-		lpfc_nodev_tmo = 1;
-	rport->dev_loss_tmo = lpfc_nodev_tmo + 5;
+		rport->dev_loss_tmo = 1;
 }
 
 
@@ -1486,7 +1709,6 @@
 	.show_host_port_name = 1,
 	.show_host_supported_classes = 1,
 	.show_host_supported_fc4s = 1,
-	.show_host_symbolic_name = 1,
 	.show_host_supported_speeds = 1,
 	.show_host_maxframe_size = 1,
 
@@ -1509,6 +1731,9 @@
 	.get_host_fabric_name = lpfc_get_host_fabric_name,
 	.show_host_fabric_name = 1,
 
+	.get_host_symbolic_name = lpfc_get_host_symbolic_name,
+	.show_host_symbolic_name = 1,
+
 	/*
 	 * The LPFC driver treats linkdown handling as target loss events
 	 * so there are no sysfs handlers for link_down_tmo.
@@ -1521,7 +1746,6 @@
 	.show_rport_maxframe_size = 1,
 	.show_rport_supported_classes = 1,
 
-	.get_rport_dev_loss_tmo = lpfc_get_rport_loss_tmo,
 	.set_rport_dev_loss_tmo = lpfc_set_rport_loss_tmo,
 	.show_rport_dev_loss_tmo = 1,
 
@@ -1535,6 +1759,8 @@
 	.show_starget_port_name = 1,
 
 	.issue_fc_host_lip = lpfc_issue_lip,
+	.dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk,
+	.terminate_rport_io = lpfc_terminate_rport_io,
 };
 
 void
@@ -1550,14 +1776,15 @@
 	lpfc_ack0_init(phba, lpfc_ack0);
 	lpfc_topology_init(phba, lpfc_topology);
 	lpfc_scan_down_init(phba, lpfc_scan_down);
-	lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo);
 	lpfc_link_speed_init(phba, lpfc_link_speed);
 	lpfc_fdmi_on_init(phba, lpfc_fdmi_on);
 	lpfc_discovery_threads_init(phba, lpfc_discovery_threads);
 	lpfc_max_luns_init(phba, lpfc_max_luns);
 	lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
-
+	lpfc_devloss_tmo_init(phba, lpfc_devloss_tmo);
+	lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo);
 	phba->cfg_poll = lpfc_poll;
+	phba->cfg_soft_wwpn = 0L;
 
 	/*
 	 * The total number of segments is the configuration value plus 2
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 2a17646..3d68449 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -18,6 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
+struct fc_rport;
 void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t);
 void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *);
 int lpfc_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb,
@@ -200,6 +201,8 @@
 extern struct fc_function_template lpfc_transport_functions;
 
 void lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp);
+void lpfc_terminate_rport_io(struct fc_rport *);
+void lpfc_dev_loss_tmo_callbk(struct fc_rport *rport);
 
 #define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code)
 #define HBA_EVENT_RSCN                   5
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index bbb7310..ae41064 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -324,7 +324,6 @@
 	struct lpfc_sli_ct_request *Response =
 		(struct lpfc_sli_ct_request *) mp->virt;
 	struct lpfc_nodelist *ndlp = NULL;
-	struct lpfc_nodelist *next_ndlp;
 	struct lpfc_dmabuf *mlast, *next_mp;
 	uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType;
 	uint32_t Did;
@@ -399,30 +398,6 @@
  	 * current driver state.
  	 */
 	if (phba->hba_state == LPFC_HBA_READY) {
-
-		/*
-		 * Switch ports that connect a loop of multiple targets need
-		 * special consideration.  The driver wants to unregister the
-		 * rpi only on the target that was pulled from the loop.  On
-		 * RSCN, the driver wants to rediscover an NPort only if the
-		 * driver flagged it as NLP_NPR_2B_DISC.  Provided adisc is
-		 * not enabled and the NPort is not capable of retransmissions
-		 * (FC Tape) prevent timing races with the scsi error handler by
-		 * unregistering the Nport's RPI.  This action causes all
-		 * outstanding IO to flush back to the midlayer.
-		 */
-		list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_npr_list,
-					 nlp_listp) {
-			if (!(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
-			    (lpfc_rscn_payload_check(phba, ndlp->nlp_DID))) {
-				if ((phba->cfg_use_adisc == 0) &&
-				    !(ndlp->nlp_fcp_info &
-				      NLP_FCP_2_DEVICE)) {
-					lpfc_unreg_rpi(phba, ndlp);
-					ndlp->nlp_flag &= ~NLP_NPR_ADISC;
-				}
-			}
-		}
 		lpfc_els_flush_rscn(phba);
 		spin_lock_irq(phba->host->host_lock);
 		phba->fc_flag |= FC_RSCN_MODE; /* we are still in RSCN mode */
diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index 41cf5d3..9766f90 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -30,7 +30,6 @@
 
 /* worker thread events */
 enum lpfc_work_type {
-	LPFC_EVT_NODEV_TMO,
 	LPFC_EVT_ONLINE,
 	LPFC_EVT_OFFLINE,
 	LPFC_EVT_WARM_START,
@@ -74,11 +73,9 @@
 #define NLP_FCP_2_DEVICE   0x10			/* FCP-2 device */
 
 	struct timer_list   nlp_delayfunc;	/* Used for delayed ELS cmds */
-	struct timer_list   nlp_tmofunc;	/* Used for nodev tmo */
 	struct fc_rport *rport;			/* Corresponding FC transport
 						   port structure */
 	struct lpfc_hba      *nlp_phba;
-	struct lpfc_work_evt nodev_timeout_evt;
 	struct lpfc_work_evt els_retry_evt;
 	unsigned long last_ramp_up_time;        /* jiffy of last ramp up */
 	unsigned long last_q_full_time;		/* jiffy of last queue full */
@@ -102,7 +99,6 @@
 #define NLP_LOGO_SND       0x100	/* sent LOGO request for this entry */
 #define NLP_RNID_SND       0x400	/* sent RNID request for this entry */
 #define NLP_ELS_SND_MASK   0x7e0	/* sent ELS request for this entry */
-#define NLP_NODEV_TMO      0x10000	/* nodev timeout is running for node */
 #define NLP_DELAY_TMO      0x20000	/* delay timeout is running for node */
 #define NLP_NPR_2B_DISC    0x40000	/* node is included in num_disc_nodes */
 #define NLP_RCV_PLOGI      0x80000	/* Rcv'ed PLOGI from remote system */
@@ -169,7 +165,7 @@
  */
 /*
  * For a Link Down, all nodes on the ADISC, PLOGI, unmapped or mapped
- * lists will receive a DEVICE_RECOVERY event. If the linkdown or nodev timers
+ * lists will receive a DEVICE_RECOVERY event. If the linkdown or devloss timers
  * expire, all effected nodes will receive a DEVICE_RM event.
  */
 /*
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 3567de61..71864cdc 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -2506,6 +2506,7 @@
 	uint32_t *lp;
 	IOCB_t *icmd;
 	uint32_t payload_len, cmd;
+	int i;
 
 	icmd = &cmdiocb->iocb;
 	pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
@@ -2524,6 +2525,10 @@
 			phba->brd_no,
 			phba->fc_flag, payload_len, *lp, phba->fc_rscn_id_cnt);
 
+	for (i = 0; i < payload_len/sizeof(uint32_t); i++)
+		fc_host_post_event(phba->host, fc_get_event_number(),
+			FCH_EVT_RSCN, lp[i]);
+
 	/* If we are about to begin discovery, just ACC the RSCN.
 	 * Discovery processing will satisfy it.
 	 */
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index b2f1552..d586c3d 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -56,28 +56,63 @@
 
 static void lpfc_disc_timeout_handler(struct lpfc_hba *);
 
-static void
-lpfc_process_nodev_timeout(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+void
+lpfc_terminate_rport_io(struct fc_rport *rport)
 {
-	uint8_t *name = (uint8_t *)&ndlp->nlp_portname;
-	int warn_on = 0;
+	struct lpfc_rport_data *rdata;
+	struct lpfc_nodelist * ndlp;
+	struct lpfc_hba *phba;
 
-	spin_lock_irq(phba->host->host_lock);
-	if (!(ndlp->nlp_flag & NLP_NODEV_TMO)) {
-		spin_unlock_irq(phba->host->host_lock);
+	rdata = rport->dd_data;
+	ndlp = rdata->pnode;
+
+	if (!ndlp) {
+		if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
+			printk(KERN_ERR "Cannot find remote node"
+			" to terminate I/O Data x%x\n",
+			rport->port_id);
 		return;
 	}
 
-	/*
-	 * If a discovery event readded nodev_timer after timer
-	 * firing and before processing the timer, cancel the
-	 * nlp_tmofunc.
-	 */
-	spin_unlock_irq(phba->host->host_lock);
-	del_timer_sync(&ndlp->nlp_tmofunc);
-	spin_lock_irq(phba->host->host_lock);
+	phba = ndlp->nlp_phba;
 
-	ndlp->nlp_flag &= ~NLP_NODEV_TMO;
+	spin_lock_irq(phba->host->host_lock);
+	if (ndlp->nlp_sid != NLP_NO_SID) {
+		lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
+			ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
+	}
+	spin_unlock_irq(phba->host->host_lock);
+
+	return;
+}
+
+/*
+ * This function will be called when dev_loss_tmo fires.
+ */
+void
+lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
+{
+	struct lpfc_rport_data *rdata;
+	struct lpfc_nodelist * ndlp;
+	uint8_t *name;
+	int warn_on = 0;
+	struct lpfc_hba *phba;
+
+	rdata = rport->dd_data;
+	ndlp = rdata->pnode;
+
+	if (!ndlp) {
+		if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
+			printk(KERN_ERR "Cannot find remote node"
+			" for rport in dev_loss_tmo_callbk x%x\n",
+			rport->port_id);
+		return;
+	}
+
+	name = (uint8_t *)&ndlp->nlp_portname;
+	phba = ndlp->nlp_phba;
+
+	spin_lock_irq(phba->host->host_lock);
 
 	if (ndlp->nlp_sid != NLP_NO_SID) {
 		warn_on = 1;
@@ -85,11 +120,14 @@
 		lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
 			ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
 	}
+	if (phba->fc_flag & FC_UNLOADING)
+		warn_on = 0;
+
 	spin_unlock_irq(phba->host->host_lock);
 
 	if (warn_on) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
-				"%d:0203 Nodev timeout on "
+				"%d:0203 Devloss timeout on "
 				"WWPN %x:%x:%x:%x:%x:%x:%x:%x "
 				"NPort x%x Data: x%x x%x x%x\n",
 				phba->brd_no,
@@ -99,7 +137,7 @@
 				ndlp->nlp_state, ndlp->nlp_rpi);
 	} else {
 		lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
-				"%d:0204 Nodev timeout on "
+				"%d:0204 Devloss timeout on "
 				"WWPN %x:%x:%x:%x:%x:%x:%x:%x "
 				"NPort x%x Data: x%x x%x x%x\n",
 				phba->brd_no,
@@ -109,7 +147,12 @@
 				ndlp->nlp_state, ndlp->nlp_rpi);
 	}
 
-	lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM);
+	ndlp->rport = NULL;
+	rdata->pnode = NULL;
+
+	if (!(phba->fc_flag & FC_UNLOADING))
+		lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM);
+
 	return;
 }
 
@@ -127,11 +170,6 @@
 		spin_unlock_irq(phba->host->host_lock);
 		free_evt = 1;
 		switch (evtp->evt) {
-		case LPFC_EVT_NODEV_TMO:
-			ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
-			lpfc_process_nodev_timeout(phba, ndlp);
-			free_evt = 0;
-			break;
 		case LPFC_EVT_ELS_RETRY:
 			ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
 			lpfc_els_retry_delay_handler(ndlp);
@@ -340,6 +378,9 @@
 		spin_unlock_irq(phba->host->host_lock);
 	}
 
+	fc_host_post_event(phba->host, fc_get_event_number(),
+			FCH_EVT_LINKDOWN, 0);
+
 	/* Clean up any firmware default rpi's */
 	if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
 		lpfc_unreg_did(phba, 0xffffffff, mb);
@@ -374,16 +415,6 @@
 			rc = lpfc_disc_state_machine(phba, ndlp, NULL,
 					     NLP_EVT_DEVICE_RECOVERY);
 
-			/* Check config parameter use-adisc or FCP-2 */
-			if ((rc != NLP_STE_FREED_NODE) &&
-				(phba->cfg_use_adisc == 0) &&
-				!(ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE)) {
-				/* We know we will have to relogin, so
-				 * unreglogin the rpi right now to fail
-				 * any outstanding I/Os quickly.
-				 */
-				lpfc_unreg_rpi(phba, ndlp);
-			}
 		}
 	}
 
@@ -427,6 +458,9 @@
 	struct list_head *listp, *node_list[7];
 	int i;
 
+	fc_host_post_event(phba->host, fc_get_event_number(),
+			FCH_EVT_LINKUP, 0);
+
 	spin_lock_irq(phba->host->host_lock);
 	phba->hba_state = LPFC_LINK_UP;
 	phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY |
@@ -638,6 +672,8 @@
 
 	memcpy((uint8_t *) & phba->fc_sparam, (uint8_t *) mp->virt,
 	       sizeof (struct serv_parm));
+	if (phba->cfg_soft_wwpn)
+		u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn);
 	memcpy((uint8_t *) & phba->fc_nodename,
 	       (uint8_t *) & phba->fc_sparam.nodeName,
 	       sizeof (struct lpfc_name));
@@ -1098,8 +1134,11 @@
 	struct fc_rport *rport = ndlp->rport;
 	struct lpfc_rport_data *rdata = rport->dd_data;
 
-	ndlp->rport = NULL;
-	rdata->pnode = NULL;
+	if (rport->scsi_target_id == -1) {
+		ndlp->rport = NULL;
+		rdata->pnode = NULL;
+	}
+
 	fc_remote_port_delete(rport);
 
 	return;
@@ -1227,17 +1266,6 @@
 		list_add_tail(&nlp->nlp_listp, &phba->fc_nlpunmap_list);
 		phba->fc_unmap_cnt++;
 		phba->nport_event_cnt++;
-		/* stop nodev tmo if running */
-		if (nlp->nlp_flag & NLP_NODEV_TMO) {
-			nlp->nlp_flag &= ~NLP_NODEV_TMO;
-			spin_unlock_irq(phba->host->host_lock);
-			del_timer_sync(&nlp->nlp_tmofunc);
-			spin_lock_irq(phba->host->host_lock);
-			if (!list_empty(&nlp->nodev_timeout_evt.evt_listp))
-				list_del_init(&nlp->nodev_timeout_evt.
-						evt_listp);
-
-		}
 		nlp->nlp_flag &= ~NLP_NODEV_REMOVE;
 		nlp->nlp_type |= NLP_FC_NODE;
 		break;
@@ -1248,17 +1276,6 @@
 		list_add_tail(&nlp->nlp_listp, &phba->fc_nlpmap_list);
 		phba->fc_map_cnt++;
 		phba->nport_event_cnt++;
-		/* stop nodev tmo if running */
-		if (nlp->nlp_flag & NLP_NODEV_TMO) {
-			nlp->nlp_flag &= ~NLP_NODEV_TMO;
-			spin_unlock_irq(phba->host->host_lock);
-			del_timer_sync(&nlp->nlp_tmofunc);
-			spin_lock_irq(phba->host->host_lock);
-			if (!list_empty(&nlp->nodev_timeout_evt.evt_listp))
-				list_del_init(&nlp->nodev_timeout_evt.
-						evt_listp);
-
-		}
 		nlp->nlp_flag &= ~NLP_NODEV_REMOVE;
 		break;
 	case NLP_NPR_LIST:
@@ -1267,11 +1284,6 @@
 		list_add_tail(&nlp->nlp_listp, &phba->fc_npr_list);
 		phba->fc_npr_cnt++;
 
-		if (!(nlp->nlp_flag & NLP_NODEV_TMO))
-			mod_timer(&nlp->nlp_tmofunc,
-		 			jiffies + HZ * phba->cfg_nodev_tmo);
-
-		nlp->nlp_flag |= NLP_NODEV_TMO;
 		nlp->nlp_flag &= ~NLP_RCV_PLOGI;
 		break;
 	case NLP_JUST_DQ:
@@ -1301,7 +1313,8 @@
 			 * already. If we have, and it's a scsi entity, be
 			 * sure to unblock any attached scsi devices
 			 */
-			if (!nlp->rport)
+			if ((!nlp->rport) || (nlp->rport->port_state ==
+					FC_PORTSTATE_BLOCKED))
 				lpfc_register_remote_port(phba, nlp);
 
 			/*
@@ -1575,15 +1588,12 @@
 
 	lpfc_els_abort(phba,ndlp,0);
 	spin_lock_irq(phba->host->host_lock);
-	ndlp->nlp_flag &= ~(NLP_NODEV_TMO|NLP_DELAY_TMO);
+	ndlp->nlp_flag &= ~NLP_DELAY_TMO;
 	spin_unlock_irq(phba->host->host_lock);
-	del_timer_sync(&ndlp->nlp_tmofunc);
 
 	ndlp->nlp_last_elscmd = 0;
 	del_timer_sync(&ndlp->nlp_delayfunc);
 
-	if (!list_empty(&ndlp->nodev_timeout_evt.evt_listp))
-		list_del_init(&ndlp->nodev_timeout_evt.evt_listp);
 	if (!list_empty(&ndlp->els_retry_evt.evt_listp))
 		list_del_init(&ndlp->els_retry_evt.evt_listp);
 
@@ -1600,16 +1610,6 @@
 int
 lpfc_nlp_remove(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
 {
-	if (ndlp->nlp_flag & NLP_NODEV_TMO) {
-		spin_lock_irq(phba->host->host_lock);
-		ndlp->nlp_flag &= ~NLP_NODEV_TMO;
-		spin_unlock_irq(phba->host->host_lock);
-		del_timer_sync(&ndlp->nlp_tmofunc);
-		if (!list_empty(&ndlp->nodev_timeout_evt.evt_listp))
-			list_del_init(&ndlp->nodev_timeout_evt.evt_listp);
-
-	}
-
 
 	if (ndlp->nlp_flag & NLP_DELAY_TMO) {
 		lpfc_cancel_retry_delay_tmo(phba, ndlp);
@@ -2424,34 +2424,6 @@
 	return;
 }
 
-static void
-lpfc_nodev_timeout(unsigned long ptr)
-{
-	struct lpfc_hba *phba;
-	struct lpfc_nodelist *ndlp;
-	unsigned long iflag;
-	struct lpfc_work_evt  *evtp;
-
-	ndlp = (struct lpfc_nodelist *)ptr;
-	phba = ndlp->nlp_phba;
-	evtp = &ndlp->nodev_timeout_evt;
-	spin_lock_irqsave(phba->host->host_lock, iflag);
-
-	if (!list_empty(&evtp->evt_listp)) {
-		spin_unlock_irqrestore(phba->host->host_lock, iflag);
-		return;
-	}
-	evtp->evt_arg1  = ndlp;
-	evtp->evt       = LPFC_EVT_NODEV_TMO;
-	list_add_tail(&evtp->evt_listp, &phba->work_list);
-	if (phba->work_wait)
-		wake_up(phba->work_wait);
-
-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
-	return;
-}
-
-
 /*
  * This routine handles processing a NameServer REG_LOGIN mailbox
  * command upon completion. It is setup in the LPFC_MBOXQ
@@ -2575,11 +2547,7 @@
 		 uint32_t did)
 {
 	memset(ndlp, 0, sizeof (struct lpfc_nodelist));
-	INIT_LIST_HEAD(&ndlp->nodev_timeout_evt.evt_listp);
 	INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
-	init_timer(&ndlp->nlp_tmofunc);
-	ndlp->nlp_tmofunc.function = lpfc_nodev_timeout;
-	ndlp->nlp_tmofunc.data = (unsigned long)ndlp;
 	init_timer(&ndlp->nlp_delayfunc);
 	ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
 	ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index f6948ff..4cdf346 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -268,6 +268,8 @@
 	kfree(mp);
 	pmb->context1 = NULL;
 
+	if (phba->cfg_soft_wwpn)
+		u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn);
 	memcpy(&phba->fc_nodename, &phba->fc_sparam.nodeName,
 	       sizeof (struct lpfc_name));
 	memcpy(&phba->fc_portname, &phba->fc_sparam.portName,
@@ -511,6 +513,7 @@
 {
 	struct lpfc_sli *psli = &phba->sli;
 	struct lpfc_sli_ring  *pring;
+	uint32_t event_data;
 
 	if (phba->work_hs & HS_FFER6) {
 		/* Re-establishing Link */
@@ -555,6 +558,11 @@
 				phba->brd_no, phba->work_hs,
 				phba->work_status[0], phba->work_status[1]);
 
+		event_data = FC_REG_DUMP_EVENT;
+		fc_host_post_vendor_event(phba->host, fc_get_event_number(),
+				sizeof(event_data), (char *) &event_data,
+				SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX);
+
 		psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
 		lpfc_offline(phba);
 		phba->hba_state = LPFC_HBA_ERROR;
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 20449a8..d5f4150 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1813,7 +1813,7 @@
  */
 /*
  * For a Link Down, all nodes on the ADISC, PLOGI, unmapped or mapped
- * lists will receive a DEVICE_RECOVERY event. If the linkdown or nodev timers
+ * lists will receive a DEVICE_RECOVERY event. If the linkdown or devloss timers
  * expire, all effected nodes will receive a DEVICE_RM event.
  */
 /*
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index a8816a8..97ae98d 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -935,7 +935,7 @@
 			schedule_timeout_uninterruptible(LPFC_ABORT_WAIT*HZ);
 		spin_lock_irq(phba->host->host_lock);
 		if (++loop_count
-		    > (2 * phba->cfg_nodev_tmo)/LPFC_ABORT_WAIT)
+		    > (2 * phba->cfg_devloss_tmo)/LPFC_ABORT_WAIT)
 			break;
 	}
 
@@ -978,7 +978,7 @@
 	spin_lock_irq(shost->host_lock);
 	/*
 	 * If target is not in a MAPPED state, delay the reset until
-	 * target is rediscovered or nodev timeout expires.
+	 * target is rediscovered or devloss timeout expires.
 	 */
 	while ( 1 ) {
 		if (!pnode)
@@ -1050,7 +1050,7 @@
 		spin_lock_irq(phba->host->host_lock);
 
 		if (++loopcnt
-		    > (2 * phba->cfg_nodev_tmo)/LPFC_RESET_WAIT)
+		    > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT)
 			break;
 
 		cnt = lpfc_sli_sum_iocb(phba,
@@ -1151,7 +1151,7 @@
 		spin_lock_irq(phba->host->host_lock);
 
 		if (++loopcnt
-		    > (2 * phba->cfg_nodev_tmo)/LPFC_RESET_WAIT)
+		    > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT)
 			break;
 
 		cnt = lpfc_sli_sum_iocb(phba,
@@ -1249,7 +1249,7 @@
 	 * target pointer is stored in the starget_data for the
 	 * driver's sysfs entry point functions.
 	 */
-	rport->dev_loss_tmo = phba->cfg_nodev_tmo + 5;
+	rport->dev_loss_tmo = phba->cfg_devloss_tmo;
 
 	if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) {
 		lpfc_sli_poll_fcp_ring(phba);
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index c7091ea..ac41790 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.1.9"
+#define LPFC_DRIVER_VERSION "8.1.10"
 
 #define LPFC_DRIVER_NAME "lpfc"
 
diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c
index 89ef34d..6422de7 100644
--- a/drivers/scsi/mac53c94.c
+++ b/drivers/scsi/mac53c94.c
@@ -431,7 +431,7 @@
 	struct fsc_state *state;
 	struct Scsi_Host *host;
 	void *dma_cmd_space;
-	unsigned char *clkprop;
+	const unsigned char *clkprop;
 	int proplen, rc = -ENODEV;
 
 	if (macio_resource_count(mdev) != 2 || macio_irq_count(mdev) != 2) {
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 76edbb6..b87bef6 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -2822,9 +2822,7 @@
 
 	i = scsi_inq[0] & 0x1f;
 
-	len += sprintf(page+len, "  Type:   %s ",
-		i < MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] :
-		   "Unknown          ");
+	len += sprintf(page+len, "  Type:   %s ", scsi_device_type(i));
 
 	len += sprintf(page+len,
 	"                 ANSI SCSI revision: %02x", scsi_inq[2] & 0x07);
@@ -3658,8 +3656,9 @@
 			 * Send the request sense data also, irrespective of
 			 * whether the user has asked for it or not.
 			 */
-			copy_to_user(upthru->reqsensearea,
-					pthru->reqsensearea, 14);
+			if (copy_to_user(upthru->reqsensearea,
+					pthru->reqsensearea, 14))
+				rval = -EFAULT;
 
 freemem_and_return:
 			if( pthru->dataxferlen ) {
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index cd982c8..266b391 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -330,6 +330,21 @@
 	NULL,
 };
 
+/**
+ * megaraid_change_queue_depth - Change the device's queue depth
+ * @sdev:	scsi device struct
+ * @qdepth:	requested depth; values above MBOX_MAX_SCSI_CMDS are clamped
+ *
+ * Return value:
+ * 	sdev->queue_depth as read back after the adjustment
+ **/
+static int megaraid_change_queue_depth(struct scsi_device *sdev, int qdepth)
+{
+	if (qdepth > MBOX_MAX_SCSI_CMDS)
+		qdepth = MBOX_MAX_SCSI_CMDS;	/* clamp to MBOX_MAX_SCSI_CMDS */
+	scsi_adjust_queue_depth(sdev, 0, qdepth);
+	return sdev->queue_depth;
+}
 
 /*
  * Scsi host template for megaraid unified driver
@@ -343,6 +358,7 @@
 	.eh_device_reset_handler	= megaraid_reset_handler,
 	.eh_bus_reset_handler		= megaraid_reset_handler,
 	.eh_host_reset_handler		= megaraid_reset_handler,
+	.change_queue_depth		= megaraid_change_queue_depth,
 	.use_clustering			= ENABLE_CLUSTERING,
 	.sdev_attrs			= megaraid_sdev_attrs,
 	.shost_attrs			= megaraid_shost_attrs,
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index a8c9627..4cab5b5 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -53,31 +53,15 @@
  */
 static struct pci_device_id megasas_pci_table[] = {
 
-	{
-	 PCI_VENDOR_ID_LSI_LOGIC,
-	 PCI_DEVICE_ID_LSI_SAS1064R, /* xscale IOP */
-	 PCI_ANY_ID,
-	 PCI_ANY_ID,
-	 },
-	{
-	 PCI_VENDOR_ID_LSI_LOGIC,
-	 PCI_DEVICE_ID_LSI_SAS1078R, /* ppc IOP */
-	 PCI_ANY_ID,
-	 PCI_ANY_ID,
-	},
-	{
-	 PCI_VENDOR_ID_LSI_LOGIC,
-	 PCI_DEVICE_ID_LSI_VERDE_ZCR,	/* xscale IOP, vega */
-	 PCI_ANY_ID,
-	 PCI_ANY_ID,
-	 },
-	{
-	 PCI_VENDOR_ID_DELL,
-	 PCI_DEVICE_ID_DELL_PERC5, /* xscale IOP */
-	 PCI_ANY_ID,
-	 PCI_ANY_ID,
-	 },
-	{0}			/* Terminating entry */
+	{PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064R)},
+	/* xscale IOP */
+	{PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1078R)},
+	/* ppc IOP */
+	{PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_VERDE_ZCR)},
+	/* xscale IOP, vega */
+	{PCI_DEVICE(PCI_VENDOR_ID_DELL, PCI_DEVICE_ID_DELL_PERC5)},
+	/* xscale IOP */
+	{}
 };
 
 MODULE_DEVICE_TABLE(pci, megasas_pci_table);
@@ -2854,7 +2838,7 @@
 	/*
 	 * Register ourselves as PCI hotplug module
 	 */
-	rval = pci_module_init(&megasas_pci_driver);
+	rval = pci_register_driver(&megasas_pci_driver);
 
 	if (rval) {
 		printk(KERN_DEBUG "megasas: PCI hotplug regisration failed \n");
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index 5572981..592b52a 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -1850,7 +1850,8 @@
 {
 	struct device_node *mesh = macio_get_of_node(mdev);
 	struct pci_dev* pdev = macio_get_pci_dev(mdev);
-	int tgt, *cfp, minper;
+	int tgt, minper;
+	const int *cfp;
 	struct mesh_state *ms;
 	struct Scsi_Host *mesh_host;
 	void *dma_cmd_space;
@@ -1939,7 +1940,7 @@
 	       	ms->tgts[tgt].current_req = NULL;
        	}
 
-	if ((cfp = (int *) get_property(mesh, "clock-frequency", NULL)))
+	if ((cfp = get_property(mesh, "clock-frequency", NULL)))
        		ms->clk_freq = *cfp;
 	else {
        		printk(KERN_INFO "mesh: assuming 50MHz clock frequency\n");
diff --git a/drivers/scsi/mvme147.c b/drivers/scsi/mvme147.c
index cb367c2..9b991b7 100644
--- a/drivers/scsi/mvme147.c
+++ b/drivers/scsi/mvme147.c
@@ -29,7 +29,7 @@
     return IRQ_HANDLED;
 }
 
-static int dma_setup (Scsi_Cmnd *cmd, int dir_in)
+static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
     unsigned char flags = 0x01;
     unsigned long addr = virt_to_bus(cmd->SCp.ptr);
@@ -57,7 +57,7 @@
     return 0;
 }
 
-static void dma_stop (struct Scsi_Host *instance, Scsi_Cmnd *SCpnt,
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
 		      int status)
 {
     m147_pcc->dma_cntrl = 0;
@@ -112,7 +112,7 @@
     return 0;
 }
 
-static int mvme147_bus_reset(Scsi_Cmnd *cmd)
+static int mvme147_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 
diff --git a/drivers/scsi/mvme147.h b/drivers/scsi/mvme147.h
index 2f56d69..32aee85 100644
--- a/drivers/scsi/mvme147.h
+++ b/drivers/scsi/mvme147.h
@@ -12,10 +12,6 @@
 
 int mvme147_detect(struct scsi_host_template *);
 int mvme147_release(struct Scsi_Host *);
-const char *wd33c93_info(void);
-int wd33c93_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-int wd33c93_abort(Scsi_Cmnd *);
-int wd33c93_reset(Scsi_Cmnd *, unsigned int);
 
 #ifndef CMD_PER_LUN
 #define CMD_PER_LUN 2
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index b332cad..c51b576 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -96,24 +96,40 @@
 EXPORT_SYMBOL(scsi_logging_level);
 #endif
 
-const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE] = {
-	"Direct-Access    ",
-	"Sequential-Access",
+static const char *const scsi_device_types[] = {
+	"Direct access    ",
+	"Sequential access",
 	"Printer          ",
 	"Processor        ",
 	"WORM             ",
-	"CD-ROM           ",
+	"CD/DVD           ",
 	"Scanner          ",
-	"Optical Device   ",
-	"Medium Changer   ",
+	"Optical memory   ",
+	"Media changer    ",
 	"Communications   ",
-	"Unknown          ",
-	"Unknown          ",
+	"ASC IT8          ",
+	"ASC IT8          ",
 	"RAID             ",
 	"Enclosure        ",
-	"Direct-Access-RBC",
+	"Direct access RBC",
+	"Optical card     ",
+	"Bridge controller",
+	"Object storage   ",
+	"Automation/Drive ",
 };
-EXPORT_SYMBOL(scsi_device_types);
+
+const char * scsi_device_type(unsigned type)
+{
+	if (type == 0x1e)	/* codes outside the table that have fixed names */
+		return "Well-known LUN   ";
+	if (type == 0x1f)
+		return "No Device        ";
+	if (type >= ARRAY_SIZE(scsi_device_types))	/* last valid index is size - 1 */
+		return "Unknown          ";
+	return scsi_device_types[type];	/* space-padded name for a known type code */
+}
+
+EXPORT_SYMBOL(scsi_device_type);
 
 struct scsi_host_cmd_pool {
 	kmem_cache_t	*slab;
@@ -835,14 +851,14 @@
  */
 int scsi_device_get(struct scsi_device *sdev)
 {
-	if (sdev->sdev_state == SDEV_DEL || sdev->sdev_state == SDEV_CANCEL)
+	if (sdev->sdev_state == SDEV_DEL)
 		return -ENXIO;
 	if (!get_device(&sdev->sdev_gendev))
 		return -ENXIO;
-	if (!try_module_get(sdev->host->hostt->module)) {
-		put_device(&sdev->sdev_gendev);
-		return -ENXIO;
-	}
+	/* We can fail this if we're doing SCSI operations
+	 * from module exit (like cache flush) */
+	try_module_get(sdev->host->hostt->module);
+
 	return 0;
 }
 EXPORT_SYMBOL(scsi_device_get);
@@ -857,7 +873,14 @@
  */
 void scsi_device_put(struct scsi_device *sdev)
 {
-	module_put(sdev->host->hostt->module);
+	struct module *module = sdev->host->hostt->module;
+
+#ifdef CONFIG_MODULE_UNLOAD
+	/* The module refcount will be zero if scsi_device_get()
+	 * was called from a module removal routine */
+	if (module && module_refcount(module) != 0)
+		module_put(module);
+#endif
 	put_device(&sdev->sdev_gendev);
 }
 EXPORT_SYMBOL(scsi_device_put);
@@ -1099,6 +1122,8 @@
 	for_each_possible_cpu(i)
 		INIT_LIST_HEAD(&per_cpu(scsi_done_q, i));
 
+	scsi_netlink_init();
+
 	printk(KERN_NOTICE "SCSI subsystem initialized\n");
 	return 0;
 
@@ -1119,6 +1144,7 @@
 
 static void __exit exit_scsi(void)
 {
+	scsi_netlink_exit();
 	scsi_sysfs_unregister();
 	scsi_exit_sysctl();
 	scsi_exit_hosts();
diff --git a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h
index f51e466..d5a55fa 100644
--- a/drivers/scsi/scsi.h
+++ b/drivers/scsi/scsi.h
@@ -20,8 +20,6 @@
 #ifndef _SCSI_H
 #define _SCSI_H
 
-#include <linux/config.h>	    /* for CONFIG_SCSI_LOGGING */
-
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_eh.h>
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index a80303c..9c0f358 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1,5 +1,4 @@
 /*
- *  linux/kernel/scsi_debug.c
  * vvvvvvvvvvvvvvvvvvvvvvv Original vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
  *  Copyright (C) 1992  Eric Youngdale
  *  Simulate a host adapter with 2 disks attached.  Do a lot of checking
@@ -8,7 +7,9 @@
  * ^^^^^^^^^^^^^^^^^^^^^^^ Original ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  *
  *  This version is more generic, simulating a variable number of disk
- *  (or disk like devices) sharing a common amount of RAM
+ *  (or disk like devices) sharing a common amount of RAM. To be more
+ *  realistic, the simulated devices have the transport attributes of
+ *  SAS disks.
  *
  *
  *  For documentation see http://www.torque.net/sg/sdebug26.html
@@ -50,8 +51,8 @@
 #include "scsi_logging.h"
 #include "scsi_debug.h"
 
-#define SCSI_DEBUG_VERSION "1.79"
-static const char * scsi_debug_version_date = "20060604";
+#define SCSI_DEBUG_VERSION "1.80"
+static const char * scsi_debug_version_date = "20060914";
 
 /* Additional Sense Code (ASC) used */
 #define NO_ADDITIONAL_SENSE 0x0
@@ -86,6 +87,8 @@
 #define DEF_D_SENSE   0
 #define DEF_NO_LUN_0   0
 #define DEF_VIRTUAL_GB   0
+#define DEF_FAKE_RW	0
+#define DEF_VPD_USE_HOSTNO 1
 
 /* bit mask values for scsi_debug_opts */
 #define SCSI_DEBUG_OPT_NOISE   1
@@ -127,6 +130,8 @@
 static int scsi_debug_dsense = DEF_D_SENSE;
 static int scsi_debug_no_lun_0 = DEF_NO_LUN_0;
 static int scsi_debug_virtual_gb = DEF_VIRTUAL_GB;
+static int scsi_debug_fake_rw = DEF_FAKE_RW;
+static int scsi_debug_vpd_use_hostno = DEF_VPD_USE_HOSTNO;
 
 static int scsi_debug_cmnd_count = 0;
 
@@ -423,6 +428,8 @@
 	case READ_6:
 		if ((errsts = check_readiness(SCpnt, 0, devip)))
 			break;
+		if (scsi_debug_fake_rw)
+			break;
 		if ((*cmd) == READ_16) {
 			for (lba = 0, j = 0; j < 8; ++j) {
 				if (j > 0)
@@ -465,6 +472,8 @@
 	case WRITE_6:
 		if ((errsts = check_readiness(SCpnt, 0, devip)))
 			break;
+		if (scsi_debug_fake_rw)
+			break;
 		if ((*cmd) == WRITE_16) {
 			for (lba = 0, j = 0; j < 8; ++j) {
 				if (j > 0)
@@ -941,6 +950,8 @@
 		char lu_id_str[6];
 		int host_no = devip->sdbg_host->shost->host_no;
 		
+		if (0 == scsi_debug_vpd_use_hostno)
+			host_no = 0;
 		lu_id_num = devip->wlun ? -1 : (((host_no + 1) * 2000) +
 			    (devip->target * 1000) + devip->lun);
 		target_dev_id = ((host_no + 1) * 2000) +
@@ -1059,19 +1070,6 @@
 			arr[12] = THRESHOLD_EXCEEDED;
 			arr[13] = 0xff;		/* TEST set and MRIE==6 */
 		}
-	} else if (devip->stopped) {
-		if (want_dsense) {
-			arr[0] = 0x72;
-			arr[1] = 0x0;		/* NO_SENSE in sense_key */
-			arr[2] = LOW_POWER_COND_ON;
-			arr[3] = 0x0;		/* TEST set and MRIE==6 */
-		} else {
-			arr[0] = 0x70;
-			arr[2] = 0x0;		/* NO_SENSE in sense_key */
-			arr[7] = 0xa;   	/* 18 byte sense buffer */
-			arr[12] = LOW_POWER_COND_ON;
-			arr[13] = 0x0;		/* TEST set and MRIE==6 */
-		}
 	} else {
 		memcpy(arr, sbuff, SDEBUG_SENSE_LEN);
 		if ((cmd[1] & 1) && (! scsi_debug_dsense)) {
@@ -1325,21 +1323,26 @@
 static int resp_mode_sense(struct scsi_cmnd * scp, int target,
 			   struct sdebug_dev_info * devip)
 {
-	unsigned char dbd;
-	int pcontrol, pcode, subpcode;
+	unsigned char dbd, llbaa;
+	int pcontrol, pcode, subpcode, bd_len;
 	unsigned char dev_spec;
-	int alloc_len, msense_6, offset, len, errsts, target_dev_id;
+	int k, alloc_len, msense_6, offset, len, errsts, target_dev_id;
 	unsigned char * ap;
 	unsigned char arr[SDEBUG_MAX_MSENSE_SZ];
 	unsigned char *cmd = (unsigned char *)scp->cmnd;
 
 	if ((errsts = check_readiness(scp, 1, devip)))
 		return errsts;
-	dbd = cmd[1] & 0x8;
+	dbd = !!(cmd[1] & 0x8);
 	pcontrol = (cmd[2] & 0xc0) >> 6;
 	pcode = cmd[2] & 0x3f;
 	subpcode = cmd[3];
 	msense_6 = (MODE_SENSE == cmd[0]);
+	llbaa = msense_6 ? 0 : !!(cmd[1] & 0x10);
+	if ((0 == scsi_debug_ptype) && (0 == dbd))
+		bd_len = llbaa ? 16 : 8;
+	else
+		bd_len = 0;
 	alloc_len = msense_6 ? cmd[4] : ((cmd[7] << 8) | cmd[8]);
 	memset(arr, 0, SDEBUG_MAX_MSENSE_SZ);
 	if (0x3 == pcontrol) {  /* Saving values not supported */
@@ -1349,15 +1352,58 @@
 	}
 	target_dev_id = ((devip->sdbg_host->shost->host_no + 1) * 2000) +
 			(devip->target * 1000) - 3;
-	dev_spec = DEV_READONLY(target) ? 0x80 : 0x0;
+	/* set DPOFUA bit for disks */
+	if (0 == scsi_debug_ptype)
+		dev_spec = (DEV_READONLY(target) ? 0x80 : 0x0) | 0x10;
+	else
+		dev_spec = 0x0;
 	if (msense_6) {
 		arr[2] = dev_spec;
+		arr[3] = bd_len;
 		offset = 4;
 	} else {
 		arr[3] = dev_spec;
+		if (16 == bd_len)
+			arr[4] = 0x1;	/* set LONGLBA bit */
+		arr[7] = bd_len;	/* assume 255 or less */
 		offset = 8;
 	}
 	ap = arr + offset;
+	if ((bd_len > 0) && (0 == sdebug_capacity)) {
+		if (scsi_debug_virtual_gb > 0) {
+			sdebug_capacity = 2048 * 1024;
+			sdebug_capacity *= scsi_debug_virtual_gb;
+		} else
+			sdebug_capacity = sdebug_store_sectors;
+	}
+	if (8 == bd_len) {
+		if (sdebug_capacity > 0xfffffffe) {
+			ap[0] = 0xff;
+			ap[1] = 0xff;
+			ap[2] = 0xff;
+			ap[3] = 0xff;
+		} else {
+			ap[0] = (sdebug_capacity >> 24) & 0xff;
+			ap[1] = (sdebug_capacity >> 16) & 0xff;
+			ap[2] = (sdebug_capacity >> 8) & 0xff;
+			ap[3] = sdebug_capacity & 0xff;
+		}
+        	ap[6] = (SECT_SIZE_PER(target) >> 8) & 0xff;
+        	ap[7] = SECT_SIZE_PER(target) & 0xff;
+		offset += bd_len;
+		ap = arr + offset;
+	} else if (16 == bd_len) {
+		unsigned long long capac = sdebug_capacity;
+
+        	for (k = 0; k < 8; ++k, capac >>= 8)
+                	ap[7 - k] = capac & 0xff;
+        	ap[12] = (SECT_SIZE_PER(target) >> 24) & 0xff;
+        	ap[13] = (SECT_SIZE_PER(target) >> 16) & 0xff;
+        	ap[14] = (SECT_SIZE_PER(target) >> 8) & 0xff;
+        	ap[15] = SECT_SIZE_PER(target) & 0xff;
+		offset += bd_len;
+		ap = arr + offset;
+	}
 
 	if ((subpcode > 0x0) && (subpcode < 0xff) && (0x19 != pcode)) {
 		/* TODO: Control Extension page */
@@ -1471,7 +1517,7 @@
                        " IO sent=%d bytes\n", param_len, res);
 	md_len = mselect6 ? (arr[0] + 1) : ((arr[0] << 8) + arr[1] + 2);
 	bd_len = mselect6 ? arr[3] : ((arr[6] << 8) + arr[7]);
-	if ((md_len > 2) || (0 != bd_len)) {
+	if (md_len > 2) {
 		mk_sense_buffer(devip, ILLEGAL_REQUEST,
 				INVALID_FIELD_IN_PARAM_LIST, 0);
 		return check_condition_result;
@@ -1544,7 +1590,7 @@
 static int resp_log_sense(struct scsi_cmnd * scp,
                           struct sdebug_dev_info * devip)
 {
-	int ppc, sp, pcontrol, pcode, alloc_len, errsts, len, n;
+	int ppc, sp, pcontrol, pcode, subpcode, alloc_len, errsts, len, n;
 	unsigned char arr[SDEBUG_MAX_LSENSE_SZ];
 	unsigned char *cmd = (unsigned char *)scp->cmnd;
 
@@ -1560,23 +1606,63 @@
 	}
 	pcontrol = (cmd[2] & 0xc0) >> 6;
 	pcode = cmd[2] & 0x3f;
+	subpcode = cmd[3] & 0xff;
 	alloc_len = (cmd[7] << 8) + cmd[8];
 	arr[0] = pcode;
-	switch (pcode) {
-	case 0x0:	/* Supported log pages log page */
-		n = 4;
-		arr[n++] = 0x0;		/* this page */
-		arr[n++] = 0xd;		/* Temperature */
-		arr[n++] = 0x2f;	/* Informational exceptions */
-		arr[3] = n - 4;
-		break;
-	case 0xd:	/* Temperature log page */
-		arr[3] = resp_temp_l_pg(arr + 4);
-		break;
-	case 0x2f:	/* Informational exceptions log page */
-		arr[3] = resp_ie_l_pg(arr + 4);
-		break;
-	default:
+	if (0 == subpcode) {
+		switch (pcode) {
+		case 0x0:	/* Supported log pages log page */
+			n = 4;
+			arr[n++] = 0x0;		/* this page */
+			arr[n++] = 0xd;		/* Temperature */
+			arr[n++] = 0x2f;	/* Informational exceptions */
+			arr[3] = n - 4;
+			break;
+		case 0xd:	/* Temperature log page */
+			arr[3] = resp_temp_l_pg(arr + 4);
+			break;
+		case 0x2f:	/* Informational exceptions log page */
+			arr[3] = resp_ie_l_pg(arr + 4);
+			break;
+		default:
+			mk_sense_buffer(devip, ILLEGAL_REQUEST,
+					INVALID_FIELD_IN_CDB, 0);
+			return check_condition_result;
+		}
+	} else if (0xff == subpcode) {
+		arr[0] |= 0x40;
+		arr[1] = subpcode;
+		switch (pcode) {
+		case 0x0:	/* Supported log pages and subpages log page */
+			n = 4;
+			arr[n++] = 0x0;
+			arr[n++] = 0x0;		/* 0,0 page */
+			arr[n++] = 0x0;
+			arr[n++] = 0xff;	/* this page */
+			arr[n++] = 0xd;
+			arr[n++] = 0x0;		/* Temperature */
+			arr[n++] = 0x2f;
+			arr[n++] = 0x0;	/* Informational exceptions */
+			arr[3] = n - 4;
+			break;
+		case 0xd:	/* Temperature subpages */
+			n = 4;
+			arr[n++] = 0xd;
+			arr[n++] = 0x0;		/* Temperature */
+			arr[3] = n - 4;
+			break;
+		case 0x2f:	/* Informational exceptions subpages */
+			n = 4;
+			arr[n++] = 0x2f;
+			arr[n++] = 0x0;		/* Informational exceptions */
+			arr[3] = n - 4;
+			break;
+		default:
+			mk_sense_buffer(devip, ILLEGAL_REQUEST,
+					INVALID_FIELD_IN_CDB, 0);
+			return check_condition_result;
+		}
+	} else {
 		mk_sense_buffer(devip, ILLEGAL_REQUEST,
 				INVALID_FIELD_IN_CDB, 0);
 		return check_condition_result;
@@ -2151,11 +2237,18 @@
 	}
 }
 
+/* Note: The following macros create attribute files in the
+   /sys/module/scsi_debug/parameters directory. Unfortunately this
+   driver is unaware of a change and cannot trigger auxiliary actions
+   as it can when the corresponding attribute in the
+   /sys/bus/pseudo/drivers/scsi_debug directory is changed.
+ */
 module_param_named(add_host, scsi_debug_add_host, int, S_IRUGO | S_IWUSR);
 module_param_named(delay, scsi_debug_delay, int, S_IRUGO | S_IWUSR);
 module_param_named(dev_size_mb, scsi_debug_dev_size_mb, int, S_IRUGO);
 module_param_named(dsense, scsi_debug_dsense, int, S_IRUGO | S_IWUSR);
 module_param_named(every_nth, scsi_debug_every_nth, int, S_IRUGO | S_IWUSR);
+module_param_named(fake_rw, scsi_debug_fake_rw, int, S_IRUGO | S_IWUSR);
 module_param_named(max_luns, scsi_debug_max_luns, int, S_IRUGO | S_IWUSR);
 module_param_named(no_lun_0, scsi_debug_no_lun_0, int, S_IRUGO | S_IWUSR);
 module_param_named(num_parts, scsi_debug_num_parts, int, S_IRUGO);
@@ -2164,6 +2257,8 @@
 module_param_named(ptype, scsi_debug_ptype, int, S_IRUGO | S_IWUSR);
 module_param_named(scsi_level, scsi_debug_scsi_level, int, S_IRUGO);
 module_param_named(virtual_gb, scsi_debug_virtual_gb, int, S_IRUGO | S_IWUSR);
+module_param_named(vpd_use_hostno, scsi_debug_vpd_use_hostno, int,
+		   S_IRUGO | S_IWUSR);
 
 MODULE_AUTHOR("Eric Youngdale + Douglas Gilbert");
 MODULE_DESCRIPTION("SCSI debug adapter driver");
@@ -2175,6 +2270,7 @@
 MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs(def=8)");
 MODULE_PARM_DESC(dsense, "use descriptor sense format(def=0 -> fixed)");
 MODULE_PARM_DESC(every_nth, "timeout every nth command(def=100)");
+MODULE_PARM_DESC(fake_rw, "fake reads/writes instead of copying (def=0)");
 MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
 MODULE_PARM_DESC(no_lun_0, "no LU number 0 (def=0 -> have lun 0)");
 MODULE_PARM_DESC(num_parts, "number of partitions(def=0)");
@@ -2183,6 +2279,7 @@
 MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])");
 MODULE_PARM_DESC(scsi_level, "SCSI level to simulate(def=5[SPC-3])");
 MODULE_PARM_DESC(virtual_gb, "virtual gigabyte size (def=0 -> use dev_size_mb)");
+MODULE_PARM_DESC(vpd_use_hostno, "0 -> dev ids ignore hostno (def=1 -> unique dev ids)");
 
 
 static char sdebug_info[256];
@@ -2334,6 +2431,24 @@
 DRIVER_ATTR(dsense, S_IRUGO | S_IWUSR, sdebug_dsense_show,
 	    sdebug_dsense_store);
 
+static ssize_t sdebug_fake_rw_show(struct device_driver * ddp, char * buf)
+{	/* emit the current fake_rw value as one decimal line */
+        return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_fake_rw);
+}
+static ssize_t sdebug_fake_rw_store(struct device_driver * ddp,
+				    const char * buf, size_t count)
+{
+        int n;
+
+	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) {
+		scsi_debug_fake_rw = n;
+		return count;	/* report the whole write as consumed */
+	}
+	return -EINVAL;		/* empty, non-numeric or negative input */
+}
+DRIVER_ATTR(fake_rw, S_IRUGO | S_IWUSR, sdebug_fake_rw_show,
+	    sdebug_fake_rw_store);
+
 static ssize_t sdebug_no_lun_0_show(struct device_driver * ddp, char * buf)
 {
         return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_no_lun_0);
@@ -2487,6 +2602,31 @@
 DRIVER_ATTR(add_host, S_IRUGO | S_IWUSR, sdebug_add_host_show, 
 	    sdebug_add_host_store);
 
+static ssize_t sdebug_vpd_use_hostno_show(struct device_driver * ddp,
+					  char * buf)
+{	/* emit the current vpd_use_hostno value as one decimal line */
+	return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_vpd_use_hostno);
+}
+static ssize_t sdebug_vpd_use_hostno_store(struct device_driver * ddp,
+					   const char * buf, size_t count)
+{
+	int n;
+
+	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) {
+		scsi_debug_vpd_use_hostno = n;
+		return count;	/* report the whole write as consumed */
+	}
+	return -EINVAL;		/* empty, non-numeric or negative input */
+}
+DRIVER_ATTR(vpd_use_hostno, S_IRUGO | S_IWUSR, sdebug_vpd_use_hostno_show,
+	    sdebug_vpd_use_hostno_store);
+
+/* Note: The following function creates attribute files in the
+   /sys/bus/pseudo/drivers/scsi_debug directory. The advantage of these
+   files (over those found in the /sys/module/scsi_debug/parameters
+   directory) is that auxiliary actions can be triggered when an attribute
+   is changed. For example see: sdebug_add_host_store() above.
+ */
 static int do_create_driverfs_files(void)
 {
 	int ret;
@@ -2496,23 +2636,31 @@
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_dev_size_mb);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_dsense);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_every_nth);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_fake_rw);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_max_luns);
-	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_num_tgts);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_no_lun_0);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_num_parts);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_num_tgts);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_ptype);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_opts);
 	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_scsi_level);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_virtual_gb);
+	ret |= driver_create_file(&sdebug_driverfs_driver, &driver_attr_vpd_use_hostno);
 	return ret;
 }
 
 static void do_remove_driverfs_files(void)
 {
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_vpd_use_hostno);
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_virtual_gb);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_scsi_level);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_opts);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_ptype);
-	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_num_parts);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_num_tgts);
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_num_parts);
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_no_lun_0);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_max_luns);
+	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_fake_rw);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_every_nth);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_dsense);
 	driver_remove_file(&sdebug_driverfs_driver, &driver_attr_dev_size_mb);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 077c1c6..d6743b9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -551,7 +551,15 @@
 		list_del_init(&sdev->starved_entry);
 		spin_unlock_irqrestore(shost->host_lock, flags);
 
-		blk_run_queue(sdev->request_queue);
+
+		if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
+		    !test_and_set_bit(QUEUE_FLAG_REENTER,
+				      &sdev->request_queue->queue_flags)) {
+			blk_run_queue(sdev->request_queue);
+			clear_bit(QUEUE_FLAG_REENTER,
+				  &sdev->request_queue->queue_flags);
+		} else
+			blk_run_queue(sdev->request_queue);
 
 		spin_lock_irqsave(shost->host_lock, flags);
 		if (unlikely(!list_empty(&sdev->starved_entry)))
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
new file mode 100644
index 0000000..1b59b27
--- /dev/null
+++ b/drivers/scsi/scsi_netlink.c
@@ -0,0 +1,199 @@
+/*
+ *  scsi_netlink.c  - SCSI Transport Netlink Interface
+ *
+ *  Copyright (C) 2006   James Smart, Emulex Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#include <linux/time.h>
+#include <linux/jiffies.h>
+#include <linux/security.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+
+#include <scsi/scsi_netlink.h>
+#include "scsi_priv.h"
+
+struct sock *scsi_nl_sock = NULL;
+EXPORT_SYMBOL_GPL(scsi_nl_sock);
+
+
+/**
+ * scsi_nl_rcv_msg -
+ *    Receive message handler. Walks every netlink message in a receive
+ *    buffer and validates its header; no transport handler is called yet.
+ *
+ * @skb:		socket receive buffer
+ *
+ **/
+static void
+scsi_nl_rcv_msg(struct sk_buff *skb)
+{
+	struct nlmsghdr *nlh;
+	struct scsi_nl_hdr *hdr;
+	uint32_t rlen;
+	int err;
+
+	while (skb->len >= NLMSG_SPACE(0)) {
+		err = 0;
+
+		nlh = (struct nlmsghdr *) skb->data;
+		if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) ||
+		    (skb->len < nlh->nlmsg_len)) {
+			printk(KERN_WARNING "%s: discarding partial skb\n",
+				 __FUNCTION__);
+			return;	/* abandon the rest of the buffer */
+		}
+
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;	/* never pull past the end of the skb */
+
+		if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) {
+			err = -EBADMSG;	/* not a SCSI transport message */
+			goto next_msg;
+		}
+
+		hdr = NLMSG_DATA(nlh);
+		if ((hdr->version != SCSI_NL_VERSION) ||
+		    (hdr->magic != SCSI_NL_MAGIC)) {
+			err = -EPROTOTYPE;	/* version or magic mismatch */
+			goto next_msg;
+		}
+
+		if (security_netlink_recv(skb, CAP_SYS_ADMIN)) {
+			err = -EPERM;	/* CAP_SYS_ADMIN check failed */
+			goto next_msg;
+		}
+
+		if (nlh->nlmsg_len < (sizeof(*nlh) + hdr->msglen)) {
+			printk(KERN_WARNING "%s: discarding partial message\n",
+				 __FUNCTION__);
+			return;	/* abandon the rest of the buffer */
+		}
+
+		/*
+		 * We currently don't support anyone sending us a message
+		 */
+
+next_msg:
+		if ((err) || (nlh->nlmsg_flags & NLM_F_ACK))	/* error, or ack requested */
+			netlink_ack(skb, nlh, err);
+
+		skb_pull(skb, rlen);	/* step to the next message */
+	}
+}
+
+
+/**
+ * scsi_nl_rcv -
+ *    Receive handler for a socket. Extracts each received message buffer
+ *    from the socket, processes it, and frees it.
+ *
+ * @sk:		socket
+ * @len:	unused
+ *
+ **/
+static void
+scsi_nl_rcv(struct sock *sk, int len)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {	/* drain the queue */
+		scsi_nl_rcv_msg(skb);
+		kfree_skb(skb);
+	}
+}
+
+
+/**
+ * scsi_nl_rcv_event -
+ *    Event handler for a netlink socket.
+ *
+ * @this:		event notifier block
+ * @event:		event type
+ * @ptr:		event payload, read as a struct netlink_notify
+ * Always returns NOTIFY_DONE.
+ **/
+static int
+scsi_nl_rcv_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct netlink_notify *n = ptr;
+
+	if (n->protocol != NETLINK_SCSITRANSPORT)
+		return NOTIFY_DONE;	/* event belongs to another protocol */
+
+	/*
+	 * Currently, we are not tracking PID's, etc. There is nothing
+	 * to handle.
+	 */
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block scsi_netlink_notifier = {
+	.notifier_call  = scsi_nl_rcv_event,
+};
+
+
+/**
+ * scsi_netlink_init -
+ *    Called by SCSI subsystem to initialize the SCSI transport netlink
+ *    interface
+ *    Failures are only logged; scsi_nl_sock stays NULL in that case.
+ **/
+void
+scsi_netlink_init(void)
+{
+	int error;
+
+	error = netlink_register_notifier(&scsi_netlink_notifier);
+	if (error) {
+		printk(KERN_ERR "%s: register of event handler failed - %d\n",
+				__FUNCTION__, error);
+		return;	/* no socket is created without the notifier */
+	}
+
+	scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT,
+				SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE);
+	if (!scsi_nl_sock) {
+		printk(KERN_ERR "%s: register of recieve handler failed\n",
+				__FUNCTION__);
+		netlink_unregister_notifier(&scsi_netlink_notifier);	/* undo step one */
+	}
+
+	return;
+}
+
+
+/**
+ * scsi_netlink_exit -
+ *    Called by SCSI subsystem to disable the SCSI transport netlink
+ *    interface
+ *    Does nothing when scsi_nl_sock is NULL.
+ **/
+void
+scsi_netlink_exit(void)
+{
+	if (scsi_nl_sock) {	/* NULL when scsi_netlink_init() failed */
+		sock_release(scsi_nl_sock->sk_socket);
+		netlink_unregister_notifier(&scsi_netlink_notifier);
+	}
+
+	return;
+}
+
+
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index ae24c85..5d023d4 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -8,6 +8,7 @@
 struct scsi_device;
 struct scsi_host_template;
 struct Scsi_Host;
+struct scsi_nl_hdr;
 
 
 /*
@@ -110,6 +111,16 @@
 
 extern struct bus_type scsi_bus_type;
 
+/* scsi_netlink.c */
+#ifdef CONFIG_SCSI_NETLINK
+extern void scsi_netlink_init(void);
+extern void scsi_netlink_exit(void);
+extern struct sock *scsi_nl_sock;
+#else
+static inline void scsi_netlink_init(void) {}
+static inline void scsi_netlink_exit(void) {}
+#endif
+
 /* 
  * internal scsi timeout functions: for use by mid-layer and transport
  * classes.
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index 55200e4..524a5f7 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -178,9 +178,7 @@
 
 	seq_printf(s, "\n");
 
-	seq_printf(s, "  Type:   %s ",
-		     sdev->type < MAX_SCSI_DEVICE_CODE ?
-	       scsi_device_types[(int) sdev->type] : "Unknown          ");
+	seq_printf(s, "  Type:   %s ", scsi_device_type(sdev->type));
 	seq_printf(s, "               ANSI"
 		     " SCSI revision: %02x", (sdev->scsi_level - 1) ?
 		     sdev->scsi_level - 1 : 1);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 1bd92b9..fd9e281 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -134,59 +134,6 @@
 }
 
 /**
- * print_inquiry - printk the inquiry information
- * @inq_result:	printk this SCSI INQUIRY
- *
- * Description:
- *     printk the vendor, model, and other information found in the
- *     INQUIRY data in @inq_result.
- *
- * Notes:
- *     Remove this, and replace with a hotplug event that logs any
- *     relevant information.
- **/
-static void print_inquiry(unsigned char *inq_result)
-{
-	int i;
-
-	printk(KERN_NOTICE "  Vendor: ");
-	for (i = 8; i < 16; i++)
-		if (inq_result[i] >= 0x20 && i < inq_result[4] + 5)
-			printk("%c", inq_result[i]);
-		else
-			printk(" ");
-
-	printk("  Model: ");
-	for (i = 16; i < 32; i++)
-		if (inq_result[i] >= 0x20 && i < inq_result[4] + 5)
-			printk("%c", inq_result[i]);
-		else
-			printk(" ");
-
-	printk("  Rev: ");
-	for (i = 32; i < 36; i++)
-		if (inq_result[i] >= 0x20 && i < inq_result[4] + 5)
-			printk("%c", inq_result[i]);
-		else
-			printk(" ");
-
-	printk("\n");
-
-	i = inq_result[0] & 0x1f;
-
-	printk(KERN_NOTICE "  Type:   %s ",
-	       i <
-	       MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] :
-	       "Unknown          ");
-	printk("                 ANSI SCSI revision: %02x",
-	       inq_result[2] & 0x07);
-	if ((inq_result[2] & 0x07) == 1 && (inq_result[3] & 0x0f) == 1)
-		printk(" CCS\n");
-	else
-		printk("\n");
-}
-
-/**
  * scsi_alloc_sdev - allocate and setup a scsi_Device
  *
  * Description:
@@ -319,6 +266,18 @@
 	return found_starget;
 }
 
+/**
+ * scsi_alloc_target - allocate a new or find an existing target
+ * @parent:	parent of the target (need not be a scsi host)
+ * @channel:	target channel number (zero if no channels)
+ * @id:		target id number
+ *
+ * Return an existing target if one exists, provided it hasn't already
+ * gone into STARGET_DEL state, otherwise allocate a new target.
+ *
+ * The target is returned with an incremented reference, so the caller
+ * is responsible for both reaping and doing a last put
+ */
 static struct scsi_target *scsi_alloc_target(struct device *parent,
 					     int channel, uint id)
 {
@@ -384,14 +343,15 @@
 			return NULL;
 		}
 	}
+	get_device(dev);
 
 	return starget;
 
  found:
 	found_target->reap_ref++;
 	spin_unlock_irqrestore(shost->host_lock, flags);
-	put_device(parent);
 	if (found_target->state != STARGET_DEL) {
+		put_device(parent);
 		kfree(starget);
 		return found_target;
 	}
@@ -450,6 +410,32 @@
 }
 
 /**
+ * sanitize_inquiry_string - remove non-graphical chars from an INQUIRY result string
+ * @s: INQUIRY result string to sanitize
+ * @len: length of the string
+ *
+ * Description:
+ *	The SCSI spec says that INQUIRY vendor, product, and revision
+ *	strings must consist entirely of graphic ASCII characters,
+ *	padded on the right with spaces.  Since not all devices obey
+ *	this rule, we will replace non-graphic or non-ASCII characters
+ *	with spaces.  Exception: a NUL character is interpreted as a
+ *	string terminator, so all the following characters are set to
+ *	spaces.
+ **/
+static void sanitize_inquiry_string(unsigned char *s, int len)
+{
+	int idx, ended = 0;	/* ended: a NUL byte has been reached */
+
+	for (idx = 0; idx < len; idx++) {
+		/* the NUL itself and every byte after it become spaces */
+		ended = ended || s[idx] == 0;
+		if (ended || s[idx] < 0x20 || s[idx] > 0x7e)
+			s[idx] = ' ';
+	}
+}
+
+/**
  * scsi_probe_lun - probe a single LUN using a SCSI INQUIRY
  * @sdev:	scsi_device to probe
  * @inq_result:	area to store the INQUIRY result
@@ -463,7 +449,7 @@
  *     INQUIRY data is in @inq_result; the scsi_level and INQUIRY length
  *     are copied to the scsi_device any flags value is stored in *@bflags.
  **/
-static int scsi_probe_lun(struct scsi_device *sdev, char *inq_result,
+static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
 			  int result_len, int *bflags)
 {
 	unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -522,7 +508,11 @@
 	}
 
 	if (result == 0) {
-		response_len = (unsigned char) inq_result[4] + 5;
+		sanitize_inquiry_string(&inq_result[8], 8);
+		sanitize_inquiry_string(&inq_result[16], 16);
+		sanitize_inquiry_string(&inq_result[32], 4);
+
+		response_len = inq_result[4] + 5;
 		if (response_len > 255)
 			response_len = first_inquiry_len;	/* sanity */
 
@@ -628,7 +618,8 @@
  *     SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device
  *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
-static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
+static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
+		int *bflags)
 {
 	/*
 	 * XXX do not save the inquiry, since it can change underneath us,
@@ -653,9 +644,8 @@
 	if (*bflags & BLIST_ISROM) {
 		/*
 		 * It would be better to modify sdev->type, and set
-		 * sdev->removable, but then the print_inquiry() output
-		 * would not show TYPE_ROM; if print_inquiry() is removed
-		 * the issue goes away.
+		 * sdev->removable; this can now be done since
+		 * print_inquiry has gone away.
 		 */
 		inq_result[0] = TYPE_ROM;
 		inq_result[1] |= 0x80;	/* removable */
@@ -684,8 +674,6 @@
 		printk(KERN_INFO "scsi: unknown device type %d\n", sdev->type);
 	}
 
-	print_inquiry(inq_result);
-
 	/*
 	 * For a peripheral qualifier (PQ) value of 1 (001b), the SCSI
 	 * spec says: The device server is capable of supporting the
@@ -715,6 +703,12 @@
 	if (inq_result[7] & 0x10)
 		sdev->sdtr = 1;
 
+	sdev_printk(KERN_NOTICE, sdev, "%s %.8s %.16s %.4s PQ: %d "
+			"ANSI: %d%s\n", scsi_device_type(sdev->type),
+			sdev->vendor, sdev->model, sdev->rev,
+			sdev->inq_periph_qual, inq_result[2] & 0x07,
+			(inq_result[3] & 0x0f) == 1 ? " CCS" : "");
+
 	/*
 	 * End sysfs code.
 	 */
@@ -943,11 +937,26 @@
 	}
 
 	/*
-	 * Non-standard SCSI targets may set the PDT to 0x1f (unknown or
-	 * no device type) instead of using the Peripheral Qualifier to
-	 * indicate that no LUN is present.  For example, USB UFI does this.
+	 * Some targets may set slight variations of PQ and PDT to signal
+	 * that no LUN is present, so don't add sdev in these cases.
+	 * Two specific examples are:
+	 * 1) NetApp targets: return PQ=1, PDT=0x1f
+	 * 2) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved"
+	 *    in the UFI 1.0 spec (we cannot rely on reserved bits).
+	 *
+	 * References:
+	 * 1) SCSI SPC-3, pp. 145-146
+	 * PQ=1: "A peripheral device having the specified peripheral
+	 * device type is not connected to this logical unit. However, the
+	 * device server is capable of supporting the specified peripheral
+	 * device type on this logical unit."
+	 * PDT=0x1f: "Unknown or no device type"
+	 * 2) USB UFI 1.0, p. 20
+	 * PDT=00h Direct-access device (floppy)
+	 * PDT=1Fh none (no FDD connected to the requested logical unit)
 	 */
-	if (starget->pdt_1f_for_no_lun && (result[0] & 0x1f) == 0x1f) {
+	if (((result[0] >> 5) == 1 || starget->pdt_1f_for_no_lun) &&
+	     (result[0] & 0x1f) == 0x1f) {
 		SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO
 					"scsi scan: peripheral device type"
 					" of 31, no device added\n"));
@@ -1345,7 +1354,6 @@
 	if (!starget)
 		return ERR_PTR(-ENOMEM);
 
-	get_device(&starget->dev);
 	mutex_lock(&shost->scan_mutex);
 	if (scsi_host_scan_allowed(shost))
 		scsi_probe_and_add_lun(starget, lun, NULL, &sdev, 1, hostdata);
@@ -1404,7 +1412,6 @@
 	if (!starget)
 		return;
 
-	get_device(&starget->dev);
 	if (lun != SCAN_WILD_CARD) {
 		/*
 		 * Scan for a specific host/chan/id/lun.
@@ -1586,7 +1593,8 @@
 	if (sdev) {
 		sdev->sdev_gendev.parent = get_device(&starget->dev);
 		sdev->borken = 0;
-	}
+	} else
+		scsi_target_reap(starget);
 	put_device(&starget->dev);
  out:
 	mutex_unlock(&shost->scan_mutex);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index b03aa85..38c215a 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -32,6 +32,9 @@
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_fc.h>
 #include <scsi/scsi_cmnd.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
+#include <scsi/scsi_netlink_fc.h>
 #include "scsi_priv.h"
 
 static int fc_queue_work(struct Scsi_Host *, struct work_struct *);
@@ -93,6 +96,29 @@
 #define FC_PORTTYPE_MAX_NAMELEN		50
 
 
+/* Convert fc_host_event_code values to ascii string name */
+static const struct {
+	enum fc_host_event_code		value;	/* event code to look up */
+	char				*name;	/* its ascii name */
+} fc_host_event_code_names[] = {
+	{ FCH_EVT_LIP,			"lip" },
+	{ FCH_EVT_LINKUP,		"link_up" },
+	{ FCH_EVT_LINKDOWN,		"link_down" },
+	{ FCH_EVT_LIPRESET,		"lip_reset" },
+	{ FCH_EVT_RSCN,			"rscn" },
+	{ FCH_EVT_ADAPTER_CHANGE,	"adapter_chg" },
+	{ FCH_EVT_PORT_UNKNOWN,		"port_unknown" },
+	{ FCH_EVT_PORT_ONLINE,		"port_online" },
+	{ FCH_EVT_PORT_OFFLINE,		"port_offline" },
+	{ FCH_EVT_PORT_FABRIC,		"port_fabric" },
+	{ FCH_EVT_LINK_UNKNOWN,		"link_unknown" },
+	{ FCH_EVT_VENDOR_UNIQUE,	"vendor_unique" },
+};
+fc_enum_name_search(host_event_code, fc_host_event_code,
+		fc_host_event_code_names) /* presumably emits get_fc_host_event_code_name() - confirm in the macro */
+#define FC_HOST_EVENT_CODE_MAX_NAMELEN	30
+
+
 /* Convert fc_port_state values to ascii string name */
 static struct {
 	enum fc_port_state	value;
@@ -216,6 +242,7 @@
 
 
 static void fc_timeout_deleted_rport(void *data);
+static void fc_timeout_fail_rport_io(void *data);
 static void fc_scsi_scan_rport(void *data);
 
 /*
@@ -223,7 +250,7 @@
  * Increase these values if you add attributes
  */
 #define FC_STARGET_NUM_ATTRS 	3
-#define FC_RPORT_NUM_ATTRS	9
+#define FC_RPORT_NUM_ATTRS	10
 #define FC_HOST_NUM_ATTRS	17
 
 struct fc_internal {
@@ -301,8 +328,6 @@
 	fc_host->supported_classes = FC_COS_UNSPECIFIED;
 	memset(fc_host->supported_fc4s, 0,
 		sizeof(fc_host->supported_fc4s));
-	memset(fc_host->symbolic_name, 0,
-		sizeof(fc_host->symbolic_name));
 	fc_host->supported_speeds = FC_PORTSPEED_UNKNOWN;
 	fc_host->maxframe_size = -1;
 	memset(fc_host->serial_number, 0,
@@ -315,6 +340,8 @@
 		sizeof(fc_host->active_fc4s));
 	fc_host->speed = FC_PORTSPEED_UNKNOWN;
 	fc_host->fabric_name = -1;
+	memset(fc_host->symbolic_name, 0, sizeof(fc_host->symbolic_name));
+	memset(fc_host->system_hostname, 0, sizeof(fc_host->system_hostname));
 
 	fc_host->tgtid_bind_type = FC_TGTID_BIND_BY_WWPN;
 
@@ -377,10 +404,184 @@
 		 " exceeded, the scsi target is removed. Value should be"
 		 " between 1 and SCSI_DEVICE_BLOCK_MAX_TIMEOUT.");
 
+/**
+ * Netlink Infrastructure
+ **/
+
+static atomic_t fc_event_seq;
+
+/**
+ * fc_get_event_number - Obtain the next sequential FC event number
+ *
+ * Returns the value of fc_event_seq after atomically adding one to it.
+ * Notes:
+ *   Not inlined, since that would have required fc_event_seq to be
+ *   exposed; live with the subroutine call. Atomic avoids lock/unlock.
+ **/
+u32
+fc_get_event_number(void)
+{
+	return atomic_add_return(1, &fc_event_seq);
+}
+EXPORT_SYMBOL(fc_get_event_number);
+
+
+/**
+ * fc_host_post_event - called to post an event on an fc_host.
+ *
+ * @shost:		host the event occurred on
+ * @event_number:	fc event number obtained from fc_get_event_number()
+ * @event_code:		fc_host event being posted
+ * @event_data:		32bits of data for the event being posted
+ *
+ * Notes:
+ *	This routine assumes no locks are held on entry.
+ **/
+void
+fc_host_post_event(struct Scsi_Host *shost, u32 event_number,
+		enum fc_host_event_code event_code, u32 event_data)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr	*nlh;
+	struct fc_nl_event *event;
+	const char *name;
+	u32 len, skblen;
+	int err;
+
+	if (!scsi_nl_sock) {	/* no netlink socket to send the event on */
+		err = -ENOENT;
+		goto send_fail;
+	}
+
+	len = FC_NL_MSGALIGN(sizeof(*event));	/* payload: one fc_nl_event */
+	skblen = NLMSG_SPACE(len);
+
+	skb = alloc_skb(skblen, GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		goto send_fail;
+	}
+
+	nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG,
+				skblen - sizeof(*nlh), 0);
+	if (!nlh) {
+		err = -ENOBUFS;
+		goto send_fail_skb;
+	}
+	event = NLMSG_DATA(nlh);
+
+	INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC,
+				FC_NL_ASYNC_EVENT, len);
+	event->seconds = get_seconds();
+	event->vendor_id = 0;	/* fixed at 0 here; the vendor variant fills it */
+	event->host_no = shost->host_no;
+	event->event_datalen = sizeof(u32);	/* bytes */
+	event->event_num = event_number;
+	event->event_code = event_code;
+	event->event_data = event_data;
+
+	err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS,
+			      GFP_KERNEL);
+	if (err && (err != -ESRCH))	/* filter no recipient errors */
+		/* nlmsg_multicast already kfree_skb'd */
+		goto send_fail;
+
+	return;
+
+send_fail_skb:	/* reached only when nlmsg_put failed, so skb is still ours */
+	kfree_skb(skb);
+send_fail:	/* the event is dropped; log what was lost and why */
+	name = get_fc_host_event_code_name(event_code);
+	printk(KERN_WARNING
+		"%s: Dropped Event : host %d %s data 0x%08x - err %d\n",
+		__FUNCTION__, shost->host_no,
+		(name) ? name : "<unknown>", event_data, err);
+	return;
+}
+EXPORT_SYMBOL(fc_host_post_event);
+
+
+/**
+ * fc_host_post_vendor_event - post a vendor unique event on an fc_host
+ *
+ * @shost:		host the event occurred on
+ * @event_number:	fc event number obtained from fc_get_event_number()
+ * @data_len:		amount, in bytes, of vendor unique data
+ * @data_buf:		pointer to vendor unique data
+ * @vendor_id:		value stored in the event's vendor_id field
+ *
+ * Notes:
+ *	This routine assumes no locks are held on entry.
+ **/
+void
+fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number,
+		u32 data_len, char * data_buf, u64 vendor_id)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr	*nlh;
+	struct fc_nl_event *event;
+	u32 len, skblen;
+	int err;
+
+	if (!scsi_nl_sock) {	/* no netlink socket to send the event on */
+		err = -ENOENT;
+		goto send_vendor_fail;
+	}
+
+	/* room for the fixed event plus data_len bytes of vendor data */
+	len = FC_NL_MSGALIGN(sizeof(*event) + data_len);
+	skblen = NLMSG_SPACE(len);
+
+	skb = alloc_skb(skblen, GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		goto send_vendor_fail;
+	}
+
+	nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG,
+				skblen - sizeof(*nlh), 0);
+	if (!nlh) {
+		err = -ENOBUFS;
+		goto send_vendor_fail_skb;
+	}
+	event = NLMSG_DATA(nlh);
+
+	INIT_SCSI_NL_HDR(&event->snlh, SCSI_NL_TRANSPORT_FC,
+				FC_NL_ASYNC_EVENT, len);
+	event->seconds = get_seconds();
+	event->vendor_id = vendor_id;
+	event->host_no = shost->host_no;
+	event->event_datalen = data_len;	/* bytes */
+	event->event_num = event_number;
+	event->event_code = FCH_EVT_VENDOR_UNIQUE;
+	memcpy(&event->event_data, data_buf, data_len); /* payload begins at event_data */
+
+	err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS,
+			      GFP_KERNEL);
+	if (err && (err != -ESRCH))	/* filter no recipient errors */
+		/* nlmsg_multicast already kfree_skb'd */
+		goto send_vendor_fail;
+
+	return;
+
+send_vendor_fail_skb:	/* reached only when nlmsg_put failed, skb is still ours */
+	kfree_skb(skb);
+send_vendor_fail:	/* the event is dropped; log the host and error */
+	printk(KERN_WARNING
+		"%s: Dropped Event : host %d vendor_unique - err %d\n",
+		__FUNCTION__, shost->host_no, err);
+	return;
+}
+EXPORT_SYMBOL(fc_host_post_vendor_event);
+
+
 
 static __init int fc_transport_init(void)
 {
-	int error = transport_class_register(&fc_host_class);
+	int error;
+
+	atomic_set(&fc_event_seq, 0);
+
+	error = transport_class_register(&fc_host_class);
 	if (error)
 		return error;
 	error = transport_class_register(&fc_rport_class);
@@ -424,11 +625,14 @@
 	struct fc_rport *rport = transport_class_to_rport(cdev);	\
 	struct Scsi_Host *shost = rport_to_shost(rport);		\
 	struct fc_internal *i = to_fc_internal(shost->transportt);	\
+	char *cp;							\
 	if ((rport->port_state == FC_PORTSTATE_BLOCKED) ||		\
 	    (rport->port_state == FC_PORTSTATE_DELETED) ||		\
 	    (rport->port_state == FC_PORTSTATE_NOTPRESENT))		\
 		return -EBUSY;						\
-	val = simple_strtoul(buf, NULL, 0);				\
+	val = simple_strtoul(buf, &cp, 0);				\
+	if (*cp && (*cp != '\n'))					\
+		return -EINVAL;						\
 	i->f->set_rport_##field(rport, val);				\
 	return count;							\
 }
@@ -510,6 +714,13 @@
 	if (i->f->show_rport_##field)					\
 		count++
 
+#define SETUP_PRIVATE_RPORT_ATTRIBUTE_RW(field)				\
+{	/* braces keep the three statements together under a bare if */	\
+	i->private_rport_attrs[count] = class_device_attr_rport_##field; \
+	i->rport_attrs[count] = &i->private_rport_attrs[count];		\
+	count++;	/* the slot is always consumed */		\
+}
+
 
 /* The FC Transport Remote Port Attributes: */
 
@@ -542,12 +753,14 @@
 	struct fc_rport *rport = transport_class_to_rport(cdev);
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	struct fc_internal *i = to_fc_internal(shost->transportt);
+	char *cp;
 	if ((rport->port_state == FC_PORTSTATE_BLOCKED) ||
 	    (rport->port_state == FC_PORTSTATE_DELETED) ||
 	    (rport->port_state == FC_PORTSTATE_NOTPRESENT))
 		return -EBUSY;
-	val = simple_strtoul(buf, NULL, 0);
-	if ((val < 0) || (val > SCSI_DEVICE_BLOCK_MAX_TIMEOUT))
+	val = simple_strtoul(buf, &cp, 0);
+	if ((*cp && (*cp != '\n')) ||
+	    (val < 0) || (val > SCSI_DEVICE_BLOCK_MAX_TIMEOUT))
 		return -EINVAL;
 	i->f->set_rport_dev_loss_tmo(rport, val);
 	return count;
@@ -597,6 +810,44 @@
 fc_private_rport_rd_enum_attr(port_state, FC_PORTSTATE_MAX_NAMELEN);
 fc_private_rport_rd_attr(scsi_target_id, "%d\n", 20);
 
+/*
+ * fast_io_fail_tmo attribute
+ */
+static ssize_t
+show_fc_rport_fast_io_fail_tmo (struct class_device *cdev, char *buf)
+{
+	struct fc_rport *rp = transport_class_to_rport(cdev);
+
+	if (rp->fast_io_fail_tmo != -1)	/* -1 is shown as "off" */
+		return snprintf(buf, 20, "%d\n", rp->fast_io_fail_tmo);
+	return snprintf(buf, 5, "off\n");
+}
+
+static ssize_t
+store_fc_rport_fast_io_fail_tmo(struct class_device *cdev, const char *buf,
+			   size_t count)
+{
+	int val;
+	char *cp;	/* first character simple_strtoul() did not consume */
+	struct fc_rport *rport = transport_class_to_rport(cdev);
+
+	/* the value may not be changed while the port is in these states */
+	if ((rport->port_state == FC_PORTSTATE_BLOCKED) ||
+	    (rport->port_state == FC_PORTSTATE_DELETED) ||
+	    (rport->port_state == FC_PORTSTATE_NOTPRESENT))
+		return -EBUSY;
+	if (strncmp(buf, "off", 3) == 0)	/* prefix match on "off" only */
+		rport->fast_io_fail_tmo = -1;	/* -1 is what show reports as "off" */
+	else {
+		val = simple_strtoul(buf, &cp, 0);
+		if ((*cp && (*cp != '\n')) ||	/* trailing junk after the number */
+		    (val < 0) || (val >= rport->dev_loss_tmo)) /* must stay below dev_loss_tmo */
+			return -EINVAL;
+		rport->fast_io_fail_tmo = val;
+	}
+	return count;
+}
+static FC_CLASS_DEVICE_ATTR(rport, fast_io_fail_tmo, S_IRUGO | S_IWUSR,
+	show_fc_rport_fast_io_fail_tmo, store_fc_rport_fast_io_fail_tmo);
 
 
 /*
@@ -682,12 +933,34 @@
 	int val;							\
 	struct Scsi_Host *shost = transport_class_to_shost(cdev);	\
 	struct fc_internal *i = to_fc_internal(shost->transportt);	\
+	char *cp;							\
 									\
-	val = simple_strtoul(buf, NULL, 0);				\
+	val = simple_strtoul(buf, &cp, 0);				\
+	if (*cp && (*cp != '\n'))					\
+		return -EINVAL;						\
 	i->f->set_host_##field(shost, val);				\
 	return count;							\
 }
 
+#define fc_host_store_str_function(field, slen)	/* emits store_fc_host_<field>() */ \
+static ssize_t								\
+store_fc_host_##field(struct class_device *cdev, const char *buf,	\
+			   size_t count)				\
+{									\
+	struct Scsi_Host *shost = transport_class_to_shost(cdev);	\
+	struct fc_internal *i = to_fc_internal(shost->transportt);	\
+	unsigned int cnt=count;						\
+									\
+	/* count may include a LF at end of string */			\
+	if (buf[cnt-1] == '\n')	/* NOTE(review): no guard here for count == 0 */ \
+		cnt--;							\
+	if (cnt > ((slen) - 1))	/* accept at most slen - 1 bytes */	\
+		return -EINVAL;						\
+	memcpy(fc_host_##field(shost), buf, cnt); /* NOTE(review): no NUL is written; a shorter value keeps the old tail */ \
+	i->f->set_host_##field(shost);					\
+	return count;							\
+}
+
 #define fc_host_rd_attr(field, format_string, sz)			\
 	fc_host_show_function(field, format_string, sz, )		\
 static FC_CLASS_DEVICE_ATTR(host, field, S_IRUGO,			\
@@ -815,7 +1088,6 @@
 fc_private_host_rd_attr_cast(port_name, "0x%llx\n", 20, unsigned long long);
 fc_private_host_rd_attr_cast(permanent_port_name, "0x%llx\n", 20,
 			     unsigned long long);
-fc_private_host_rd_attr(symbolic_name, "%s\n", (FC_SYMBOLIC_NAME_SIZE +1));
 fc_private_host_rd_attr(maxframe_size, "%u bytes\n", 20);
 fc_private_host_rd_attr(serial_number, "%s\n", (FC_SERIAL_NUMBER_SIZE +1));
 
@@ -858,6 +1130,13 @@
 fc_host_rd_enum_attr(port_type, FC_PORTTYPE_MAX_NAMELEN);
 fc_host_rd_enum_attr(port_state, FC_PORTSTATE_MAX_NAMELEN);
 fc_host_rd_attr_cast(fabric_name, "0x%llx\n", 20, unsigned long long);
+fc_host_rd_attr(symbolic_name, "%s\n", FC_SYMBOLIC_NAME_SIZE + 1);
+
+fc_private_host_show_function(system_hostname, "%s\n",
+		FC_SYMBOLIC_NAME_SIZE + 1, )
+fc_host_store_str_function(system_hostname, FC_SYMBOLIC_NAME_SIZE)
+static FC_CLASS_DEVICE_ATTR(host, system_hostname, S_IRUGO | S_IWUSR,
+		show_fc_host_system_hostname, store_fc_host_system_hostname);
 
 
 /* Private Host Attributes */
@@ -1223,7 +1502,6 @@
 	SETUP_HOST_ATTRIBUTE_RD(permanent_port_name);
 	SETUP_HOST_ATTRIBUTE_RD(supported_classes);
 	SETUP_HOST_ATTRIBUTE_RD(supported_fc4s);
-	SETUP_HOST_ATTRIBUTE_RD(symbolic_name);
 	SETUP_HOST_ATTRIBUTE_RD(supported_speeds);
 	SETUP_HOST_ATTRIBUTE_RD(maxframe_size);
 	SETUP_HOST_ATTRIBUTE_RD(serial_number);
@@ -1234,6 +1512,8 @@
 	SETUP_HOST_ATTRIBUTE_RD(active_fc4s);
 	SETUP_HOST_ATTRIBUTE_RD(speed);
 	SETUP_HOST_ATTRIBUTE_RD(fabric_name);
+	SETUP_HOST_ATTRIBUTE_RD(symbolic_name);
+	SETUP_HOST_ATTRIBUTE_RW(system_hostname);
 
 	/* Transport-managed attributes */
 	SETUP_PRIVATE_HOST_ATTRIBUTE_RW(tgtid_bind_type);
@@ -1257,6 +1537,8 @@
 	SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(roles);
 	SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(port_state);
 	SETUP_PRIVATE_RPORT_ATTRIBUTE_RD(scsi_target_id);
+	if (ft->terminate_rport_io)
+		SETUP_PRIVATE_RPORT_ATTRIBUTE_RW(fast_io_fail_tmo);
 
 	BUG_ON(count > FC_RPORT_NUM_ATTRS);
 
@@ -1328,7 +1610,7 @@
  * @delay:	jiffies to delay the work queuing
  *
  * Return value:
- * 	0 on success / != 0 for error
+ * 	1 on success / 0 already queued / < 0 for error
  **/
 static int
 fc_queue_devloss_work(struct Scsi_Host *shost, struct work_struct *work,
@@ -1343,6 +1625,9 @@
 		return -EINVAL;
 	}
 
+	if (delay == 0)
+		return queue_work(fc_host_devloss_work_q(shost), work);
+
 	return queue_delayed_work(fc_host_devloss_work_q(shost), work, delay);
 }
 
@@ -1435,10 +1720,23 @@
 	struct fc_rport *rport = (struct fc_rport *)data;
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	unsigned long flags;
+	struct fc_internal *i = to_fc_internal(shost->transportt);
+
+	/*
+	 * Involve the LLDD if possible. All io on the rport is to
+	 * be terminated, either as part of the dev_loss_tmo callback
+	 * processing, or via the terminate_rport_io function.
+	 */
+	if (i->f->dev_loss_tmo_callbk)
+		i->f->dev_loss_tmo_callbk(rport);
+	else if (i->f->terminate_rport_io)
+		i->f->terminate_rport_io(rport);
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (rport->flags & FC_RPORT_DEVLOSS_PENDING) {
 		spin_unlock_irqrestore(shost->host_lock, flags);
+		if (!cancel_delayed_work(&rport->fail_io_work))
+			fc_flush_devloss(shost);
 		if (!cancel_delayed_work(&rport->dev_loss_work))
 			fc_flush_devloss(shost);
 		spin_lock_irqsave(shost->host_lock, flags);
@@ -1461,10 +1759,7 @@
 	struct fc_rport *rport = (struct fc_rport *)data;
 	struct device *dev = &rport->dev;
 	struct Scsi_Host *shost = rport_to_shost(rport);
-
-	/* Delete SCSI target and sdevs */
-	if (rport->scsi_target_id != -1)
-		fc_starget_delete(data);
+	struct fc_internal *i = to_fc_internal(shost->transportt);
 
 	/*
 	 * if a scan is pending, flush the SCSI Host work_q so that 
@@ -1473,6 +1768,14 @@
 	if (rport->flags & FC_RPORT_SCAN_PENDING)
 		scsi_flush_work(shost);
 
+	/* Delete SCSI target and sdevs */
+	if (rport->scsi_target_id != -1)
+		fc_starget_delete(data);
+	else if (i->f->dev_loss_tmo_callbk)
+		i->f->dev_loss_tmo_callbk(rport);
+	else if (i->f->terminate_rport_io)
+		i->f->terminate_rport_io(rport);
+
 	transport_remove_device(dev);
 	device_del(dev);
 	transport_destroy_device(dev);
@@ -1524,8 +1827,10 @@
 	if (fci->f->dd_fcrport_size)
 		rport->dd_data = &rport[1];
 	rport->channel = channel;
+	rport->fast_io_fail_tmo = -1;
 
 	INIT_WORK(&rport->dev_loss_work, fc_timeout_deleted_rport, rport);
+	INIT_WORK(&rport->fail_io_work, fc_timeout_fail_rport_io, rport);
 	INIT_WORK(&rport->scan_work, fc_scsi_scan_rport, rport);
 	INIT_WORK(&rport->stgt_delete_work, fc_starget_delete, rport);
 	INIT_WORK(&rport->rport_delete_work, fc_rport_final_delete, rport);
@@ -1689,11 +1994,13 @@
 				/* restart the target */
 
 				/*
-				 * Stop the target timer first. Take no action
+				 * Stop the target timers first. Take no action
 				 * on the del_timer failure as the state
 				 * machine state change will validate the
 				 * transaction.
 				 */
+				if (!cancel_delayed_work(&rport->fail_io_work))
+					fc_flush_devloss(shost);
 				if (!cancel_delayed_work(work))
 					fc_flush_devloss(shost);
 
@@ -1837,6 +2144,7 @@
 fc_remote_port_delete(struct fc_rport  *rport)
 {
 	struct Scsi_Host *shost = rport_to_shost(rport);
+	struct fc_internal *i = to_fc_internal(shost->transportt);
 	int timeout = rport->dev_loss_tmo;
 	unsigned long flags;
 
@@ -1867,6 +2175,12 @@
 
 	scsi_target_block(&rport->dev);
 
+	/* see if we need to kill io faster than waiting for device loss */
+	if ((rport->fast_io_fail_tmo != -1) &&
+	    (rport->fast_io_fail_tmo < timeout) && (i->f->terminate_rport_io))
+		fc_queue_devloss_work(shost, &rport->fail_io_work,
+					rport->fast_io_fail_tmo * HZ);
+
 	/* cap the length the devices can be blocked until they are deleted */
 	fc_queue_devloss_work(shost, &rport->dev_loss_work, timeout * HZ);
 }
@@ -1926,6 +2240,8 @@
 		 * machine state change will validate the
 		 * transaction.
 		 */
+		if (!cancel_delayed_work(&rport->fail_io_work))
+			fc_flush_devloss(shost);
 		if (!cancel_delayed_work(&rport->dev_loss_work))
 			fc_flush_devloss(shost);
 
@@ -2047,6 +2363,28 @@
 }
 
 /**
+ * fc_timeout_fail_rport_io - Timeout handler for a fast io failing on a
+ *                       disconnected SCSI target.
+ *
+ * @data:	rport to terminate io on.
+ *
+ * Notes: Only requests the failure of the io, not that all are flushed
+ *    prior to returning.
+ **/
+static void
+fc_timeout_fail_rport_io(void  *data)
+{
+	struct fc_rport *rport = data;
+	struct fc_internal *fci;
+
+	/* Only a port that is still blocked has its io terminated */
+	if (rport->port_state == FC_PORTSTATE_BLOCKED) {
+		fci = to_fc_internal(rport_to_shost(rport)->transportt);
+		fci->f->terminate_rport_io(rport);
+	}
+}
+
+/**
  * fc_scsi_scan_rport - called to perform a scsi scan on a remote port.
  *
  * @data:	remote port to be scanned.
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 2ecd141..7b0019c 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -34,7 +34,7 @@
 #define ISCSI_SESSION_ATTRS 11
 #define ISCSI_CONN_ATTRS 11
 #define ISCSI_HOST_ATTRS 0
-#define ISCSI_TRANSPORT_VERSION "1.1-646"
+#define ISCSI_TRANSPORT_VERSION "2.0-685"
 
 struct iscsi_internal {
 	int daemon_pid;
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 5a625c3..b5b0c2c 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -77,6 +77,24 @@
 	return len;						\
 }
 
+#define sas_bitfield_name_set(title, table)	/* name -> value lookup */ \
+static ssize_t							\
+set_sas_##title##_names(u32 *table_key, const char *buf)	\
+{								\
+	ssize_t len = 0;					\
+	int i;							\
+								\
+	for (i = 0; i < ARRAY_SIZE(table); i++) {		\
+		len = strlen(table[i].name);			\
+		/* whole-name match: name, then newline or NUL */ \
+		if (strncmp(buf, table[i].name, len) == 0 &&	\
+		    (buf[len] == '\n' || buf[len] == '\0')) {	\
+			*table_key = table[i].value;		\
+			return 0;	/* found */		\
+		}						\
+	}							\
+	return -EINVAL;	/* buf names no table entry */		\
+}
+
 #define sas_bitfield_name_search(title, table)			\
 static ssize_t							\
 get_sas_##title##_names(u32 table_key, char *buf)		\
@@ -131,7 +149,7 @@
 	{ SAS_LINK_RATE_6_0_GBPS,	"6.0 Gbit" },
 };
 sas_bitfield_name_search(linkspeed, sas_linkspeed_names)
-
+sas_bitfield_name_set(linkspeed, sas_linkspeed_names)
 
 /*
  * SAS host attributes
@@ -253,10 +271,39 @@
 	return get_sas_linkspeed_names(phy->field, buf);		\
 }
 
+/* Fudge to tell if we're minimum or maximum: only rates.<field> is set, the rest of rates stays 0 */
+#define sas_phy_store_linkspeed(field)					\
+static ssize_t								\
+store_sas_phy_##field(struct class_device *cdev, const char *buf,	\
+		      size_t count)					\
+{									\
+	struct sas_phy *phy = transport_class_to_phy(cdev);		\
+	struct Scsi_Host *shost = dev_to_shost(phy->dev.parent);	\
+	struct sas_internal *i = to_sas_internal(shost->transportt);	\
+	u32 value;							\
+	struct sas_phy_linkrates rates = {0};				\
+	int error;							\
+									\
+	error = set_sas_linkspeed_names(&value, buf); /* parse the rate name */ \
+	if (error)							\
+		return error;						\
+	rates.field = value;						\
+	error = i->f->set_phy_speed(phy, &rates);			\
+									\
+	return error ? error : count;	/* whole write consumed on success */ \
+}
+
+#define sas_phy_linkspeed_rw_attr(field)	/* show + store + attribute */ \
+	sas_phy_show_linkspeed(field)					\
+	sas_phy_store_linkspeed(field)					\
+static CLASS_DEVICE_ATTR(field, S_IRUGO, show_sas_phy_##field,		\
+	store_sas_phy_##field)	/* SETUP_TEMPLATE_RW sets the mode on its copy of this attribute */
+
 #define sas_phy_linkspeed_attr(field)					\
 	sas_phy_show_linkspeed(field)					\
 static CLASS_DEVICE_ATTR(field, S_IRUGO, show_sas_phy_##field, NULL)
 
+
 #define sas_phy_show_linkerror(field)					\
 static ssize_t								\
 show_sas_phy_##field(struct class_device *cdev, char *buf)		\
@@ -266,9 +313,6 @@
 	struct sas_internal *i = to_sas_internal(shost->transportt);	\
 	int error;							\
 									\
-	if (!phy->local_attached)					\
-		return -EINVAL;						\
-									\
 	error = i->f->get_linkerrors ? i->f->get_linkerrors(phy) : 0;	\
 	if (error)							\
 		return error;						\
@@ -299,9 +343,6 @@
 	struct sas_internal *i = to_sas_internal(shost->transportt);
 	int error;
 
-	if (!phy->local_attached)
-		return -EINVAL;
-
 	error = i->f->phy_reset(phy, hard_reset);
 	if (error)
 		return error;
@@ -332,9 +373,9 @@
 //sas_phy_simple_attr(port_identifier, port_identifier, "%d\n", int);
 sas_phy_linkspeed_attr(negotiated_linkrate);
 sas_phy_linkspeed_attr(minimum_linkrate_hw);
-sas_phy_linkspeed_attr(minimum_linkrate);
+sas_phy_linkspeed_rw_attr(minimum_linkrate);
 sas_phy_linkspeed_attr(maximum_linkrate_hw);
-sas_phy_linkspeed_attr(maximum_linkrate);
+sas_phy_linkspeed_rw_attr(maximum_linkrate);
 sas_phy_linkerror_attr(invalid_dword_count);
 sas_phy_linkerror_attr(running_disparity_error_count);
 sas_phy_linkerror_attr(loss_of_dword_sync_count);
@@ -849,7 +890,7 @@
 	 * Only devices behind an expander are supported, because the
 	 * enclosure identifier is a SMP feature.
 	 */
-	if (phy->local_attached)
+	if (scsi_is_sas_phy_local(phy))
 		return -EINVAL;
 
 	error = i->f->get_enclosure_identifier(rphy, &identifier);
@@ -870,7 +911,7 @@
 	struct sas_internal *i = to_sas_internal(shost->transportt);
 	int val;
 
-	if (phy->local_attached)
+	if (scsi_is_sas_phy_local(phy))
 		return -EINVAL;
 
 	val = i->f->get_bay_identifier(rphy);
@@ -1316,13 +1357,23 @@
  * Setup / Teardown code
  */
 
-#define SETUP_TEMPLATE(attrb, field, perm, test)				\
+#define SETUP_TEMPLATE(attrb, field, perm, test)			\
 	i->private_##attrb[count] = class_device_attr_##field;		\
 	i->private_##attrb[count].attr.mode = perm;			\
 	i->attrb[count] = &i->private_##attrb[count];			\
 	if (test)							\
 		count++
 
+#define SETUP_TEMPLATE_RW(attrb, field, perm, test, ro_test, ro_perm)	\
+	i->private_##attrb[count] = class_device_attr_##field;		\
+	i->private_##attrb[count].attr.mode = perm;			\
+	if (ro_test) {	/* demote: ro_perm mode and no store method */	\
+		i->private_##attrb[count].attr.mode = ro_perm;		\
+		i->private_##attrb[count].store = NULL;			\
+	}								\
+	i->attrb[count] = &i->private_##attrb[count];			\
+	if (test)	/* the slot is kept only when test holds */	\
+		count++
 
 #define SETUP_RPORT_ATTRIBUTE(field) 					\
 	SETUP_TEMPLATE(rphy_attrs, field, S_IRUGO, 1)
@@ -1333,6 +1384,10 @@
 #define SETUP_PHY_ATTRIBUTE(field)					\
 	SETUP_TEMPLATE(phy_attrs, field, S_IRUGO, 1)
 
+#define SETUP_PHY_ATTRIBUTE_RW(field)					\
+	SETUP_TEMPLATE_RW(phy_attrs, field, S_IRUGO | S_IWUSR, 1,	\
+			!i->f->set_phy_speed, S_IRUGO)	/* read-only when set_phy_speed is NULL */
+
 #define SETUP_PORT_ATTRIBUTE(field)					\
 	SETUP_TEMPLATE(port_attrs, field, S_IRUGO, 1)
 
@@ -1413,9 +1468,9 @@
 	//SETUP_PHY_ATTRIBUTE(port_identifier);
 	SETUP_PHY_ATTRIBUTE(negotiated_linkrate);
 	SETUP_PHY_ATTRIBUTE(minimum_linkrate_hw);
-	SETUP_PHY_ATTRIBUTE(minimum_linkrate);
+	SETUP_PHY_ATTRIBUTE_RW(minimum_linkrate);
 	SETUP_PHY_ATTRIBUTE(maximum_linkrate_hw);
-	SETUP_PHY_ATTRIBUTE(maximum_linkrate);
+	SETUP_PHY_ATTRIBUTE_RW(maximum_linkrate);
 
 	SETUP_PHY_ATTRIBUTE(invalid_dword_count);
 	SETUP_PHY_ATTRIBUTE(running_disparity_error_count);
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index 29a9a53..9f070f0 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -47,6 +47,7 @@
 
 /* Private data accessors (keep these out of the header file) */
 #define spi_dv_pending(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_pending)
+#define spi_dv_in_progress(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_in_progress)
 #define spi_dv_mutex(x) (((struct spi_transport_attrs *)&(x)->starget_data)->dv_mutex)
 
 struct spi_internal {
@@ -240,6 +241,7 @@
 	spi_pcomp_en(starget) = 0;
 	spi_hold_mcs(starget) = 0;
 	spi_dv_pending(starget) = 0;
+	spi_dv_in_progress(starget) = 0;
 	spi_initial_dv(starget) = 0;
 	mutex_init(&spi_dv_mutex(starget));
 
@@ -830,28 +832,37 @@
 	DV_SET(period, spi_min_period(starget));
 	/* try QAS requests; this should be harmless to set if the
 	 * target supports it */
-	if (scsi_device_qas(sdev))
+	if (scsi_device_qas(sdev)) {
 		DV_SET(qas, 1);
-	/* Also try IU transfers */
-	if (scsi_device_ius(sdev))
+	} else {
+		DV_SET(qas, 0);
+	}
+
+	if (scsi_device_ius(sdev) && spi_min_period(starget) < 9) {
+		/* This is u320 (or u640). Set IU transfers */
 		DV_SET(iu, 1);
-	if (spi_min_period(starget) < 9) {
-		/* This u320 (or u640). Ignore the coupled parameters
-		 * like DT and IU, but set the optional ones */
+		/* Then set the optional parameters */
 		DV_SET(rd_strm, 1);
 		DV_SET(wr_flow, 1);
 		DV_SET(rti, 1);
 		if (spi_min_period(starget) == 8)
 			DV_SET(pcomp_en, 1);
+	} else {
+		DV_SET(iu, 0);
 	}
+
 	/* now that we've done all this, actually check the bus
 	 * signal type (if known).  Some devices are stupid on
 	 * a SE bus and still claim they can try LVD only settings */
 	if (i->f->get_signalling)
 		i->f->get_signalling(shost);
 	if (spi_signalling(shost) == SPI_SIGNAL_SE ||
-	    spi_signalling(shost) == SPI_SIGNAL_HVD)
+	    spi_signalling(shost) == SPI_SIGNAL_HVD ||
+	    !scsi_device_dt(sdev)) {
 		DV_SET(dt, 0);
+	} else {
+		DV_SET(dt, 1);
+	}
 	/* Do the read only INQUIRY tests */
 	spi_dv_retrain(sdev, buffer, buffer + sdev->inquiry_len,
 		       spi_dv_device_compare_inquiry);
@@ -907,6 +918,10 @@
 	if (unlikely(scsi_device_get(sdev)))
 		return;
 
+	if (unlikely(spi_dv_in_progress(starget)))
+		return;
+	spi_dv_in_progress(starget) = 1;
+
 	buffer = kzalloc(len, GFP_KERNEL);
 
 	if (unlikely(!buffer))
@@ -938,6 +953,7 @@
  out_free:
 	kfree(buffer);
  out_put:
+	spi_dv_in_progress(starget) = 0;
 	scsi_device_put(sdev);
 }
 EXPORT_SYMBOL(spi_dv_device);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 98bd3aa..638cff4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1215,7 +1215,7 @@
 		/* Either no media are present but the drive didn't tell us,
 		   or they are present but the read capacity command fails */
 		/* sdkp->media_present = 0; -- not always correct */
-		sdkp->capacity = 0x200000; /* 1 GB - random */
+		sdkp->capacity = 0; /* unknown mapped to zero - as usual */
 
 		return;
 	} else if (the_result && longrc) {
diff --git a/drivers/scsi/sgiwd93.c b/drivers/scsi/sgiwd93.c
index 7cd366f..4f1db6f 100644
--- a/drivers/scsi/sgiwd93.c
+++ b/drivers/scsi/sgiwd93.c
@@ -97,7 +97,7 @@
 }
 
 static inline
-void fill_hpc_entries(struct hpc_chunk *hcp, Scsi_Cmnd *cmd, int datainp)
+void fill_hpc_entries(struct hpc_chunk *hcp, struct scsi_cmnd *cmd, int datainp)
 {
 	unsigned long len = cmd->SCp.this_residual;
 	void *addr = cmd->SCp.ptr;
@@ -129,7 +129,7 @@
 	hcp->desc.cntinfo = HPCDMA_EOX;
 }
 
-static int dma_setup(Scsi_Cmnd *cmd, int datainp)
+static int dma_setup(struct scsi_cmnd *cmd, int datainp)
 {
 	struct ip22_hostdata *hdata = HDATA(cmd->device->host);
 	struct hpc3_scsiregs *hregs =
@@ -163,7 +163,7 @@
 	return 0;
 }
 
-static void dma_stop(struct Scsi_Host *instance, Scsi_Cmnd *SCpnt,
+static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
 		     int status)
 {
 	struct ip22_hostdata *hdata = HDATA(instance);
@@ -305,7 +305,7 @@
 	return 1;
 }
 
-static int sgiwd93_bus_reset(Scsi_Cmnd *cmd)
+static int sgiwd93_bus_reset(struct scsi_cmnd *cmd)
 {
 	/* FIXME perform bus-specific reset */
 
diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
new file mode 100644
index 0000000..3cf3106
--- /dev/null
+++ b/drivers/scsi/stex.c
@@ -0,0 +1,1252 @@
+/*
+ * SuperTrak EX Series Storage Controller driver for Linux
+ *
+ *	Copyright (C) 2005, 2006 Promise Technology Inc.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Written By:
+ *		Ed Lin <promise_linux@promise.com>
+ *
+ *	Version: 2.9.0.13
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/pci.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_tcq.h>
+
+#define DRV_NAME "stex"
+#define ST_DRIVER_VERSION "2.9.0.13"
+#define ST_VER_MAJOR 		2
+#define ST_VER_MINOR 		9
+#define ST_OEM 			0
+#define ST_BUILD_VER 		13
+
+enum {
+	/* MU register offset */
+	IMR0	= 0x10,	/* MU_INBOUND_MESSAGE_REG0 */
+	IMR1	= 0x14,	/* MU_INBOUND_MESSAGE_REG1 */
+	OMR0	= 0x18,	/* MU_OUTBOUND_MESSAGE_REG0 */
+	OMR1	= 0x1c,	/* MU_OUTBOUND_MESSAGE_REG1 */
+	IDBL	= 0x20,	/* MU_INBOUND_DOORBELL */
+	IIS	= 0x24,	/* MU_INBOUND_INTERRUPT_STATUS */
+	IIM	= 0x28,	/* MU_INBOUND_INTERRUPT_MASK */
+	ODBL	= 0x2c,	/* MU_OUTBOUND_DOORBELL */
+	OIS	= 0x30,	/* MU_OUTBOUND_INTERRUPT_STATUS */
+	OIM	= 0x3c,	/* MU_OUTBOUND_INTERRUPT_MASK */
+
+	/* MU register value */
+	MU_INBOUND_DOORBELL_HANDSHAKE		= 1,
+	MU_INBOUND_DOORBELL_REQHEADCHANGED	= 2,
+	MU_INBOUND_DOORBELL_STATUSTAILCHANGED	= 4,
+	MU_INBOUND_DOORBELL_HMUSTOPPED		= 8,
+	MU_INBOUND_DOORBELL_RESET		= 16,
+
+	MU_OUTBOUND_DOORBELL_HANDSHAKE		= 1,
+	MU_OUTBOUND_DOORBELL_REQUESTTAILCHANGED	= 2,
+	MU_OUTBOUND_DOORBELL_STATUSHEADCHANGED	= 4,
+	MU_OUTBOUND_DOORBELL_BUSCHANGE		= 8,
+	MU_OUTBOUND_DOORBELL_HASEVENT		= 16,
+
+	/* MU status code */
+	MU_STATE_STARTING			= 1,
+	MU_STATE_FMU_READY_FOR_HANDSHAKE	= 2,
+	MU_STATE_SEND_HANDSHAKE_FRAME		= 3,
+	MU_STATE_STARTED			= 4,
+	MU_STATE_RESETTING			= 5,
+
+	MU_MAX_DELAY_TIME			= 240000,
+	MU_HANDSHAKE_SIGNATURE			= 0x55aaaa55,
+	HMU_PARTNER_TYPE			= 2,
+
+	/* firmware returned values */
+	SRB_STATUS_SUCCESS			= 0x01,
+	SRB_STATUS_ERROR			= 0x04,
+	SRB_STATUS_BUSY				= 0x05,
+	SRB_STATUS_INVALID_REQUEST		= 0x06,
+	SRB_STATUS_SELECTION_TIMEOUT		= 0x0A,
+	SRB_SEE_SENSE 				= 0x80,
+
+	/* task attribute */
+	TASK_ATTRIBUTE_SIMPLE			= 0x0,
+	TASK_ATTRIBUTE_HEADOFQUEUE		= 0x1,
+	TASK_ATTRIBUTE_ORDERED			= 0x2,
+	TASK_ATTRIBUTE_ACA			= 0x4,
+
+	/* request count, etc. */
+	MU_MAX_REQUEST				= 32,
+
+	/* one message wasted, use MU_MAX_REQUEST+1
+		to handle MU_MAX_REQUEST messages */
+	MU_REQ_COUNT				= (MU_MAX_REQUEST + 1),
+	MU_STATUS_COUNT				= (MU_MAX_REQUEST + 1),
+
+	STEX_CDB_LENGTH				= MAX_COMMAND_SIZE,
+	REQ_VARIABLE_LEN			= 1024,
+	STATUS_VAR_LEN				= 128,
+	ST_CAN_QUEUE				= MU_MAX_REQUEST,
+	ST_CMD_PER_LUN				= MU_MAX_REQUEST,
+	ST_MAX_SG				= 32,
+
+	/* sg flags */
+	SG_CF_EOT				= 0x80,	/* end of table */
+	SG_CF_64B				= 0x40,	/* 64 bit item */
+	SG_CF_HOST				= 0x20,	/* sg in host memory */
+
+	ST_MAX_ARRAY_SUPPORTED			= 16,
+	ST_MAX_TARGET_NUM			= (ST_MAX_ARRAY_SUPPORTED+1),
+	ST_MAX_LUN_PER_TARGET			= 16,
+
+	st_shasta				= 0,
+	st_vsc					= 1,
+
+	PASSTHRU_REQ_TYPE			= 0x00000001,
+	PASSTHRU_REQ_NO_WAKEUP			= 0x00000100,
+	ST_INTERNAL_TIMEOUT			= 30,
+
+	/* vendor specific commands of Promise */
+	ARRAY_CMD				= 0xe0,
+	CONTROLLER_CMD				= 0xe1,
+	DEBUGGING_CMD				= 0xe2,
+	PASSTHRU_CMD				= 0xe3,
+
+	PASSTHRU_GET_ADAPTER			= 0x05,
+	PASSTHRU_GET_DRVVER			= 0x10,
+	CTLR_POWER_STATE_CHANGE			= 0x0e,
+	CTLR_POWER_SAVING			= 0x01,
+
+	PASSTHRU_SIGNATURE			= 0x4e415041,
+
+	INQUIRY_EVPD				= 0x01,
+};
+
+struct st_sgitem {
+	u8 ctrl;	/* SG_CF_xxx */
+	u8 reserved[3];
+	__le32 count;
+	__le32 addr;
+	__le32 addr_hi;
+};
+
+struct st_sgtable {
+	__le16 sg_count;
+	__le16 max_sg_count;
+	__le32 sz_in_byte;
+	struct st_sgitem table[ST_MAX_SG];
+};
+
+struct handshake_frame {
+	__le32 rb_phy;		/* request payload queue physical address */
+	__le32 rb_phy_hi;
+	__le16 req_sz;		/* size of each request payload */
+	__le16 req_cnt;		/* count of reqs the buffer can hold */
+	__le16 status_sz;	/* size of each status payload */
+	__le16 status_cnt;	/* count of status the buffer can hold */
+	__le32 hosttime;	/* seconds from Jan 1, 1970 (GMT) */
+	__le32 hosttime_hi;
+	u8 partner_type;	/* who sends this frame */
+	u8 reserved0[7];
+	__le32 partner_ver_major;
+	__le32 partner_ver_minor;
+	__le32 partner_ver_oem;
+	__le32 partner_ver_build;
+	u32 reserved1[4];
+};
+
+struct req_msg {
+	__le16 tag;
+	u8 lun;
+	u8 target;
+	u8 task_attr;
+	u8 task_manage;
+	u8 prd_entry;
+	u8 payload_sz;		/* payload size in 4-byte */
+	u8 cdb[STEX_CDB_LENGTH];
+	u8 variable[REQ_VARIABLE_LEN];
+};
+
+struct status_msg {
+	__le16 tag;
+	u8 lun;
+	u8 target;
+	u8 srb_status;
+	u8 scsi_status;
+	u8 reserved;
+	u8 payload_sz;		/* payload size in 4-byte */
+	u8 variable[STATUS_VAR_LEN];
+};
+
+struct ver_info {
+	u32 major;
+	u32 minor;
+	u32 oem;
+	u32 build;
+	u32 reserved[2];
+};
+
+struct st_frame {
+	u32 base[6];
+	u32 rom_addr;
+
+	struct ver_info drv_ver;
+	struct ver_info bios_ver;
+
+	u32 bus;
+	u32 slot;
+	u32 irq_level;
+	u32 irq_vec;
+	u32 id;
+	u32 subid;
+
+	u32 dimm_size;
+	u8 dimm_type;
+	u8 reserved[3];
+
+	u32 channel;
+	u32 reserved1;
+};
+
+struct st_drvver {
+	u32 major;
+	u32 minor;
+	u32 oem;
+	u32 build;
+	u32 signature[2];
+	u8 console_id;
+	u8 host_no;
+	u8 reserved0[2];
+	u32 reserved[3];
+};
+
+#define MU_REQ_BUFFER_SIZE	(MU_REQ_COUNT * sizeof(struct req_msg))
+#define MU_STATUS_BUFFER_SIZE	(MU_STATUS_COUNT * sizeof(struct status_msg))
+#define MU_BUFFER_SIZE		(MU_REQ_BUFFER_SIZE + MU_STATUS_BUFFER_SIZE)
+#define STEX_BUFFER_SIZE	(MU_BUFFER_SIZE + sizeof(struct st_frame))
+
+struct st_ccb {
+	struct req_msg *req;
+	struct scsi_cmnd *cmd;
+
+	void *sense_buffer;
+	unsigned int sense_bufflen;
+	int sg_count;
+
+	u32 req_type;
+	u8 srb_status;
+	u8 scsi_status;
+};
+
+struct st_hba {
+	void __iomem *mmio_base;	/* iomapped PCI memory space */
+	void *dma_mem;
+	dma_addr_t dma_handle;
+
+	struct Scsi_Host *host;
+	struct pci_dev *pdev;
+
+	u32 req_head;
+	u32 req_tail;
+	u32 status_head;
+	u32 status_tail;
+
+	struct status_msg *status_buffer;
+	void *copy_buffer; /* temp buffer for driver-handled commands */
+	struct st_ccb ccb[MU_MAX_REQUEST];
+	struct st_ccb *wait_ccb;
+	wait_queue_head_t waitq;
+
+	unsigned int mu_status;
+	int out_req_cnt;
+
+	unsigned int cardtype;
+};
+
+static const char console_inq_page[] =
+{
+	0x03,0x00,0x03,0x03,0xFA,0x00,0x00,0x30,
+	0x50,0x72,0x6F,0x6D,0x69,0x73,0x65,0x20,	/* "Promise " */
+	0x52,0x41,0x49,0x44,0x20,0x43,0x6F,0x6E,	/* "RAID Con" */
+	0x73,0x6F,0x6C,0x65,0x20,0x20,0x20,0x20,	/* "sole    " */
+	0x31,0x2E,0x30,0x30,0x20,0x20,0x20,0x20,	/* "1.00    " */
+	0x53,0x58,0x2F,0x52,0x53,0x41,0x46,0x2D,	/* "SX/RSAF-" */
+	0x54,0x45,0x31,0x2E,0x30,0x30,0x20,0x20,	/* "TE1.00  " */
+	0x0C,0x20,0x20,0x20,0x20,0x20,0x20,0x20
+};
+
+MODULE_AUTHOR("Ed Lin");
+MODULE_DESCRIPTION("Promise Technology SuperTrak EX Controllers");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(ST_DRIVER_VERSION);
+
+static void stex_gettime(__le32 *time)
+{
+	struct timeval tv;
+	do_gettimeofday(&tv);
+	/* fills two consecutive words: low 32 bits of tv_sec, then the rest */
+	*time = cpu_to_le32(tv.tv_sec & 0xffffffff);
+	*(time + 1) = cpu_to_le32((tv.tv_sec >> 16) >> 16);
+}
+
+static struct status_msg *stex_get_status(struct st_hba *hba)
+{
+	struct status_msg *status =
+		hba->status_buffer + hba->status_tail;
+	/* take the status_tail slot and advance it modulo MU_STATUS_COUNT */
+	++hba->status_tail;
+	hba->status_tail %= MU_STATUS_COUNT;
+
+	return status;
+}
+
+static void stex_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq)
+{
+	cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+	/* matching sense data: key in byte 2, ASC/ASCQ in bytes 12/13 */
+	cmd->sense_buffer[0] = 0x70;    /* fixed format, current */
+	cmd->sense_buffer[2] = sk;
+	cmd->sense_buffer[7] = 18 - 8;  /* additional sense length */
+	cmd->sense_buffer[12] = asc;
+	cmd->sense_buffer[13] = ascq;
+}
+
+static void stex_invalid_field(struct scsi_cmnd *cmd,
+			       void (*done)(struct scsi_cmnd *))
+{
+	/* "Invalid field in cdb" (ASC 0x24, ASCQ 0x00), then complete cmd */
+	stex_set_sense(cmd, ILLEGAL_REQUEST, 0x24, 0x0);
+	done(cmd);
+}
+
+static struct req_msg *stex_alloc_req(struct st_hba *hba)
+{
+	struct req_msg *req = ((struct req_msg *)hba->dma_mem) +
+		hba->req_head;
+	/* claim the req_head slot and advance the head modulo MU_REQ_COUNT */
+	++hba->req_head;
+	hba->req_head %= MU_REQ_COUNT;
+
+	return req;
+}
+
+static int stex_map_sg(struct st_hba *hba,
+	struct req_msg *req, struct st_ccb *ccb)
+{
+	struct pci_dev *pdev = hba->pdev;
+	struct scsi_cmnd *cmd;
+	dma_addr_t dma_handle;
+	struct scatterlist *src;
+	struct st_sgtable *dst;
+	int i;
+
+	cmd = ccb->cmd;
+	dst = (struct st_sgtable *)req->variable; /* table is built in place */
+	dst->max_sg_count = cpu_to_le16(ST_MAX_SG);
+	dst->sz_in_byte = cpu_to_le32(cmd->request_bufflen);
+	/* scatter-gather request: map the list, emit one item per segment */
+	if (cmd->use_sg) {
+		int n_elem;
+
+		src = (struct scatterlist *) cmd->request_buffer;
+		n_elem = pci_map_sg(pdev, src,
+			cmd->use_sg, cmd->sc_data_direction);
+		if (n_elem <= 0)
+			return -EIO;
+
+		ccb->sg_count = n_elem;
+		dst->sg_count = cpu_to_le16((u16)n_elem);
+
+		for (i = 0; i < n_elem; i++, src++) {
+			dst->table[i].count = cpu_to_le32((u32)sg_dma_len(src));
+			dst->table[i].addr =
+				cpu_to_le32(sg_dma_address(src) & 0xffffffff);
+			dst->table[i].addr_hi =
+				cpu_to_le32((sg_dma_address(src) >> 16) >> 16);
+			dst->table[i].ctrl = SG_CF_64B | SG_CF_HOST;
+		}
+		dst->table[--i].ctrl |= SG_CF_EOT; /* mark the last item */
+		return 0;
+	}
+	/* single buffer: map it and describe it with one end-of-table item */
+	dma_handle = pci_map_single(pdev, cmd->request_buffer,
+		cmd->request_bufflen, cmd->sc_data_direction);
+	cmd->SCp.dma_handle = dma_handle;
+
+	ccb->sg_count = 1;
+	dst->sg_count = cpu_to_le16(1);
+	dst->table[0].addr = cpu_to_le32(dma_handle & 0xffffffff);
+	dst->table[0].addr_hi = cpu_to_le32((dma_handle >> 16) >> 16);
+	dst->table[0].count = cpu_to_le32((u32)cmd->request_bufflen);
+	dst->table[0].ctrl = SG_CF_EOT | SG_CF_64B | SG_CF_HOST;
+
+	return 0;
+}
+
+static void stex_internal_copy(struct scsi_cmnd *cmd,
+	const void *src, size_t *count, int sg_count)
+{
+	size_t lcount;
+	size_t len;
+	void *s, *d, *base = NULL;
+	if (*count > cmd->request_bufflen)
+		*count = cmd->request_bufflen; /* clamp to the data buffer */
+	lcount = *count; /* bytes still to copy */
+	while (lcount) {
+		len = lcount;
+		s = (void *)src;
+		if (cmd->use_sg) {
+			size_t offset = *count - lcount;
+			s += offset;
+			base = scsi_kmap_atomic_sg(cmd->request_buffer,
+				sg_count, &offset, &len);
+			if (base == NULL) {
+				*count -= lcount; /* report what was copied */
+				return;
+			}
+			d = base + offset;
+		} else
+			d = cmd->request_buffer;
+
+		memcpy(d, s, len);
+
+		lcount -= len;
+		if (cmd->use_sg)
+			scsi_kunmap_atomic_sg(base);
+	}
+}
+
+static int stex_direct_copy(struct scsi_cmnd *cmd,
+	const void *src, size_t count)
+{
+	struct st_hba *hba = (struct st_hba *) &cmd->device->host->hostdata[0];
+	size_t cp_len = count;
+	int n_elem = 0;
+
+	if (cmd->use_sg) {
+		n_elem = pci_map_sg(hba->pdev, cmd->request_buffer,
+			cmd->use_sg, cmd->sc_data_direction);
+		if (n_elem <= 0)
+			return 0;
+	}
+	/* cp_len comes back as the number of bytes actually copied */
+	stex_internal_copy(cmd, src, &cp_len, n_elem);
+
+	if (cmd->use_sg)
+		pci_unmap_sg(hba->pdev, cmd->request_buffer,
+			cmd->use_sg, cmd->sc_data_direction);
+	return cp_len == count; /* non-zero only if nothing was cut short */
+}
+
+static void stex_controller_info(struct st_hba *hba, struct st_ccb *ccb)
+{
+	struct st_frame *p;
+	size_t count = sizeof(struct st_frame);
+	/* fill parts of the st_frame in copy_buffer, then copy it to the cmd */
+	p = hba->copy_buffer;
+	memset(p->base, 0, sizeof(u32)*6);
+	*(unsigned long *)(p->base) = pci_resource_start(hba->pdev, 0);
+	p->rom_addr = 0;
+
+	p->drv_ver.major = ST_VER_MAJOR;
+	p->drv_ver.minor = ST_VER_MINOR;
+	p->drv_ver.oem = ST_OEM;
+	p->drv_ver.build = ST_BUILD_VER;
+
+	p->bus = hba->pdev->bus->number;
+	p->slot = hba->pdev->devfn;
+	p->irq_level = 0;
+	p->irq_vec = hba->pdev->irq;
+	p->id = hba->pdev->vendor << 16 | hba->pdev->device;
+	p->subid =
+		hba->pdev->subsystem_vendor << 16 | hba->pdev->subsystem_device;
+
+	stex_internal_copy(ccb->cmd, p, &count, ccb->sg_count);
+}
+
+static void
+stex_send_cmd(struct st_hba *hba, struct req_msg *req, u16 tag)
+{
+	req->tag = cpu_to_le16(tag);
+	req->task_attr = TASK_ATTRIBUTE_SIMPLE;
+	req->task_manage = 0; /* not supported yet */
+	req->payload_sz = (u8)(sizeof(struct req_msg)/sizeof(u32));
+
+	hba->ccb[tag].req = req;
+	hba->out_req_cnt++;
+	/* publish the new request head in IMR0 and ring the inbound doorbell */
+	writel(hba->req_head, hba->mmio_base + IMR0);
+	writel(MU_INBOUND_DOORBELL_REQHEADCHANGED, hba->mmio_base + IDBL);
+	readl(hba->mmio_base + IDBL); /* flush */
+}
+
+static int
+stex_slave_alloc(struct scsi_device *sdev)
+{
+	/* Cheat: usually extracted from Inquiry data */
+	sdev->tagged_supported = 1;
+
+	scsi_activate_tcq(sdev, sdev->host->can_queue);
+
+	return 0;
+}
+
+static int
+stex_slave_config(struct scsi_device *sdev)
+{
+	sdev->use_10_for_rw = 1;
+	sdev->use_10_for_ms = 1;
+	sdev->timeout = 60 * HZ;
+	sdev->tagged_supported = 1;
+
+	return 0;
+}
+
+static void
+stex_slave_destroy(struct scsi_device *sdev)
+{
+	scsi_deactivate_tcq(sdev, 1);
+}
+
+static int
+stex_queuecommand(struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
+{
+	struct st_hba *hba;
+	struct Scsi_Host *host;
+	unsigned int id,lun;
+	struct req_msg *req;
+	u16 tag;
+	host = cmd->device->host;
+	id = cmd->device->id;
+	lun = cmd->device->channel; /* firmware lun issue work around */
+	hba = (struct st_hba *) &host->hostdata[0];
+	/* commands answered by the driver itself; the rest go to firmware */
+	switch (cmd->cmnd[0]) {
+	case MODE_SENSE_10:
+	{
+		static char ms10_caching_page[12] =
+			{ 0, 0x12, 0, 0, 0, 0, 0, 0, 0x8, 0xa, 0x4, 0 };
+		unsigned char page;
+		page = cmd->cmnd[2] & 0x3f;
+		if (page == 0x8 || page == 0x3f) {
+			stex_direct_copy(cmd, ms10_caching_page,
+					sizeof(ms10_caching_page));
+			cmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
+			done(cmd);
+		} else
+			stex_invalid_field(cmd, done);
+		return 0;
+	}
+	case INQUIRY:
+		if (id != ST_MAX_ARRAY_SUPPORTED)
+			break;
+		if (lun == 0 && (cmd->cmnd[1] & INQUIRY_EVPD) == 0) {
+			stex_direct_copy(cmd, console_inq_page,
+				sizeof(console_inq_page));
+			cmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8;
+			done(cmd);
+		} else
+			stex_invalid_field(cmd, done);
+		return 0;
+	case PASSTHRU_CMD:
+		if (cmd->cmnd[1] == PASSTHRU_GET_DRVVER) {
+			struct st_drvver ver = { 0 }; /* no stack bytes leak out */
+			ver.major = ST_VER_MAJOR;
+			ver.minor = ST_VER_MINOR;
+			ver.oem = ST_OEM;
+			ver.build = ST_BUILD_VER;
+			ver.signature[0] = PASSTHRU_SIGNATURE;
+			ver.console_id = ST_MAX_ARRAY_SUPPORTED;
+			ver.host_no = hba->host->host_no;
+			cmd->result = stex_direct_copy(cmd, &ver, sizeof(ver)) ?
+				DID_OK << 16 | COMMAND_COMPLETE << 8 :
+				DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+			done(cmd);
+			return 0;
+		}
+		/* fall through: other passthru opcodes go to the firmware */
+	default:
+		break;
+	}
+
+	cmd->scsi_done = done;
+	tag = cmd->request->tag;
+
+	if (unlikely(tag >= host->can_queue))
+		return SCSI_MLQUEUE_HOST_BUSY;
+
+	req = stex_alloc_req(hba);
+	req->lun = lun;
+	req->target = id;
+
+	/* cdb */
+	memcpy(req->cdb, cmd->cmnd, STEX_CDB_LENGTH);
+
+	hba->ccb[tag].cmd = cmd;
+	hba->ccb[tag].sense_bufflen = SCSI_SENSE_BUFFERSIZE;
+	hba->ccb[tag].sense_buffer = cmd->sense_buffer;
+	hba->ccb[tag].req_type = 0;
+	/* NOTE(review): a stex_map_sg() failure (-EIO) is ignored here */
+	if (cmd->sc_data_direction != DMA_NONE)
+		stex_map_sg(hba, req, &hba->ccb[tag]);
+
+	stex_send_cmd(hba, req, tag);
+	return 0;
+}
+
+static void stex_unmap_sg(struct st_hba *hba, struct scsi_cmnd *cmd)
+{
+	if (cmd->sc_data_direction != DMA_NONE) {
+		if (cmd->use_sg)
+			pci_unmap_sg(hba->pdev, cmd->request_buffer,
+				cmd->use_sg, cmd->sc_data_direction);
+		else
+			pci_unmap_single(hba->pdev, cmd->SCp.dma_handle,
+				cmd->request_bufflen, cmd->sc_data_direction);
+	}
+}
+
+static void stex_scsi_done(struct st_ccb *ccb)
+{
+	struct scsi_cmnd *cmd = ccb->cmd;
+	int result;
+	/* turn the recorded srb/scsi status into cmd->result and complete */
+	if (ccb->srb_status == SRB_STATUS_SUCCESS ||  ccb->srb_status == 0) {
+		result = ccb->scsi_status;
+		switch (ccb->scsi_status) {
+		case SAM_STAT_GOOD:
+			result |= DID_OK << 16 | COMMAND_COMPLETE << 8;
+			break;
+		case SAM_STAT_CHECK_CONDITION:
+			result |= DRIVER_SENSE << 24;
+			break;
+		case SAM_STAT_BUSY:
+			result |= DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8;
+			break;
+		default:
+			result |= DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+			break;
+		}
+	}
+	else if (ccb->srb_status & SRB_SEE_SENSE)
+		result = DRIVER_SENSE << 24 | SAM_STAT_CHECK_CONDITION;
+	else switch (ccb->srb_status) {
+		case SRB_STATUS_SELECTION_TIMEOUT:
+			result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
+			break;
+		case SRB_STATUS_BUSY:
+			result = DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8;
+			break;
+		case SRB_STATUS_INVALID_REQUEST:
+		case SRB_STATUS_ERROR:
+		default:
+			result = DID_ERROR << 16 | COMMAND_COMPLETE << 8;
+			break;
+	}
+
+	cmd->result = result;
+	cmd->scsi_done(cmd);
+}
+
+static void stex_copy_data(struct st_ccb *ccb,
+	struct status_msg *resp, unsigned int variable)
+{
+	size_t count = variable;
+	if (resp->scsi_status != SAM_STAT_GOOD) { /* payload is sense data */
+		if (ccb->sense_buffer != NULL)
+			memcpy(ccb->sense_buffer, resp->variable,
+				min(variable, ccb->sense_bufflen));
+		return;
+	}
+	/* good status: the payload is data for the command's own buffer */
+	if (ccb->cmd == NULL)
+		return;
+	stex_internal_copy(ccb->cmd, resp->variable, &count, ccb->sg_count);
+}
+
+static void stex_mu_intr(struct st_hba *hba, u32 doorbell)
+{
+	void __iomem *base = hba->mmio_base;
+	struct status_msg *resp;
+	struct st_ccb *ccb;
+	unsigned int size;
+	u16 tag;
+
+	if (!(doorbell & MU_OUTBOUND_DOORBELL_STATUSHEADCHANGED))
+		return;
+
+	/* status payloads */
+	hba->status_head = readl(base + OMR1);
+	if (unlikely(hba->status_head >= MU_STATUS_COUNT)) {
+		printk(KERN_WARNING DRV_NAME "(%s): invalid status head\n",
+			pci_name(hba->pdev));
+		return;
+	}
+	/* not started, or nothing outstanding: drop whatever is queued */
+	if (unlikely(hba->mu_status != MU_STATE_STARTED ||
+		hba->out_req_cnt <= 0)) {
+		hba->status_tail = hba->status_head;
+		goto update_status;
+	}
+	/* drain every entry between our tail and the head read from OMR1 */
+	while (hba->status_tail != hba->status_head) {
+		resp = stex_get_status(hba);
+		tag = le16_to_cpu(resp->tag);
+		if (unlikely(tag >= hba->host->can_queue)) {
+			printk(KERN_WARNING DRV_NAME
+				"(%s): invalid tag\n", pci_name(hba->pdev));
+			continue;
+		}
+
+		ccb = &hba->ccb[tag];
+		if (hba->wait_ccb == ccb)
+			hba->wait_ccb = NULL;
+		if (unlikely(ccb->req == NULL)) {
+			printk(KERN_WARNING DRV_NAME
+				"(%s): lagging req\n", pci_name(hba->pdev));
+			continue;
+		}
+
+		size = resp->payload_sz * sizeof(u32); /* payload size */
+		if (unlikely(size < sizeof(*resp) - STATUS_VAR_LEN ||
+			size > sizeof(*resp))) {
+			printk(KERN_WARNING DRV_NAME "(%s): bad status size\n",
+				pci_name(hba->pdev));
+		} else {
+			size -= sizeof(*resp) - STATUS_VAR_LEN; /* copy size */
+			if (size)
+				stex_copy_data(ccb, resp, size);
+		}
+		/* keep both status bytes for stex_scsi_done() */
+		ccb->srb_status = resp->srb_status;
+		ccb->scsi_status = resp->scsi_status;
+
+		if (likely(ccb->cmd != NULL)) {
+			if (unlikely(ccb->cmd->cmnd[0] == PASSTHRU_CMD &&
+				ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER))
+				stex_controller_info(hba, ccb);
+			stex_unmap_sg(hba, ccb->cmd);
+			stex_scsi_done(ccb);
+			hba->out_req_cnt--;
+		} else if (ccb->req_type & PASSTHRU_REQ_TYPE) {
+			hba->out_req_cnt--;
+			if (ccb->req_type & PASSTHRU_REQ_NO_WAKEUP) {
+				ccb->req_type = 0;
+				continue;
+			}
+			ccb->req_type = 0;
+			if (waitqueue_active(&hba->waitq))
+				wake_up(&hba->waitq);
+		}
+	}
+	/* write the head we have consumed up to back through IMR1 */
+update_status:
+	writel(hba->status_head, base + IMR1);
+	readl(base + IMR1); /* flush */
+}
+
+static irqreturn_t stex_intr(int irq, void *__hba, struct pt_regs *regs)
+{
+	struct st_hba *hba = __hba;
+	void __iomem *base = hba->mmio_base;
+	u32 data;
+	unsigned long flags;
+	int handled = 0;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+
+	data = readl(base + ODBL);
+	/* 0: no doorbell bits set; all-ones: presumably a failed read */
+	if (data && data != 0xffffffff) {
+		/* clear the interrupt */
+		writel(data, base + ODBL);
+		readl(base + ODBL); /* flush */
+		stex_mu_intr(hba, data);
+		handled = 1;
+	}
+
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return IRQ_RETVAL(handled);
+}
+
+static int stex_handshake(struct st_hba *hba)
+{
+	void __iomem *base = hba->mmio_base;
+	struct handshake_frame *h;
+	dma_addr_t status_phys;
+	int i;
+
+	if (readl(base + OMR0) != MU_HANDSHAKE_SIGNATURE) {
+		writel(MU_INBOUND_DOORBELL_HANDSHAKE, base + IDBL);
+		readl(base + IDBL);
+		for (i = 0; readl(base + OMR0) != MU_HANDSHAKE_SIGNATURE
+			&& i < MU_MAX_DELAY_TIME; i++) {
+			rmb();
+			msleep(1);
+		}
+
+		if (i == MU_MAX_DELAY_TIME) {
+			printk(KERN_ERR DRV_NAME
+				"(%s): no handshake signature\n",
+				pci_name(hba->pdev));
+			return -1;
+		}
+	}
+
+	udelay(10);
+	/* build the handshake frame right after the request ring */
+	h = (struct handshake_frame *)(hba->dma_mem + MU_REQ_BUFFER_SIZE);
+	h->rb_phy = cpu_to_le32(hba->dma_handle);
+	h->rb_phy_hi = cpu_to_le32((hba->dma_handle >> 16) >> 16);
+	h->req_sz = cpu_to_le16(sizeof(struct req_msg));
+	h->req_cnt = cpu_to_le16(MU_REQ_COUNT);
+	h->status_sz = cpu_to_le16(sizeof(struct status_msg));
+	h->status_cnt = cpu_to_le16(MU_STATUS_COUNT);
+	stex_gettime(&h->hosttime);
+	h->partner_type = HMU_PARTNER_TYPE;
+	/* pass the frame's bus address (low, then high) via IMR0/IMR1 */
+	status_phys = hba->dma_handle + MU_REQ_BUFFER_SIZE;
+	writel(status_phys, base + IMR0);
+	readl(base + IMR0);
+	writel((status_phys >> 16) >> 16, base + IMR1);
+	readl(base + IMR1);
+
+	writel((status_phys >> 16) >> 16, base + OMR0); /* old fw compatible */
+	readl(base + OMR0);
+	writel(MU_INBOUND_DOORBELL_HANDSHAKE, base + IDBL);
+	readl(base + IDBL); /* flush */
+
+	udelay(10);
+	for (i = 0; readl(base + OMR0) != MU_HANDSHAKE_SIGNATURE
+		&& i < MU_MAX_DELAY_TIME; i++) {
+		rmb();
+		msleep(1);
+	}
+
+	if (i == MU_MAX_DELAY_TIME) {
+		printk(KERN_ERR DRV_NAME
+			"(%s): no signature after handshake frame\n",
+			pci_name(hba->pdev));
+		return -1;
+	}
+	/* signature seen again: clear all four message registers */
+	writel(0, base + IMR0);
+	readl(base + IMR0);
+	writel(0, base + OMR0);
+	readl(base + OMR0);
+	writel(0, base + IMR1);
+	readl(base + IMR1);
+	writel(0, base + OMR1);
+	readl(base + OMR1); /* flush */
+	hba->mu_status = MU_STATE_STARTED;
+	return 0;
+}
+
+static int stex_abort(struct scsi_cmnd *cmd)
+{
+	struct Scsi_Host *host = cmd->device->host;
+	struct st_hba *hba = (struct st_hba *)host->hostdata;
+	u16 tag = cmd->request->tag;
+	void __iomem *base;
+	u32 data;
+	int result = SUCCESS;
+	unsigned long flags;
+	base = hba->mmio_base;
+	spin_lock_irqsave(host->host_lock, flags);
+	if (tag < host->can_queue && hba->ccb[tag].cmd == cmd)
+		hba->wait_ccb = &hba->ccb[tag];
+	else {
+		for (tag = 0; tag < host->can_queue; tag++)
+			if (hba->ccb[tag].cmd == cmd) {
+				hba->wait_ccb = &hba->ccb[tag];
+				break;
+			}
+		if (tag >= host->can_queue)
+			goto out; /* no ccb holds this cmd: report SUCCESS */
+	}
+	/* run the interrupt path by hand; it clears wait_ccb on completion */
+	data = readl(base + ODBL);
+	if (data == 0 || data == 0xffffffff)
+		goto fail_out;
+
+	writel(data, base + ODBL);
+	readl(base + ODBL); /* flush */
+
+	stex_mu_intr(hba, data);
+
+	if (hba->wait_ccb == NULL) {
+		printk(KERN_WARNING DRV_NAME
+			"(%s): lost interrupt\n", pci_name(hba->pdev));
+		goto out;
+	}
+
+fail_out:
+	stex_unmap_sg(hba, cmd);
+	hba->wait_ccb->req = NULL; /* nullify the req's future return */
+	hba->wait_ccb = NULL;
+	result = FAILED;
+out:
+	spin_unlock_irqrestore(host->host_lock, flags);
+	return result;
+}
+
+static void stex_hard_reset(struct st_hba *hba)
+{
+	struct pci_bus *bus;
+	int i;
+	u16 pci_cmd;
+	u8 pci_bctl;
+	/* save the first 16 config dwords; they are written back below */
+	for (i = 0; i < 16; i++)
+		pci_read_config_dword(hba->pdev, i * 4,
+			&hba->pdev->saved_config_space[i]);
+
+	/* Reset secondary bus. Our controller(MU/ATU) is the only device on
+	   secondary bus. Consult Intel 80331/3 developer's manual for detail */
+	bus = hba->pdev->bus;
+	pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl);
+	pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
+	msleep(1);
+	pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
+	/* poll PCI_COMMAND until the bus-master bit reads back as set */
+	for (i = 0; i < MU_MAX_DELAY_TIME; i++) {
+		pci_read_config_word(hba->pdev, PCI_COMMAND, &pci_cmd);
+		if (pci_cmd & PCI_COMMAND_MASTER)
+			break;
+		msleep(1);
+	}
+
+	ssleep(5);
+	for (i = 0; i < 16; i++)
+		pci_write_config_dword(hba->pdev, i * 4,
+			hba->pdev->saved_config_space[i]);
+}
+
+static int stex_reset(struct scsi_cmnd *cmd)
+{
+	struct st_hba *hba;
+	unsigned long flags;
+	hba = (struct st_hba *) &cmd->device->host->hostdata[0];
+
+	hba->mu_status = MU_STATE_RESETTING;
+	/* only st_shasta cards get the secondary-bus reset first */
+	if (hba->cardtype == st_shasta)
+		stex_hard_reset(hba);
+
+	if (stex_handshake(hba)) {
+		printk(KERN_WARNING DRV_NAME
+			"(%s): resetting: handshake failed\n",
+			pci_name(hba->pdev));
+		return FAILED;
+	}
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	hba->req_head = 0; /* restart both rings from slot 0 */
+	hba->req_tail = 0;
+	hba->status_head = 0;
+	hba->status_tail = 0;
+	hba->out_req_cnt = 0;
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return SUCCESS;
+}
+
+static int stex_biosparam(struct scsi_device *sdev,
+	struct block_device *bdev, sector_t capacity, int geom[])
+{
+	int heads = 255, sectors = 63;
+
+	/* fake CHS geometry: 64/32 below 0x200000 sectors, else 255/63 */
+	if (capacity < 0x200000) {
+		heads = 64;
+		sectors = 32;
+	}
+
+	/* sector_div() returns the remainder; the quotient stays in capacity */
+	sector_div(capacity, heads * sectors);
+	geom[0] = heads;
+	geom[1] = sectors;
+	geom[2] = capacity;
+	return 0;
+}
+
+static struct scsi_host_template driver_template = {
+	.module				= THIS_MODULE,
+	.name				= DRV_NAME,
+	.proc_name			= DRV_NAME,
+	.bios_param			= stex_biosparam,
+	.queuecommand			= stex_queuecommand,
+	.slave_alloc			= stex_slave_alloc,
+	.slave_configure		= stex_slave_config,
+	.slave_destroy			= stex_slave_destroy,
+	.eh_abort_handler		= stex_abort,
+	.eh_host_reset_handler		= stex_reset,
+	.can_queue			= ST_CAN_QUEUE,
+	.this_id			= -1,
+	.sg_tablesize			= ST_MAX_SG,
+	.cmd_per_lun			= ST_CMD_PER_LUN,
+};
+
+static int stex_set_dma_mask(struct pci_dev * pdev)
+{
+	int ret;
+	if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)
+		&& !pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))
+		return 0; /* both 64-bit masks accepted */
+	ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK); /* 32-bit fallback */
+	if (!ret)
+		ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+	return ret;
+}
+
+static int __devinit
+stex_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct st_hba *hba;
+	struct Scsi_Host *host;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	pci_set_master(pdev);
+
+	host = scsi_host_alloc(&driver_template, sizeof(struct st_hba));
+
+	if (!host) {
+		printk(KERN_ERR DRV_NAME "(%s): scsi_host_alloc failed\n",
+			pci_name(pdev));
+		err = -ENOMEM;
+		goto out_disable;
+	}
+
+	hba = (struct st_hba *)host->hostdata;
+	memset(hba, 0, sizeof(struct st_hba));
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err < 0) {
+		printk(KERN_ERR DRV_NAME "(%s): request regions failed\n",
+			pci_name(pdev));
+		goto out_scsi_host_put;
+	}
+
+	hba->mmio_base = ioremap(pci_resource_start(pdev, 0),
+		pci_resource_len(pdev, 0));
+	if ( !hba->mmio_base) {
+		printk(KERN_ERR DRV_NAME "(%s): memory map failed\n",
+			pci_name(pdev));
+		err = -ENOMEM;
+		goto out_release_regions;
+	}
+
+	err = stex_set_dma_mask(pdev);
+	if (err) {
+		printk(KERN_ERR DRV_NAME "(%s): set dma mask failed\n",
+			pci_name(pdev));
+		goto out_iounmap;
+	}
+
+	hba->dma_mem = dma_alloc_coherent(&pdev->dev,
+		STEX_BUFFER_SIZE, &hba->dma_handle, GFP_KERNEL);
+	if (!hba->dma_mem) {
+		err = -ENOMEM;
+		printk(KERN_ERR DRV_NAME "(%s): dma mem alloc failed\n",
+			pci_name(pdev));
+		goto out_iounmap;
+	}
+
+	hba->status_buffer =
+		(struct status_msg *)(hba->dma_mem + MU_REQ_BUFFER_SIZE);
+	hba->copy_buffer = hba->dma_mem + MU_BUFFER_SIZE;
+	hba->mu_status = MU_STATE_STARTING;
+
+	hba->cardtype = (unsigned int) id->driver_data;
+
+	/* firmware uses id/lun pair for a logical drive, but lun would be
+	   always 0 if CONFIG_SCSI_MULTI_LUN not configured, so we use
+	   channel to map lun here */
+	host->max_channel = ST_MAX_LUN_PER_TARGET - 1;
+	host->max_id = ST_MAX_TARGET_NUM;
+	host->max_lun = 1;
+	host->unique_id = host->host_no;
+	host->max_cmd_len = STEX_CDB_LENGTH;
+
+	hba->host = host;
+	hba->pdev = pdev;
+	init_waitqueue_head(&hba->waitq);
+
+	err = request_irq(pdev->irq, stex_intr, IRQF_SHARED, DRV_NAME, hba);
+	if (err) {
+		printk(KERN_ERR DRV_NAME "(%s): request irq failed\n",
+			pci_name(pdev));
+		goto out_pci_free;
+	}
+
+	err = stex_handshake(hba);
+	if (err)
+		goto out_free_irq;
+
+	err = scsi_init_shared_tag_map(host, ST_CAN_QUEUE);
+	if (err) {
+		printk(KERN_ERR DRV_NAME "(%s): init shared queue failed\n",
+			pci_name(pdev));
+		goto out_free_irq;
+	}
+
+	pci_set_drvdata(pdev, hba);
+
+	err = scsi_add_host(host, &pdev->dev);
+	if (err) {
+		printk(KERN_ERR DRV_NAME "(%s): scsi_add_host failed\n",
+			pci_name(pdev));
+		goto out_free_irq;
+	}
+
+	scsi_scan_host(host);
+
+	return 0;
+
+out_free_irq:
+	free_irq(pdev->irq, hba);
+out_pci_free:
+	dma_free_coherent(&pdev->dev, STEX_BUFFER_SIZE,
+			  hba->dma_mem, hba->dma_handle);
+out_iounmap:
+	iounmap(hba->mmio_base);
+out_release_regions:
+	pci_release_regions(pdev);
+out_scsi_host_put:
+	scsi_host_put(host);
+out_disable:
+	pci_disable_device(pdev);
+
+	return err;
+}
+
+static void stex_hba_stop(struct st_hba *hba)
+{
+	struct req_msg *req;
+	unsigned long flags;
+	unsigned long before;
+	u16 tag = 0;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	req = stex_alloc_req(hba);
+	memset(req->cdb, 0, STEX_CDB_LENGTH);
+	/* CONTROLLER_CMD: power-state change to CTLR_POWER_SAVING, on tag 0 */
+	req->cdb[0] = CONTROLLER_CMD;
+	req->cdb[1] = CTLR_POWER_STATE_CHANGE;
+	req->cdb[2] = CTLR_POWER_SAVING;
+
+	hba->ccb[tag].cmd = NULL;
+	hba->ccb[tag].sg_count = 0;
+	hba->ccb[tag].sense_bufflen = 0;
+	hba->ccb[tag].sense_buffer = NULL;
+	hba->ccb[tag].req_type |= PASSTHRU_REQ_TYPE;
+
+	stex_send_cmd(hba, req, tag);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+	/* poll until stex_mu_intr() clears the flag, or ST_INTERNAL_TIMEOUT s */
+	before = jiffies;
+	while (hba->ccb[tag].req_type & PASSTHRU_REQ_TYPE) {
+		if (time_after(jiffies, before + ST_INTERNAL_TIMEOUT * HZ))
+			return;
+		msleep(10);
+	}
+}
+
+static void stex_hba_free(struct st_hba *hba)
+{
+	free_irq(hba->pdev->irq, hba);
+
+	iounmap(hba->mmio_base);
+
+	pci_release_regions(hba->pdev);
+
+	dma_free_coherent(&hba->pdev->dev, STEX_BUFFER_SIZE,
+			  hba->dma_mem, hba->dma_handle);
+}
+
+static void stex_remove(struct pci_dev *pdev)
+{
+	struct st_hba *hba = pci_get_drvdata(pdev);
+
+	scsi_remove_host(hba->host);
+
+	pci_set_drvdata(pdev, NULL);
+
+	stex_hba_stop(hba);
+
+	stex_hba_free(hba);
+
+	scsi_host_put(hba->host);
+
+	pci_disable_device(pdev);
+}
+
+static void stex_shutdown(struct pci_dev *pdev)
+{
+	struct st_hba *hba = pci_get_drvdata(pdev);
+
+	stex_hba_stop(hba);
+}
+
+static struct pci_device_id stex_pci_tbl[] = {
+	{ 0x105a, 0x8350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0xc350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0xf350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0x4301, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0x4302, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0x8301, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x105a, 0x8302, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_shasta },
+	{ 0x1725, 0x7250, PCI_ANY_ID, PCI_ANY_ID, 0, 0, st_vsc },
+	{ }	/* terminate list */
+};
+MODULE_DEVICE_TABLE(pci, stex_pci_tbl);
+
+static struct pci_driver stex_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= stex_pci_tbl,
+	.probe		= stex_probe,
+	.remove		= __devexit_p(stex_remove),
+	.shutdown	= stex_shutdown,
+};
+
+static int __init stex_init(void)
+{
+	printk(KERN_INFO DRV_NAME
+		": Promise SuperTrak EX Driver version: %s\n",
+		 ST_DRIVER_VERSION);
+
+	return pci_register_driver(&stex_pci_driver);
+}
+
+static void __exit stex_exit(void)
+{
+	pci_unregister_driver(&stex_pci_driver);
+}
+
+module_init(stex_init);
+module_exit(stex_exit);
diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c
index e681681..0372aa9 100644
--- a/drivers/scsi/ultrastor.c
+++ b/drivers/scsi/ultrastor.c
@@ -196,8 +196,8 @@
   u32 sense_data PACKED;
   /* The following fields are for software only.  They are included in
      the MSCP structure because they are associated with SCSI requests.  */
-  void (*done)(Scsi_Cmnd *);
-  Scsi_Cmnd *SCint;
+  void (*done) (struct scsi_cmnd *);
+  struct scsi_cmnd *SCint;
   ultrastor_sg_list sglist[ULTRASTOR_24F_MAX_SG]; /* use larger size for 24F */
 };
 
@@ -289,7 +289,7 @@
 
 static void ultrastor_interrupt(int, void *, struct pt_regs *);
 static irqreturn_t do_ultrastor_interrupt(int, void *, struct pt_regs *);
-static inline void build_sg_list(struct mscp *, Scsi_Cmnd *SCpnt);
+static inline void build_sg_list(struct mscp *, struct scsi_cmnd *SCpnt);
 
 
 /* Always called with host lock held */
@@ -673,7 +673,7 @@
     return buf;
 }
 
-static inline void build_sg_list(struct mscp *mscp, Scsi_Cmnd *SCpnt)
+static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt)
 {
 	struct scatterlist *sl;
 	long transfer_length = 0;
@@ -694,7 +694,8 @@
 	mscp->transfer_data_length = transfer_length;
 }
 
-static int ultrastor_queuecommand(Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *))
+static int ultrastor_queuecommand(struct scsi_cmnd *SCpnt,
+				void (*done) (struct scsi_cmnd *))
 {
     struct mscp *my_mscp;
 #if ULTRASTOR_MAX_CMDS > 1
@@ -833,7 +834,7 @@
 
  */
 
-static int ultrastor_abort(Scsi_Cmnd *SCpnt)
+static int ultrastor_abort(struct scsi_cmnd *SCpnt)
 {
 #if ULTRASTOR_DEBUG & UD_ABORT
     char out[108];
@@ -843,7 +844,7 @@
     unsigned int mscp_index;
     unsigned char old_aborted;
     unsigned long flags;
-    void (*done)(Scsi_Cmnd *);
+    void (*done)(struct scsi_cmnd *);
     struct Scsi_Host *host = SCpnt->device->host;
 
     if(config.slot) 
@@ -960,7 +961,7 @@
     return SUCCESS;
 }
 
-static int ultrastor_host_reset(Scsi_Cmnd * SCpnt)
+static int ultrastor_host_reset(struct scsi_cmnd * SCpnt)
 {
     unsigned long flags;
     int i;
@@ -1045,8 +1046,8 @@
     unsigned int mscp_index;
 #endif
     struct mscp *mscp;
-    void (*done)(Scsi_Cmnd *);
-    Scsi_Cmnd *SCtmp;
+    void (*done) (struct scsi_cmnd *);
+    struct scsi_cmnd *SCtmp;
 
 #if ULTRASTOR_MAX_CMDS == 1
     mscp = &config.mscp[0];
@@ -1079,7 +1080,7 @@
 	    return;
 	}
 	if (icm_status == 3) {
-	    void (*done)(Scsi_Cmnd *) = mscp->done;
+	    void (*done)(struct scsi_cmnd *) = mscp->done;
 	    if (done) {
 		mscp->done = NULL;
 		mscp->SCint->result = DID_ABORT << 16;
diff --git a/drivers/scsi/ultrastor.h b/drivers/scsi/ultrastor.h
index da759a1..a692905 100644
--- a/drivers/scsi/ultrastor.h
+++ b/drivers/scsi/ultrastor.h
@@ -14,11 +14,13 @@
 #define _ULTRASTOR_H
 
 static int ultrastor_detect(struct scsi_host_template *);
-static const char *ultrastor_info(struct Scsi_Host * shpnt);
-static int ultrastor_queuecommand(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
-static int ultrastor_abort(Scsi_Cmnd *);
-static int ultrastor_host_reset(Scsi_Cmnd *);
-static int ultrastor_biosparam(struct scsi_device *, struct block_device *, sector_t, int *);
+static const char *ultrastor_info(struct Scsi_Host *shpnt);
+static int ultrastor_queuecommand(struct scsi_cmnd *,
+				void (*done)(struct scsi_cmnd *));
+static int ultrastor_abort(struct scsi_cmnd *);
+static int ultrastor_host_reset(struct scsi_cmnd *);
+static int ultrastor_biosparam(struct scsi_device *, struct block_device *,
+				sector_t, int *);
 
 
 #define ULTRASTOR_14F_MAX_SG 16
diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c
index bfd2a22..a3b99ca 100644
--- a/drivers/serial/pmac_zilog.c
+++ b/drivers/serial/pmac_zilog.c
@@ -1400,8 +1400,8 @@
 static int __init pmz_init_port(struct uart_pmac_port *uap)
 {
 	struct device_node *np = uap->node;
-	char *conn;
-	struct slot_names_prop {
+	const char *conn;
+	const struct slot_names_prop {
 		int	count;
 		char	name[1];
 	} *slots;
@@ -1458,7 +1458,7 @@
 		uap->flags |= PMACZILOG_FLAG_IS_IRDA;
 	uap->port_type = PMAC_SCC_ASYNC;
 	/* 1999 Powerbook G3 has slot-names property instead */
-	slots = (struct slot_names_prop *)get_property(np, "slot-names", &len);
+	slots = get_property(np, "slot-names", &len);
 	if (slots && slots->count > 0) {
 		if (strcmp(slots->name, "IrDA") == 0)
 			uap->flags |= PMACZILOG_FLAG_IS_IRDA;
@@ -1470,7 +1470,8 @@
 	if (ZS_IS_INTMODEM(uap)) {
 		struct device_node* i2c_modem = find_devices("i2c-modem");
 		if (i2c_modem) {
-			char* mid = get_property(i2c_modem, "modem-id", NULL);
+			const char* mid =
+				get_property(i2c_modem, "modem-id", NULL);
 			if (mid) switch(*mid) {
 			case 0x04 :
 			case 0x05 :
diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c
index a2c56b2..3305fb6 100644
--- a/drivers/usb/input/hid-core.c
+++ b/drivers/usb/input/hid-core.c
@@ -1818,7 +1818,7 @@
 	int n, len, insize = 0;
 
         /* Ignore all Wacom devices */
-        if (dev->descriptor.idVendor == USB_VENDOR_ID_WACOM)
+        if (le16_to_cpu(dev->descriptor.idVendor) == USB_VENDOR_ID_WACOM)
                 return NULL;
 
 	for (n = 0; hid_blacklist[n].idVendor; n++)
diff --git a/drivers/video/S3triofb.c b/drivers/video/S3triofb.c
index afd146f..397005e 100644
--- a/drivers/video/S3triofb.c
+++ b/drivers/video/S3triofb.c
@@ -349,30 +349,30 @@
     s3trio_name[sizeof(s3trio_name)-1] = '\0';
     strcpy(fb_fix.id, s3trio_name);
 
-    if((pp = (int *)get_property(dp, "vendor-id", &len)) != NULL
+    if((pp = get_property(dp, "vendor-id", &len)) != NULL
 	&& *pp!=PCI_VENDOR_ID_S3) {
 	printk("%s: can't find S3 Trio board\n", dp->full_name);
 	return;
     }
 
-    if((pp = (int *)get_property(dp, "device-id", &len)) != NULL
+    if((pp = get_property(dp, "device-id", &len)) != NULL
 	&& *pp!=PCI_DEVICE_ID_S3_TRIO) {
 	printk("%s: can't find S3 Trio board\n", dp->full_name);
 	return;
     }
 
-    if ((pp = (int *)get_property(dp, "depth", &len)) != NULL
+    if ((pp = get_property(dp, "depth", &len)) != NULL
 	&& len == sizeof(int) && *pp != 8) {
 	printk("%s: can't use depth = %d\n", dp->full_name, *pp);
 	return;
     }
-    if ((pp = (int *)get_property(dp, "width", &len)) != NULL
+    if ((pp = get_property(dp, "width", &len)) != NULL
 	&& len == sizeof(int))
 	fb_var.xres = fb_var.xres_virtual = *pp;
-    if ((pp = (int *)get_property(dp, "height", &len)) != NULL
+    if ((pp = get_property(dp, "height", &len)) != NULL
 	&& len == sizeof(int))
 	fb_var.yres = fb_var.yres_virtual = *pp;
-    if ((pp = (int *)get_property(dp, "linebytes", &len)) != NULL
+    if ((pp = get_property(dp, "linebytes", &len)) != NULL
 	&& len == sizeof(int))
 	fb_fix.line_length = *pp;
     else
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 8e3400d..0ed577e 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -413,11 +413,11 @@
 static int __devinit radeon_read_xtal_OF (struct radeonfb_info *rinfo)
 {
 	struct device_node *dp = rinfo->of_node;
-	u32 *val;
+	const u32 *val;
 
 	if (dp == NULL)
 		return -ENODEV;
-	val = (u32 *) get_property(dp, "ATY,RefCLK", NULL);
+	val = get_property(dp, "ATY,RefCLK", NULL);
 	if (!val || !*val) {
 		printk(KERN_WARNING "radeonfb: No ATY,RefCLK property !\n");
 		return -EINVAL;
@@ -425,11 +425,11 @@
 
 	rinfo->pll.ref_clk = (*val) / 10;
 
-	val = (u32 *) get_property(dp, "ATY,SCLK", NULL);
+	val = get_property(dp, "ATY,SCLK", NULL);
 	if (val && *val)
 		rinfo->pll.sclk = (*val) / 10;
 
-	val = (u32 *) get_property(dp, "ATY,MCLK", NULL);
+	val = get_property(dp, "ATY,MCLK", NULL);
 	if (val && *val)
 		rinfo->pll.mclk = (*val) / 10;
 
diff --git a/drivers/video/aty/radeon_monitor.c b/drivers/video/aty/radeon_monitor.c
index 98c05bc..ea531a6 100644
--- a/drivers/video/aty/radeon_monitor.c
+++ b/drivers/video/aty/radeon_monitor.c
@@ -64,13 +64,13 @@
 {
         static char *propnames[] = { "DFP,EDID", "LCD,EDID", "EDID",
 				     "EDID1", "EDID2",  NULL };
-	u8 *pedid = NULL;
-	u8 *pmt = NULL;
+	const u8 *pedid = NULL;
+	const u8 *pmt = NULL;
 	u8 *tmp;
         int i, mt = MT_NONE;  
 	
 	RTRACE("analyzing OF properties...\n");
-	pmt = (u8 *)get_property(dp, "display-type", NULL);
+	pmt = get_property(dp, "display-type", NULL);
 	if (!pmt)
 		return MT_NONE;
 	RTRACE("display-type: %s\n", pmt);
@@ -89,7 +89,7 @@
 	}
 
 	for (i = 0; propnames[i] != NULL; ++i) {
-		pedid = (u8 *)get_property(dp, propnames[i], NULL);
+		pedid = get_property(dp, propnames[i], NULL);
 		if (pedid != NULL)
 			break;
 	}
@@ -124,14 +124,14 @@
 		return MT_NONE;
 
 	if (rinfo->has_CRTC2) {
-		char *pname;
+		const char *pname;
 		int len, second = 0;
 
 		dp = dp->child;
 		do {
 			if (!dp)
 				return MT_NONE;
-			pname = (char *)get_property(dp, "name", NULL);
+			pname = get_property(dp, "name", NULL);
 			if (!pname)
 				return MT_NONE;
 			len = strlen(pname);
diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c
index f31e606..e308ed2 100644
--- a/drivers/video/aty/radeon_pm.c
+++ b/drivers/video/aty/radeon_pm.c
@@ -1268,7 +1268,7 @@
 			  0x21320032, 0xa1320032, 0x21320032, 0xffffffff,
 			  0x31320032 };
 
-		u32 *mrtable = default_mrtable;
+		const u32 *mrtable = default_mrtable;
 		int i, mrtable_size = ARRAY_SIZE(default_mrtable);
 
 		mdelay(30);
@@ -1287,7 +1287,7 @@
 		if (rinfo->of_node != NULL) {
 			int size;
 
-			mrtable = (u32 *)get_property(rinfo->of_node, "ATY,MRT", &size);
+			mrtable = get_property(rinfo->of_node, "ATY,MRT", &size);
 			if (mrtable)
 				mrtable_size = size >> 2;
 			else
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 390439b..1b4f75d 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -3197,11 +3197,11 @@
 		return;
 
 #ifdef CONFIG_ATARI
-	free_irq(IRQ_AUTO_4, fbcon_vbl_handler);
+	free_irq(IRQ_AUTO_4, fb_vbl_handler);
 #endif
 #ifdef CONFIG_MAC
 	if (MACH_IS_MAC && vbl_detected)
-		free_irq(IRQ_MAC_VBL, fbcon_vbl_handler);
+		free_irq(IRQ_MAC_VBL, fb_vbl_handler);
 #endif
 
 	kfree((void *)softback_buf);
diff --git a/drivers/video/nvidia/nv_of.c b/drivers/video/nvidia/nv_of.c
index 8209106..d9af88c 100644
--- a/drivers/video/nvidia/nv_of.c
+++ b/drivers/video/nvidia/nv_of.c
@@ -32,7 +32,7 @@
 {
 	struct nvidia_par *par = info->par;
 	struct device_node *parent, *dp;
-	unsigned char *pedid = NULL;
+	const unsigned char *pedid = NULL;
 	static char *propnames[] = {
 		"DFP,EDID", "LCD,EDID", "EDID", "EDID1",
 		"EDID,B", "EDID,A", NULL };
@@ -42,20 +42,19 @@
 	if (parent == NULL)
 		return -1;
 	if (par->twoHeads) {
-		char *pname;
+		const char *pname;
 		int len;
 
 		for (dp = NULL;
 		     (dp = of_get_next_child(parent, dp)) != NULL;) {
-			pname = (char *)get_property(dp, "name", NULL);
+			pname = get_property(dp, "name", NULL);
 			if (!pname)
 				continue;
 			len = strlen(pname);
 			if ((pname[len-1] == 'A' && conn == 1) ||
 			    (pname[len-1] == 'B' && conn == 2)) {
 				for (i = 0; propnames[i] != NULL; ++i) {
-					pedid = (unsigned char *)
-						get_property(dp, propnames[i],
+					pedid = get_property(dp, propnames[i],
 							     NULL);
 					if (pedid != NULL)
 						break;
@@ -67,8 +66,7 @@
 	}
 	if (pedid == NULL) {
 		for (i = 0; propnames[i] != NULL; ++i) {
-			pedid = (unsigned char *)
-				get_property(parent, propnames[i], NULL);
+			pedid = get_property(parent, propnames[i], NULL);
 			if (pedid != NULL)
 				break;
 		}
diff --git a/drivers/video/offb.c b/drivers/video/offb.c
index 0013311..bad0e98 100644
--- a/drivers/video/offb.c
+++ b/drivers/video/offb.c
@@ -409,30 +409,30 @@
 	unsigned int flags, rsize, addr_prop = 0;
 	unsigned long max_size = 0;
 	u64 rstart, address = OF_BAD_ADDR;
-	u32 *pp, *addrp, *up;
+	const u32 *pp, *addrp, *up;
 	u64 asize;
 
-	pp = (u32 *)get_property(dp, "linux,bootx-depth", &len);
+	pp = get_property(dp, "linux,bootx-depth", &len);
 	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "depth", &len);
+		pp = get_property(dp, "depth", &len);
 	if (pp && len == sizeof(u32))
 		depth = *pp;
 
-	pp = (u32 *)get_property(dp, "linux,bootx-width", &len);
+	pp = get_property(dp, "linux,bootx-width", &len);
 	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "width", &len);
+		pp = get_property(dp, "width", &len);
 	if (pp && len == sizeof(u32))
 		width = *pp;
 
-	pp = (u32 *)get_property(dp, "linux,bootx-height", &len);
+	pp = get_property(dp, "linux,bootx-height", &len);
 	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "height", &len);
+		pp = get_property(dp, "height", &len);
 	if (pp && len == sizeof(u32))
 		height = *pp;
 
-	pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len);
+	pp = get_property(dp, "linux,bootx-linebytes", &len);
 	if (pp == NULL)
-		pp = (u32 *)get_property(dp, "linebytes", &len);
+		pp = get_property(dp, "linebytes", &len);
 	if (pp && len == sizeof(u32))
 		pitch = *pp;
 	else
@@ -450,9 +450,9 @@
 	 * ranges and pick one that is both big enough and if possible encloses
 	 * the "address" property. If none match, we pick the biggest
 	 */
-	up = (u32 *)get_property(dp, "linux,bootx-addr", &len);
+	up = get_property(dp, "linux,bootx-addr", &len);
 	if (up == NULL)
-		up = (u32 *)get_property(dp, "address", &len);
+		up = get_property(dp, "address", &len);
 	if (up && len == sizeof(u32))
 		addr_prop = *up;
 
diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c
index 8ddb47a..4acde4f 100644
--- a/drivers/video/riva/fbdev.c
+++ b/drivers/video/riva/fbdev.c
@@ -1826,8 +1826,8 @@
 {
 	struct riva_par *par = info->par;
 	struct device_node *dp;
-	unsigned char *pedid = NULL;
-	unsigned char *disptype = NULL;
+	const unsigned char *pedid = NULL;
+	const unsigned char *disptype = NULL;
 	static char *propnames[] = {
 		"DFP,EDID", "LCD,EDID", "EDID", "EDID1", "EDID,B", "EDID,A", NULL };
 	int i;
@@ -1835,14 +1835,13 @@
 	NVTRACE_ENTER();
 	dp = pci_device_to_OF_node(pd);
 	for (; dp != NULL; dp = dp->child) {
-		disptype = (unsigned char *)get_property(dp, "display-type", NULL);
+		disptype = get_property(dp, "display-type", NULL);
 		if (disptype == NULL)
 			continue;
 		if (strncmp(disptype, "LCD", 3) != 0)
 			continue;
 		for (i = 0; propnames[i] != NULL; ++i) {
-			pedid = (unsigned char *)
-				get_property(dp, propnames[i], NULL);
+			pedid = get_property(dp, propnames[i], NULL);
 			if (pedid != NULL) {
 				par->EDID = pedid;
 				NVTRACE("LCD found.\n");
diff --git a/fs/Kconfig b/fs/Kconfig
index 3f00a9f..a270026 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -325,8 +325,8 @@
 source "fs/xfs/Kconfig"
 
 config OCFS2_FS
-	tristate "OCFS2 file system support (EXPERIMENTAL)"
-	depends on NET && SYSFS && EXPERIMENTAL
+	tristate "OCFS2 file system support"
+	depends on NET && SYSFS
 	select CONFIGFS_FS
 	select JBD
 	select CRC32
@@ -1471,8 +1471,8 @@
 	  If unsure, say N.
 
 config NFS_DIRECTIO
-	bool "Allow direct I/O on NFS files (EXPERIMENTAL)"
-	depends on NFS_FS && EXPERIMENTAL
+	bool "Allow direct I/O on NFS files"
+	depends on NFS_FS
 	help
 	  This option enables applications to perform uncached I/O on files
 	  in NFS file systems using the O_DIRECT open() flag.  When O_DIRECT
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 0feb3bd..1eb9a2e 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,7 @@
+Version 1.46
+------------
+Support deep tree mounts.  Better support for OS/2, Win9x (DOS) time stamps.
+
 Version 1.45
 ------------
 Do not time out lockw calls when using posix extensions. Do not
@@ -6,7 +10,8 @@
 (lock cancel now works, and unlock of merged range works even
 to Windows servers now).  Fix oops on mount to lanman servers
 (win9x, os/2 etc.) when null password.  Do not send listxattr
-(SMB to query all EAs) if nouser_xattr specified.
+(SMB to query all EAs) if nouser_xattr specified.  Fix SELinux
+problem (instantiate inodes/dentries in the right order for readdir).
 
 Version 1.44
 ------------
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ad58eb0..fd1e52e 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -40,5 +40,7 @@
 	mode_t	mnt_file_mode;
 	mode_t	mnt_dir_mode;
 	int     mnt_cifs_flags;
+	int	prepathlen;
+	char *  prepath;
 };
 #endif				/* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3cd7500..c3ef1c0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -189,7 +189,6 @@
 	buf->f_files = 0;	/* undefined */
 	buf->f_ffree = 0;	/* unlimited */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 /* BB we could add a second check for a QFS Unix capability bit */
 /* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */
     if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS &
@@ -199,7 +198,6 @@
     /* Only need to call the old QFSInfo if failed
     on newer one */
     if(rc)
-#endif /* CIFS_EXPERIMENTAL */
 	rc = CIFSSMBQFSInfo(xid, pTcon, buf);
 
 	/* Old Windows servers do not support level 103, retry with level 
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 39ee8ef..bea875d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 extern int cifs_ioctl (struct inode * inode, struct file * filep,
 		       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.45"
+#define CIFS_VERSION   "1.46"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 8623902..81df2bf 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1344,6 +1344,7 @@
 #define SMB_QUERY_ATTR_FLAGS            0x206  /* append,immutable etc. */
 #define SMB_QUERY_POSIX_PERMISSION      0x207
 #define SMB_QUERY_POSIX_LOCK            0x208
+/* #define SMB_POSIX_OPEN  		0x209 */
 #define SMB_QUERY_FILE_INTERNAL_INFO    0x3ee
 #define SMB_QUERY_FILE_ACCESS_INFO      0x3f0
 #define SMB_QUERY_FILE_NAME_INFO2       0x3f1 /* 0x30 bytes */
@@ -1363,6 +1364,7 @@
 #define SMB_SET_XATTR                   0x205
 #define SMB_SET_ATTR_FLAGS              0x206  /* append, immutable etc. */
 #define SMB_SET_POSIX_LOCK              0x208
+#define SMB_POSIX_OPEN                  0x209
 #define SMB_SET_FILE_BASIC_INFO2        0x3ec
 #define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */
 #define SMB_FILE_ALL_INFO2              0x3fa
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5d394c7..0e9ba0b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -89,6 +89,7 @@
 	unsigned int wsize;
 	unsigned int sockopt;
 	unsigned short int port;
+	char * prepath;
 };
 
 static int ipv4_connect(struct sockaddr_in *psin_server, 
@@ -993,6 +994,28 @@
 				printk(KERN_WARNING "CIFS: domain name too long\n");
 				return 1;
 			}
+                } else if (strnicmp(data, "prefixpath", 10) == 0) {
+                        if (!value || !*value) {
+                                printk(KERN_WARNING
+                                       "CIFS: invalid path prefix\n");
+                                return 1;       /* needs_arg; */
+                        }
+                        if ((temp_len = strnlen(value, 1024)) < 1024) {
+				if(value[0] != '/')
+					temp_len++;  /* missing leading slash */
+                                vol->prepath = kmalloc(temp_len+1,GFP_KERNEL);
+                                if(vol->prepath == NULL)
+                                        return 1;
+				if(value[0] != '/') {
+					vol->prepath[0] = '/';
+	                                strcpy(vol->prepath+1,value);
+				} else
+					strcpy(vol->prepath,value);
+				cFYI(1,("prefix path %s",vol->prepath));
+                        } else {
+                                printk(KERN_WARNING "CIFS: prefix too long\n");
+                                return 1;
+                        }
 		} else if (strnicmp(data, "iocharset", 9) == 0) {
 			if (!value || !*value) {
 				printk(KERN_WARNING "CIFS: invalid iocharset specified\n");
@@ -1605,6 +1628,7 @@
 	if (cifs_parse_mount_options(mount_data, devname, &volume_info)) {
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1619,6 +1643,7 @@
            locations such as env variables and files on disk */
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1639,6 +1664,7 @@
 			/* we failed translating address */
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return -EINVAL;
 		}
@@ -1651,6 +1677,7 @@
 		cERROR(1,("Connecting to DFS root not implemented yet"));
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	} else /* which servers DFS root would we conect to */ {
@@ -1658,6 +1685,7 @@
 		       ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified"));
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1672,6 +1700,7 @@
 			cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset));
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return -ELIBACC;
 		}
@@ -1688,6 +1717,7 @@
 	else {
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1710,6 +1740,7 @@
 				sock_release(csocket);
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return rc;
 		}
@@ -1720,6 +1751,7 @@
 			sock_release(csocket);
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return rc;
 		} else {
@@ -1744,6 +1776,7 @@
 				sock_release(csocket);
 				kfree(volume_info.UNC);
 				kfree(volume_info.password);
+				kfree(volume_info.prepath);
 				FreeXid(xid);
 				return rc;
 			}
@@ -1831,6 +1864,14 @@
 			/* Windows ME may prefer this */
 			cFYI(1,("readsize set to minimum 2048"));
 		}
+		/* calculate prepath */
+		cifs_sb->prepath = volume_info.prepath;
+		if(cifs_sb->prepath) {
+			cifs_sb->prepathlen = strlen(cifs_sb->prepath);
+			cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb);
+			volume_info.prepath = NULL;
+		} else 
+			cifs_sb->prepathlen = 0;
 		cifs_sb->mnt_uid = volume_info.linux_uid;
 		cifs_sb->mnt_gid = volume_info.linux_gid;
 		cifs_sb->mnt_file_mode = volume_info.file_mode;
@@ -2008,6 +2049,7 @@
 	the password ptr is put in the new session structure (in which case the
 	password will be freed at unmount time) */
 	kfree(volume_info.UNC);
+	kfree(volume_info.prepath);
 	FreeXid(xid);
 	return rc;
 }
@@ -3195,6 +3237,7 @@
 	int xid;
 	struct cifsSesInfo *ses = NULL;
 	struct task_struct *cifsd_task;
+	char * tmp;
 
 	xid = GetXid();
 
@@ -3228,6 +3271,10 @@
 	}
 	
 	cifs_sb->tcon = NULL;
+	tmp = cifs_sb->prepath;
+	cifs_sb->prepathlen = 0;
+	cifs_sb->prepath = NULL;
+	kfree(tmp);
 	if (ses)
 		schedule_timeout_interruptible(msecs_to_jiffies(500));
 	if (ses)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 914239d5..66b825a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -46,7 +46,8 @@
 build_path_from_dentry(struct dentry *direntry)
 {
 	struct dentry *temp;
-	int namelen = 0;
+	int namelen;
+	int pplen;
 	char *full_path;
 	char dirsep;
 
@@ -56,7 +57,9 @@
 		when the server crashed */
 
 	dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
+	pplen = CIFS_SB(direntry->d_sb)->prepathlen;
 cifs_bp_rename_retry:
+	namelen = pplen; 
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen += (1 + temp->d_name.len);
 		temp = temp->d_parent;
@@ -70,7 +73,6 @@
 	if(full_path == NULL)
 		return full_path;
 	full_path[namelen] = 0;	/* trailing null */
-
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen -= 1 + temp->d_name.len;
 		if (namelen < 0) {
@@ -79,7 +81,7 @@
 			full_path[namelen] = dirsep;
 			strncpy(full_path + namelen + 1, temp->d_name.name,
 				temp->d_name.len);
-			cFYI(0, (" name: %s ", full_path + namelen));
+			cFYI(0, ("name: %s", full_path + namelen));
 		}
 		temp = temp->d_parent;
 		if(temp == NULL) {
@@ -88,18 +90,23 @@
 			return NULL;
 		}
 	}
-	if (namelen != 0) {
+	if (namelen != pplen) {
 		cERROR(1,
-		       ("We did not end path lookup where we expected namelen is %d",
+		       ("did not end path lookup where expected namelen is %d",
 			namelen));
-		/* presumably this is only possible if we were racing with a rename 
+		/* presumably this is only possible if racing with a rename 
 		of one of the parent directories  (we can not lock the dentries
 		above us to prevent this, but retrying should be harmless) */
 		kfree(full_path);
-		namelen = 0;
 		goto cifs_bp_rename_retry;
 	}
-
+	/* DIR_SEP already set for byte  0 / vs \ but not for
+	   subsequent slashes in prepath which currently must
+	   be entered the right way - not sure if there is an alternative
+	   since the '\' is a valid posix character so we can not switch
+	   those safely to '/' if any are found in the middle of the prepath */
+	/* BB test paths to Windows with '/' in the midst of prepath */
+	strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen);
 	return full_path;
 }
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e9c5ba9..ddb012a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -752,6 +752,7 @@
 			int stored_rc = 0;
 			struct cifsLockInfo *li, *tmp;
 
+			rc = 0;
 			down(&fid->lock_sem);
 			list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
 				if (pfLock->fl_start <= li->offset &&
@@ -766,7 +767,7 @@
 					kfree(li);
 				}
 			}
-		up(&fid->lock_sem);
+			up(&fid->lock_sem);
 		}
 	}
 
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 067648b..18fcec1 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -269,7 +269,7 @@
 				rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
 					ea_value, buf_size,
 					ACL_TYPE_ACCESS);
-				CIFSSMBClose(xid, pTcon, fid)
+				CIFSSMBClose(xid, pTcon, fid);
 			}
 		} */  /* BB enable after fixing up return data */
                   		
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index df02545..816e8ef 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -86,6 +86,32 @@
 	return sd;
 }
 
+/*
+ * configfs_dirent_exists - look for a name collision under @parent_sd
+ *
+ * Return -EEXIST if there is already a configfs element with the same
+ * name for the same parent, 0 otherwise.
+ * Called with the parent inode's i_mutex held.
+ */
+int configfs_dirent_exists(struct configfs_dirent *parent_sd,
+			   const unsigned char *new)
+{
+	struct configfs_dirent *child;
+
+	list_for_each_entry(child, &parent_sd->s_children, s_sibling) {
+		const unsigned char *name;
+
+		if (!child->s_element)
+			continue;
+		name = configfs_get_name(child);
+		if (strcmp(name, new) == 0)
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+
 int configfs_make_dirent(struct configfs_dirent * parent_sd,
 			 struct dentry * dentry, void * element,
 			 umode_t mode, int type)
@@ -136,8 +162,10 @@
 	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 
-	error = configfs_make_dirent(p->d_fsdata, d, k, mode,
-				     CONFIGFS_DIR);
+	error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
+	if (!error)
+		error = configfs_make_dirent(p->d_fsdata, d, k, mode,
+					     CONFIGFS_DIR);
 	if (!error) {
 		error = configfs_create(d, mode, init_dir);
 		if (!error) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 1b4a3a3..17b392a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -828,17 +828,19 @@
  * (or otherwise set) by the caller to indicate that it is now
  * in use by the dcache.
  */
-struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+static struct dentry *__d_instantiate_unique(struct dentry *entry,
+					     struct inode *inode)
 {
 	struct dentry *alias;
 	int len = entry->d_name.len;
 	const char *name = entry->d_name.name;
 	unsigned int hash = entry->d_name.hash;
 
-	BUG_ON(!list_empty(&entry->d_alias));
-	spin_lock(&dcache_lock);
-	if (!inode)
-		goto do_negative;
+	if (!inode) {
+		entry->d_inode = NULL;
+		return NULL;
+	}
+
 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
 		struct qstr *qstr = &alias->d_name;
 
@@ -851,19 +853,35 @@
 		if (memcmp(qstr->name, name, len))
 			continue;
 		dget_locked(alias);
-		spin_unlock(&dcache_lock);
-		BUG_ON(!d_unhashed(alias));
-		iput(inode);
 		return alias;
 	}
+
 	list_add(&entry->d_alias, &inode->i_dentry);
-do_negative:
 	entry->d_inode = inode;
 	fsnotify_d_instantiate(entry, inode);
-	spin_unlock(&dcache_lock);
-	security_d_instantiate(entry, inode);
 	return NULL;
 }
+
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+	struct dentry *alias;
+
+	BUG_ON(!list_empty(&entry->d_alias));
+
+	spin_lock(&dcache_lock);
+	alias = __d_instantiate_unique(entry, inode);
+	spin_unlock(&dcache_lock);
+
+	if (alias) {
+		/* a matching alias exists: hand it back and drop @inode */
+		BUG_ON(!d_unhashed(alias));
+		iput(inode);
+		return alias;
+	}
+	security_d_instantiate(entry, inode);
+	return NULL;
+}
+
 EXPORT_SYMBOL(d_instantiate_unique);
 
 /**
@@ -1235,6 +1253,11 @@
  	hlist_add_head_rcu(&entry->d_hash, list);
 }
 
+static void _d_rehash(struct dentry * entry)
+{
+	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
+}
+
 /**
  * d_rehash	- add an entry back to the hash
  * @entry: dentry to add to the hash
@@ -1244,11 +1267,9 @@
  
 void d_rehash(struct dentry * entry)
 {
-	struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-
 	spin_lock(&dcache_lock);
 	spin_lock(&entry->d_lock);
-	__d_rehash(entry, list);
+	_d_rehash(entry);
 	spin_unlock(&entry->d_lock);
 	spin_unlock(&dcache_lock);
 }
@@ -1386,6 +1407,120 @@
 	spin_unlock(&dcache_lock);
 }
 
+/*
+ * Prepare an anonymous dentry for life in the superblock's dentry tree as a
+ * named dentry in place of the dentry to be replaced: swap names and parents.
+ */
+static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
+{
+	struct dentry *dparent, *aparent;
+
+	switch_names(dentry, anon);
+	do_switch(dentry->d_name.len, anon->d_name.len);
+	do_switch(dentry->d_name.hash, anon->d_name.hash);
+	/* save both parents before either d_parent is overwritten */
+	dparent = dentry->d_parent;
+	aparent = anon->d_parent;
+	/* dentry gets anon's old parent, or becomes its own parent (a root) */
+	dentry->d_parent = (aparent == anon) ? dentry : aparent;
+	list_del(&dentry->d_u.d_child);
+	if (!IS_ROOT(dentry))
+		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+	else
+		INIT_LIST_HEAD(&dentry->d_u.d_child);
+	/* and symmetrically anon gets dentry's old parent */
+	anon->d_parent = (dparent == dentry) ? anon : dparent;
+	list_del(&anon->d_u.d_child);
+	if (!IS_ROOT(anon))
+		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
+	else
+		INIT_LIST_HEAD(&anon->d_u.d_child);
+	/* anon has been given a parent/child linkage above: clear the flag */
+	anon->d_flags &= ~DCACHE_DISCONNECTED;
+}
+
+/**
+ * d_materialise_unique - introduce an inode into the tree
+ * @dentry: candidate dentry
+ * @inode: inode to bind to the dentry, to which aliases may be attached
+ *
+ * Introduces a dentry into the tree, substituting an extant disconnected root
+ * directory alias if there is one.  Returns NULL if @dentry was the one used.
+ */
+struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
+{
+	struct dentry *alias, *actual;
+
+	BUG_ON(!d_unhashed(dentry));
+
+	spin_lock(&dcache_lock);
+	/* No inode to bind: just hash @dentry with a NULL d_inode */
+	if (!inode) {
+		actual = dentry;
+		dentry->d_inode = NULL;
+		goto found_lock;
+	}
+
+	/* See if a disconnected directory already exists as an anonymous root
+	 * that we should splice into the tree instead */
+	if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
+		spin_lock(&alias->d_lock);
+
+		/* Is this a mountpoint that we could splice into our tree? */
+		if (IS_ROOT(alias))
+			goto connect_mountpoint;
+
+		if (alias->d_name.len == dentry->d_name.len &&
+		    alias->d_parent == dentry->d_parent &&
+		    memcmp(alias->d_name.name,
+			   dentry->d_name.name,
+			   dentry->d_name.len) == 0)
+			goto replace_with_alias;
+
+		spin_unlock(&alias->d_lock);
+		/* Doh! Seem to be aliasing directories for some reason...
+		 * NOTE(review): dput() under dcache_lock - verify */
+		dput(alias);
+	}
+
+	/* Add a unique reference */
+	actual = __d_instantiate_unique(dentry, inode);
+	if (!actual)
+		actual = dentry;
+	else if (unlikely(!d_unhashed(actual)))
+		goto shouldnt_be_hashed;
+
+found_lock:
+	spin_lock(&actual->d_lock);
+found:
+	_d_rehash(actual);
+	spin_unlock(&actual->d_lock);
+	spin_unlock(&dcache_lock);
+
+	if (actual == dentry) {
+		security_d_instantiate(dentry, inode);
+		return NULL;
+	}
+	/* A different dentry was used: drop @inode and hand that dentry back */
+	iput(inode);
+	return actual;
+
+	/* Convert the anonymous/root alias into an ordinary dentry */
+connect_mountpoint:
+	__d_materialise_dentry(dentry, alias);
+
+	/* Replace the candidate dentry with the alias in the tree */
+replace_with_alias:
+	__d_drop(alias);
+	actual = alias;
+	goto found;
+	/* __d_instantiate_unique() handed back an alias that is hashed */
+shouldnt_be_hashed:
+	spin_unlock(&dcache_lock);
+	BUG();
+	goto shouldnt_be_hashed;
+}
+
 /**
  * d_path - return the path of a dentry
  * @dentry: dentry to report
@@ -1784,6 +1919,7 @@
 EXPORT_SYMBOL(d_invalidate);
 EXPORT_SYMBOL(d_lookup);
 EXPORT_SYMBOL(d_move);
+EXPORT_SYMBOL_GPL(d_materialise_unique);
 EXPORT_SYMBOL(d_path);
 EXPORT_SYMBOL(d_prune_aliases);
 EXPORT_SYMBOL(d_rehash);
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index d487043..b1981d0 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -539,7 +539,6 @@
 
 #endif  /*  EXT2FS_DEBUG  */
 
-/* Superblock must be locked */
 unsigned long ext2_count_free_blocks (struct super_block * sb)
 {
 	struct ext2_group_desc * desc;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index de85c61..695f69c 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -637,7 +637,6 @@
 	return ERR_PTR(err);
 }
 
-/* Superblock must be locked */
 unsigned long ext2_count_free_inodes (struct super_block * sb)
 {
 	struct ext2_group_desc *desc;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ca5bfb6..4286ff6 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1083,7 +1083,6 @@
 	unsigned long overhead;
 	int i;
 
-	lock_super(sb);
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
 	else {
@@ -1124,7 +1123,6 @@
 	buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
 	buf->f_ffree = ext2_count_free_inodes (sb);
 	buf->f_namelen = EXT2_NAME_LEN;
-	unlock_super(sb);
 	return 0;
 }
 
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 2e0cc8e..3a56607 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -41,11 +41,7 @@
 
 	uint16_t flags;
 	uint8_t usercompr;
-#if !defined (__ECOS)
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
 	struct inode vfs_inode;
-#endif
-#endif
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 	struct posix_acl *i_acl_access;
 	struct posix_acl *i_acl_default;
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 7675b33..5a6b4d6 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -21,6 +21,9 @@
 #include <linux/pagemap.h>
 #include "nodelist.h"
 
+static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
+				     struct jffs2_node_frag *this);
+
 void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list)
 {
 	struct jffs2_full_dirent **prev = list;
@@ -87,7 +90,8 @@
 	}
 }
 
-void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this)
+static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
+				     struct jffs2_node_frag *this)
 {
 	if (this->node) {
 		this->node->frags--;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index cae92c1..0ddfd70 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -334,7 +334,6 @@
 struct rb_node *rb_next(struct rb_node *);
 struct rb_node *rb_prev(struct rb_node *);
 void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root);
-void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this);
 int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
 void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
 int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 25bc1ae..4da09ce 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -1215,7 +1215,6 @@
 	rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE);
 	if (rc) {
 		JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen);
-		rc = rc ? rc : -EBADFD;
 		goto out;
 	}
 	rc = save_xattr_datum(c, xd);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 89ba0df..50dbb67 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -151,11 +151,13 @@
 int
 nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 {
+	struct rpc_clnt		*client = NFS_CLIENT(inode);
+	struct sockaddr_in	addr;
 	struct nlm_host		*host;
 	struct nlm_rqst		*call;
 	sigset_t		oldset;
 	unsigned long		flags;
-	int			status, proto, vers;
+	int			status, vers;
 
 	vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
 	if (NFS_PROTO(inode)->version > 3) {
@@ -163,10 +165,8 @@
 		return -ENOLCK;
 	}
 
-	/* Retrieve transport protocol from NFS client */
-	proto = NFS_CLIENT(inode)->cl_xprt->prot;
-
-	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+	rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
+	host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers);
 	if (host == NULL)
 		return -ENOLCK;
 
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 38b0e8a..703fb03 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -26,7 +26,6 @@
 #define NLM_HOST_REBIND		(60 * HZ)
 #define NLM_HOST_EXPIRE		((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
 #define NLM_HOST_COLLECT	((nrhosts > NLM_HOST_MAX)? 120 * HZ :  60 * HZ)
-#define NLM_HOST_ADDR(sv)	(&(sv)->s_nlmclnt->cl_xprt->addr)
 
 static struct nlm_host *	nlm_hosts[NLM_HOST_NRHASH];
 static unsigned long		next_gc;
@@ -167,7 +166,6 @@
 nlm_bind_host(struct nlm_host *host)
 {
 	struct rpc_clnt	*clnt;
-	struct rpc_xprt	*xprt;
 
 	dprintk("lockd: nlm_bind_host(%08x)\n",
 			(unsigned)ntohl(host->h_addr.sin_addr.s_addr));
@@ -179,7 +177,6 @@
 	 * RPC rebind is required
 	 */
 	if ((clnt = host->h_rpcclnt) != NULL) {
-		xprt = clnt->cl_xprt;
 		if (time_after_eq(jiffies, host->h_nextrebind)) {
 			rpc_force_rebind(clnt);
 			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
@@ -187,31 +184,37 @@
 					host->h_nextrebind - jiffies);
 		}
 	} else {
-		xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL);
-		if (IS_ERR(xprt))
-			goto forgetit;
+		unsigned long increment = nlmsvc_timeout * HZ;
+		struct rpc_timeout timeparms = {
+			.to_initval	= increment,
+			.to_increment	= increment,
+			.to_maxval	= increment * 6UL,
+			.to_retries	= 5U,
+		};
+		struct rpc_create_args args = {
+			.protocol	= host->h_proto,
+			.address	= (struct sockaddr *)&host->h_addr,
+			.addrsize	= sizeof(host->h_addr),
+			.timeout	= &timeparms,
+			.servername	= host->h_name,
+			.program	= &nlm_program,
+			.version	= host->h_version,
+			.authflavor	= RPC_AUTH_UNIX,
+			.flags		= (RPC_CLNT_CREATE_HARDRTRY |
+					   RPC_CLNT_CREATE_AUTOBIND),
+		};
 
-		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
-		xprt->resvport = 1;	/* NLM requires a reserved port */
-
-		/* Existing NLM servers accept AUTH_UNIX only */
-		clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
-					host->h_version, RPC_AUTH_UNIX);
-		if (IS_ERR(clnt))
-			goto forgetit;
-		clnt->cl_autobind = 1;	/* turn on pmap queries */
-		clnt->cl_softrtry = 1; /* All queries are soft */
-
-		host->h_rpcclnt = clnt;
+		clnt = rpc_create(&args);
+		if (!IS_ERR(clnt))
+			host->h_rpcclnt = clnt;
+		else {
+			printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
+			clnt = NULL;
+		}
 	}
 
 	mutex_unlock(&host->h_mutex);
 	return clnt;
-
-forgetit:
-	printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
-	mutex_unlock(&host->h_mutex);
-	return NULL;
 }
 
 /*
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3fc683f..5954dcb 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -109,30 +109,23 @@
 static struct rpc_clnt *
 nsm_create(void)
 {
-	struct rpc_xprt		*xprt;
-	struct rpc_clnt		*clnt;
-	struct sockaddr_in	sin;
+	struct sockaddr_in	sin = {
+		.sin_family	= AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+		.sin_port	= 0,
+	};
+	struct rpc_create_args args = {
+		.protocol	= IPPROTO_UDP,
+		.address	= (struct sockaddr *)&sin,
+		.addrsize	= sizeof(sin),
+		.servername	= "localhost",
+		.program	= &nsm_program,
+		.version	= SM_VERSION,
+		.authflavor	= RPC_AUTH_NULL,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT),
+	};
 
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-	sin.sin_port = 0;
-
-	xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-	xprt->resvport = 1;	/* NSM requires a reserved port */
-
-	clnt = rpc_create_client(xprt, "localhost",
-				&nsm_program, SM_VERSION,
-				RPC_AUTH_NULL);
-	if (IS_ERR(clnt))
-		goto out_err;
-	clnt->cl_softrtry = 1;
-	clnt->cl_oneshot  = 1;
-	return clnt;
-
-out_err:
-	return clnt;
+	return rpc_create(&args);
 }
 
 /*
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 0b572a0..f4580b4 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,9 +4,9 @@
 
 obj-$(CONFIG_NFS_FS) += nfs.o
 
-nfs-y 			:= dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \
-			   proc.o read.o symlink.o unlink.o write.o \
-			   namespace.o
+nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
+			   pagelist.o proc.o read.o symlink.o unlink.o \
+			   write.o namespace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fe0a6b8..a3ee113 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -19,6 +19,7 @@
 
 #include "nfs4_fs.h"
 #include "callback.h"
+#include "internal.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
@@ -36,6 +37,21 @@
 
 unsigned int nfs_callback_set_tcpport;
 unsigned short nfs_callback_tcpport;
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+
+static int param_set_port(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
+		return -EINVAL;
+	*((int *)kp->arg) = num;
+	return 0;
+}
+
+module_param_call(callback_tcpport, param_set_port, param_get_int,
+		 &nfs_callback_set_tcpport, 0644);
 
 /*
  * This is the callback kernel thread.
@@ -134,10 +150,8 @@
 /*
  * Kill the server process if it is not already up.
  */
-int nfs_callback_down(void)
+void nfs_callback_down(void)
 {
-	int ret = 0;
-
 	lock_kernel();
 	mutex_lock(&nfs_callback_mutex);
 	nfs_callback_info.users--;
@@ -149,20 +163,19 @@
 	} while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
 	mutex_unlock(&nfs_callback_mutex);
 	unlock_kernel();
-	return ret;
 }
 
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
-	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
-	struct nfs4_client *clp;
+	struct sockaddr_in *addr = &rqstp->rq_addr;
+	struct nfs_client *clp;
 
 	/* Don't talk to strangers */
-	clp = nfs4_find_client(addr);
+	clp = nfs_find_client(addr, 4);
 	if (clp == NULL)
 		return SVC_DROP;
-	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
-	nfs4_put_client(clp);
+	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr));
+	nfs_put_client(clp);
 	switch (rqstp->rq_authop->flavour) {
 		case RPC_AUTH_NULL:
 			if (rqstp->rq_proc != CB_NULL)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b252e7f..5676163d 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -62,8 +62,13 @@
 extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
 
+#ifdef CONFIG_NFS_V4
 extern int nfs_callback_up(void);
-extern int nfs_callback_down(void);
+extern void nfs_callback_down(void);
+#else
+#define nfs_callback_up()	(0)
+#define nfs_callback_down()	do {} while(0)
+#endif
 
 extern unsigned int nfs_callback_set_tcpport;
 extern unsigned short nfs_callback_tcpport;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7719483..97cf8f7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -10,19 +10,20 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "internal.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
  
 unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
 	
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs4_find_client(&args->addr->sin_addr);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 	inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -48,7 +49,7 @@
 	up_read(&nfsi->rwsem);
 	iput(inode);
 out_putclient:
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
 	return res->status;
@@ -56,12 +57,12 @@
 
 unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct inode *inode;
 	unsigned res;
 	
 	res = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs4_find_client(&args->addr->sin_addr);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 	inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -80,7 +81,7 @@
 	}
 	iput(inode);
 out_putclient:
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
 	return res;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
new file mode 100644
index 0000000..ec1938d
--- /dev/null
+++ b/fs/nfs/client.c
@@ -0,0 +1,1448 @@
+/* client.c: NFS client sharing and management code
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/inet.h>
+#include <linux/nfs_xdr.h>
+
+#include <asm/system.h>
+
+#include "nfs4_fs.h"
+#include "callback.h"
+#include "delegation.h"
+#include "iostat.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY		NFSDBG_CLIENT
+
+static DEFINE_SPINLOCK(nfs_client_lock);
+static LIST_HEAD(nfs_client_list);
+static LIST_HEAD(nfs_volume_list);
+static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
+
+/*
+ * RPC cruft for NFS
+ */
+static struct rpc_version *nfs_version[5] = {
+	[2]			= &nfs_version2,
+#ifdef CONFIG_NFS_V3
+	[3]			= &nfs_version3,
+#endif
+#ifdef CONFIG_NFS_V4
+	[4]			= &nfs_version4,
+#endif
+};
+
+struct rpc_program nfs_program = {
+	.name			= "nfs",
+	.number			= NFS_PROGRAM,
+	.nrvers			= ARRAY_SIZE(nfs_version),
+	.version		= nfs_version,
+	.stats			= &nfs_rpcstat,
+	.pipe_dir_name		= "/nfs",
+};
+
+struct rpc_stat nfs_rpcstat = {
+	.program		= &nfs_program
+};
+
+
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
+static struct rpc_version *	nfsacl_version[] = {
+	[3]			= &nfsacl_version3,
+};
+
+struct rpc_program		nfsacl_program = {
+	.name			= "nfsacl",
+	.number			= NFS_ACL_PROGRAM,
+	.nrvers			= ARRAY_SIZE(nfsacl_version),
+	.version		= nfsacl_version,
+	.stats			= &nfsacl_rpcstat,
+};
+#endif  /* CONFIG_NFS_V3_ACL */
+
+/*
+ * Allocate a shared client record
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+static struct nfs_client *nfs_alloc_client(const char *hostname,
+					   const struct sockaddr_in *addr,
+					   int nfsversion)
+{
+	struct nfs_client *clp;
+	int error;
+
+	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
+		goto error_0;
+
+	error = rpciod_up();
+	if (error < 0) {
+		dprintk("%s: couldn't start rpciod! Error = %d\n",
+				__FUNCTION__, error);
+		goto error_1;
+	}
+	__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+
+	if (nfsversion == 4) {
+		if (nfs_callback_up() < 0)
+			goto error_2;
+		__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+	}
+
+	atomic_set(&clp->cl_count, 1);
+	clp->cl_cons_state = NFS_CS_INITING;
+
+	clp->cl_nfsversion = nfsversion;
+	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+
+	if (hostname) {
+		clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+		if (!clp->cl_hostname)
+			goto error_3;
+	}
+
+	INIT_LIST_HEAD(&clp->cl_superblocks);
+	clp->cl_rpcclient = ERR_PTR(-EINVAL);
+
+#ifdef CONFIG_NFS_V4
+	init_rwsem(&clp->cl_sem);
+	INIT_LIST_HEAD(&clp->cl_delegations);
+	INIT_LIST_HEAD(&clp->cl_state_owners);
+	INIT_LIST_HEAD(&clp->cl_unused);
+	spin_lock_init(&clp->cl_lock);
+	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
+	clp->cl_boot_time = CURRENT_TIME;
+	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+#endif
+
+	return clp;
+
+error_3:
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down();
+error_2:
+	rpciod_down();
+	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+error_1:
+	kfree(clp);
+error_0:
+	return NULL;
+}
+
+static void nfs4_shutdown_client(struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4
+	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
+		nfs4_kill_renewd(clp);
+	while (!list_empty(&clp->cl_unused)) {
+		struct nfs4_state_owner *sp;
+
+		sp = list_entry(clp->cl_unused.next,
+				struct nfs4_state_owner,
+				so_list);
+		list_del(&sp->so_list);
+		kfree(sp);
+	}
+	BUG_ON(!list_empty(&clp->cl_state_owners));
+	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
+		nfs_idmap_delete(clp);
+#endif
+}
+
+/*
+ * Destroy a shared client record
+ */
+static void nfs_free_client(struct nfs_client *clp)
+{
+	dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
+
+	nfs4_shutdown_client(clp);
+
+	/* -EIO all pending I/O */
+	if (!IS_ERR(clp->cl_rpcclient))
+		rpc_shutdown_client(clp->cl_rpcclient);
+
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down();
+
+	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
+		rpciod_down();
+
+	kfree(clp->cl_hostname);
+	kfree(clp);
+
+	dprintk("<-- nfs_free_client()\n");
+}
+
+/*
+ * Release a reference to a shared client record
+ */
+void nfs_put_client(struct nfs_client *clp)
+{
+	if (!clp)
+		return;
+
+	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
+
+	if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
+		list_del(&clp->cl_share_link);
+		spin_unlock(&nfs_client_lock);
+
+		BUG_ON(!list_empty(&clp->cl_superblocks));
+
+		nfs_free_client(clp);
+	}
+}
+
+/*
+ * Find a client by address
+ * - caller must hold nfs_client_lock
+ */
+static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+	struct nfs_client *clp;
+
+	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		/* Different NFS versions cannot share the same nfs_client */
+		if (clp->cl_nfsversion != nfsversion)
+			continue;
+
+		if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
+			   sizeof(clp->cl_addr.sin_addr)) != 0)
+			continue;
+
+		if (clp->cl_addr.sin_port == addr->sin_port)
+			goto found;
+	}
+
+	return NULL;
+
+found:
+	atomic_inc(&clp->cl_count);
+	return clp;
+}
+
+/*
+ * Find a client by IP address and protocol version
+ * - returns NULL if no such client
+ */
+struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	clp = __nfs_find_client(addr, nfsversion);
+	spin_unlock(&nfs_client_lock);
+
+	BUG_ON(clp && clp->cl_cons_state == 0);
+
+	return clp;
+}
+
+/*
+ * Look up a client by IP address, port and protocol version
+ * - creates a record if none exists, else waits for the shared one to be ready
+ */
+static struct nfs_client *nfs_get_client(const char *hostname,
+					 const struct sockaddr_in *addr,
+					 int nfsversion)
+{
+	struct nfs_client *clp, *new = NULL;
+	int error;
+
+	dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
+		hostname ?: "", NIPQUAD(addr->sin_addr),
+		ntohs(addr->sin_port), nfsversion);
+
+	/* see if the client already exists */
+	do {
+		spin_lock(&nfs_client_lock);
+
+		clp = __nfs_find_client(addr, nfsversion);
+		if (clp)
+			goto found_client;
+		if (new)
+			goto install_client;
+
+		spin_unlock(&nfs_client_lock);
+
+		new = nfs_alloc_client(hostname, addr, nfsversion);
+	} while (new);
+
+	return ERR_PTR(-ENOMEM);
+
+	/* install a new client and return with it unready */
+install_client:
+	clp = new;
+	list_add(&clp->cl_share_link, &nfs_client_list);
+	spin_unlock(&nfs_client_lock);
+	dprintk("--> nfs_get_client() = %p [new]\n", clp);
+	return clp;
+
+	/* found an existing client
+	 * - sleep until its creator has marked it ready or failed
+	 */
+found_client:
+	spin_unlock(&nfs_client_lock);
+
+	if (new)
+		nfs_free_client(new);
+
+	if (clp->cl_cons_state == NFS_CS_INITING) {
+		DECLARE_WAITQUEUE(myself, current);
+
+		add_wait_queue(&nfs_client_active_wq, &myself);
+
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (signal_pending(current) ||
+			    clp->cl_cons_state != NFS_CS_INITING)
+				break;
+			schedule();
+		}
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(&nfs_client_active_wq, &myself);
+
+		if (signal_pending(current)) {
+			nfs_put_client(clp);
+			return ERR_PTR(-ERESTARTSYS);
+		}
+	}
+
+	if (clp->cl_cons_state < NFS_CS_READY) {
+		error = clp->cl_cons_state;
+		nfs_put_client(clp);
+		return ERR_PTR(error);
+	}
+
+	BUG_ON(clp->cl_cons_state != NFS_CS_READY);
+
+	dprintk("--> nfs_get_client() = %p [share]\n", clp);
+	return clp;
+}
+
+/*
+ * Mark a client as ready or failed
+ */
+static void nfs_mark_client_ready(struct nfs_client *clp, int state)
+{
+	clp->cl_cons_state = state;
+	wake_up_all(&nfs_client_active_wq);
+}
+
+/*
+ * Initialise the timeout values for a connection
+ */
+static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
+				    unsigned int timeo, unsigned int retrans)
+{
+	to->to_initval = timeo * HZ / 10;
+	to->to_retries = retrans;
+	if (!to->to_retries)
+		to->to_retries = 2;
+
+	switch (proto) {
+	case IPPROTO_TCP:
+		if (!to->to_initval)
+			to->to_initval = 60 * HZ;
+		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
+			to->to_initval = NFS_MAX_TCP_TIMEOUT;
+		to->to_increment = to->to_initval;
+		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+		to->to_exponential = 0;
+		break;
+	case IPPROTO_UDP:
+	default:
+		if (!to->to_initval)
+			to->to_initval = 11 * HZ / 10;
+		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
+			to->to_initval = NFS_MAX_UDP_TIMEOUT;
+		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
+		to->to_exponential = 1;
+		break;
+	}
+}
+
+/*
+ * Create an RPC client handle
+ */
+static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
+						unsigned int timeo,
+						unsigned int retrans,
+						rpc_authflavor_t flavor)
+{
+	struct rpc_timeout	timeparms;
+	struct rpc_clnt		*clnt = NULL;
+	struct rpc_create_args args = {
+		.protocol	= proto,
+		.address	= (struct sockaddr *)&clp->cl_addr,
+		.addrsize	= sizeof(clp->cl_addr),
+		.timeout	= &timeparms,
+		.servername	= clp->cl_hostname,
+		.program	= &nfs_program,
+		.version	= clp->rpc_ops->version,
+		.authflavor	= flavor,
+	};
+
+	if (!IS_ERR(clp->cl_rpcclient))
+		return 0;
+
+	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
+	clp->retrans_timeo = timeparms.to_initval;
+	clp->retrans_count = timeparms.to_retries;
+
+	clnt = rpc_create(&args);
+	if (IS_ERR(clnt)) {
+		dprintk("%s: cannot create RPC client. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(clnt));
+		return PTR_ERR(clnt);
+	}
+
+	clp->cl_rpcclient = clnt;
+	return 0;
+}
+
+/*
+ * Version 2 or 3 client destruction
+ */
+static void nfs_destroy_server(struct nfs_server *server)
+{
+	if (!IS_ERR(server->client_acl))
+		rpc_shutdown_client(server->client_acl);
+
+	if (!(server->flags & NFS_MOUNT_NONLM))
+		lockd_down();	/* release rpc.lockd */
+}
+
+/*
+ * Version 2 or 3 lockd setup
+ */
+static int nfs_start_lockd(struct nfs_server *server)
+{
+	int error = 0;
+
+	if (server->nfs_client->cl_nfsversion > 3)
+		goto out;
+	if (server->flags & NFS_MOUNT_NONLM)
+		goto out;
+	error = lockd_up();
+	if (error < 0)
+		server->flags |= NFS_MOUNT_NONLM;
+	else
+		server->destroy = nfs_destroy_server;
+out:
+	return error;
+}
+
+/*
+ * Initialise an NFSv3 ACL client connection
+ */
+#ifdef CONFIG_NFS_V3_ACL
+static void nfs_init_server_aclclient(struct nfs_server *server)
+{
+	if (server->nfs_client->cl_nfsversion != 3)
+		goto out_noacl;
+	if (server->flags & NFS_MOUNT_NOACL)
+		goto out_noacl;
+
+	server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
+	if (IS_ERR(server->client_acl))
+		goto out_noacl;
+
+	/* No errors! Assume that Sun nfsacls are supported */
+	server->caps |= NFS_CAP_ACLS;
+	return;
+
+out_noacl:
+	server->caps &= ~NFS_CAP_ACLS;
+}
+#else
+static inline void nfs_init_server_aclclient(struct nfs_server *server)
+{
+	server->flags &= ~NFS_MOUNT_NOACL;
+	server->caps &= ~NFS_CAP_ACLS;
+}
+#endif
+
+/*
+ * Create a general RPC client
+ */
+static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
+{
+	struct nfs_client *clp = server->nfs_client;
+
+	server->client = rpc_clone_client(clp->cl_rpcclient);
+	if (IS_ERR(server->client)) {
+		dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
+		return PTR_ERR(server->client);
+	}
+
+	if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
+		struct rpc_auth *auth;
+
+		auth = rpcauth_create(pseudoflavour, server->client);
+		if (IS_ERR(auth)) {
+			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
+			return PTR_ERR(auth);
+		}
+	}
+	server->client->cl_softrtry = 0;
+	if (server->flags & NFS_MOUNT_SOFT)
+		server->client->cl_softrtry = 1;
+
+	server->client->cl_intr = 0;
+	if (server->flags & NFS4_MOUNT_INTR)
+		server->client->cl_intr = 1;
+
+	return 0;
+}
+
+/*
+ * Initialise an NFS2 or NFS3 client
+ */
+static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
+{
+	int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+	int error;
+
+	if (clp->cl_cons_state == NFS_CS_READY) {
+		/* the client is already initialised */
+		dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
+		return 0;
+	}
+
+	/* Check NFS protocol revision and initialize RPC op vector */
+	clp->rpc_ops = &nfs_v2_clientops;
+#ifdef CONFIG_NFS_V3
+	if (clp->cl_nfsversion == 3)
+		clp->rpc_ops = &nfs_v3_clientops;
+#endif
+	/*
+	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
+	 * - RFC 2623, sec 2.3.2
+	 */
+	error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
+			RPC_AUTH_UNIX);
+	if (error < 0)
+		goto error;
+	nfs_mark_client_ready(clp, NFS_CS_READY);
+	return 0;
+
+error:
+	nfs_mark_client_ready(clp, error);
+	dprintk("<-- nfs_init_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Create a version 2 or 3 client
+ */
+static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
+{
+	struct nfs_client *clp;
+	int error, nfsvers = 2;
+
+	dprintk("--> nfs_init_server()\n");
+
+#ifdef CONFIG_NFS_V3
+	if (data->flags & NFS_MOUNT_VER3)
+		nfsvers = 3;
+#endif
+
+	/* Allocate or find a client reference we can use */
+	clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
+	if (IS_ERR(clp)) {
+		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
+		return PTR_ERR(clp);
+	}
+
+	error = nfs_init_client(clp, data);
+	if (error < 0)
+		goto error;
+
+	server->nfs_client = clp;
+
+	/* Initialise the client representation from the mount data */
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+
+	server->acregmin = data->acregmin * HZ;
+	server->acregmax = data->acregmax * HZ;
+	server->acdirmin = data->acdirmin * HZ;
+	server->acdirmax = data->acdirmax * HZ;
+
+	/* Start lockd here, before we might error out */
+	error = nfs_start_lockd(server);
+	if (error < 0)
+		goto error;
+
+	error = nfs_init_server_rpcclient(server, data->pseudoflavor);
+	if (error < 0)
+		goto error;
+
+	server->namelen  = data->namlen;
+	/* Create a client RPC handle for the NFSv3 ACL management interface */
+	nfs_init_server_aclclient(server);
+	if (clp->cl_nfsversion == 3) {
+		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+			server->namelen = NFS3_MAXNAMLEN;
+		server->caps |= NFS_CAP_READDIRPLUS;
+	} else {
+		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+			server->namelen = NFS2_MAXNAMLEN;
+	}
+
+	dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
+	return 0;
+
+error:
+	server->nfs_client = NULL;
+	nfs_put_client(clp);
+	dprintk("<-- nfs_init_server() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Load up the server record from information gained in an fsinfo record
+ */
+static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
+{
+	unsigned long max_rpc_payload;
+
+	/* Work out a lot of parameters */
+	if (server->rsize == 0)
+		server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
+	if (server->wsize == 0)
+		server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
+
+	if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
+		server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
+	if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
+		server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
+
+	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
+	if (server->rsize > max_rpc_payload)
+		server->rsize = max_rpc_payload;
+	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
+		server->rsize = NFS_MAX_FILE_IO_SIZE;
+	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+
+	if (server->wsize > max_rpc_payload)
+		server->wsize = max_rpc_payload;
+	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+		server->wsize = NFS_MAX_FILE_IO_SIZE;
+	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+
+	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+	if (server->dtsize > PAGE_CACHE_SIZE)
+		server->dtsize = PAGE_CACHE_SIZE;
+	if (server->dtsize > server->rsize)
+		server->dtsize = server->rsize;
+
+	if (server->flags & NFS_MOUNT_NOAC) {
+		server->acregmin = server->acregmax = 0;
+		server->acdirmin = server->acdirmax = 0;
+	}
+
+	server->maxfilesize = fsinfo->maxfilesize;
+
+	/* We're airborne. Set the socket buffer size */
+	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+}
+
+/*
+ * Probe filesystem information, including the FSID on v2/v3
+ */
+static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
+{
+	struct nfs_fsinfo fsinfo;
+	struct nfs_client *clp = server->nfs_client;
+	int error;
+
+	dprintk("--> nfs_probe_fsinfo()\n");
+
+	if (clp->rpc_ops->set_capabilities != NULL) {
+		error = clp->rpc_ops->set_capabilities(server, mntfh);
+		if (error < 0)
+			goto out_error;
+	}
+
+	fsinfo.fattr = fattr;
+	nfs_fattr_init(fattr);
+	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
+	if (error < 0)
+		goto out_error;
+
+	nfs_server_set_fsinfo(server, &fsinfo);
+
+	/* Get some general file system info */
+	if (server->namelen == 0) {
+		struct nfs_pathconf pathinfo;
+
+		pathinfo.fattr = fattr;
+		nfs_fattr_init(fattr);
+
+		if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0)
+			server->namelen = pathinfo.max_namelen;
+	}
+
+	dprintk("<-- nfs_probe_fsinfo() = 0\n");
+	return 0;
+
+out_error:
+	dprintk("nfs_probe_fsinfo: error = %d\n", -error);
+	return error;
+}
+
+/* Copy the settings a duplicated server record inherits from its source:
+ * the mount flags, the four attribute cache timeouts and the capability bits.
+ * No other field of the target record is written here. */
+static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
+{
+	target->flags = source->flags;
+	target->acregmin = source->acregmin;
+	target->acregmax = source->acregmax;
+	target->acdirmin = source->acdirmin;
+	target->acdirmax = source->acdirmax;
+	target->caps = source->caps;
+}
+
+/*
+ * Allocate a zeroed server record and give it empty list links and I/O stats
+ */
+static struct nfs_server *nfs_alloc_server(void)
+{
+	struct nfs_server *srv = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
+
+	if (srv == NULL)
+		goto out;
+
+	/* no RPC clients are attached yet, so both pointers hold an error value */
+	srv->client_acl = ERR_PTR(-EINVAL);
+	srv->client = srv->client_acl;
+	INIT_LIST_HEAD(&srv->client_link);
+	INIT_LIST_HEAD(&srv->master_link);
+
+	srv->io_stats = nfs_alloc_iostats();
+	if (srv->io_stats == NULL) {
+		kfree(srv);
+		srv = NULL;
+	}
+
+out:
+	return srv;
+}
+
+/* Free up a server record: unlink it from the client and volume lists, run
+ * the optional destroy hook, shut down the RPC client if one was created, drop
+ * the nfs_client reference and release the iostats and the record itself. */
+void nfs_free_server(struct nfs_server *server)
+{
+	dprintk("--> nfs_free_server()\n");
+
+	spin_lock(&nfs_client_lock);
+	list_del(&server->client_link);
+	list_del(&server->master_link);
+	spin_unlock(&nfs_client_lock);
+
+	if (server->destroy != NULL)
+		server->destroy(server);
+	if (!IS_ERR(server->client))	/* still the ERR_PTR from nfs_alloc_server() if never created */
+		rpc_shutdown_client(server->client);
+
+	nfs_put_client(server->nfs_client);
+
+	nfs_free_iostats(server->io_stats);
+	kfree(server);
+	nfs_release_automount_timer();
+	dprintk("<-- nfs_free_server()\n");
+}
+
+/*
+ * Create a version 2 or 3 volume record, keyed on server and FSID
+ * - returns the new record, or an ERR_PTR() after freeing the partial record
+ */
+struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
+				     struct nfs_fh *mntfh)
+{
+	struct nfs_server *server;
+	struct nfs_fattr fattr;
+	int error;
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Get a client representation */
+	error = nfs_init_server(server, data);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* Probe the root fh to retrieve its FSID; fall back to getattr if no attributes came back */
+	error = nfs_probe_fsinfo(server, mntfh, &fattr);
+	if (error < 0)
+		goto error;
+	if (!(fattr.valid & NFS_ATTR_FATTR)) {
+		error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+		if (error < 0) {
+			dprintk("nfs_create_server: getattr error = %d\n", -error);
+			goto error;
+		}
+	}
+	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+
+	dprintk("Server FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	spin_lock(&nfs_client_lock);	/* publish on the client's superblock list and the global volume list */
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+	return server;
+
+error:
+	nfs_free_server(server);
+	return ERR_PTR(error);
+}
+
+#ifdef CONFIG_NFS_V4
+/* Initialise an NFS4 client record unless it is already NFS_CS_READY: set the
+ * v4 op vector, create the RPC client and the idmapper, then mark it ready.
+ * On failure the client is marked with the negative error, which is returned. */
+static int nfs4_init_client(struct nfs_client *clp,
+		int proto, int timeo, int retrans,
+		rpc_authflavor_t authflavour)
+{
+	int error;
+
+	if (clp->cl_cons_state == NFS_CS_READY) {
+		/* the client is initialised already */
+		dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
+		return 0;
+	}
+
+	/* Initialize the RPC op vector; this path only ever uses the NFSv4 ops */
+	clp->rpc_ops = &nfs_v4_clientops;
+
+	error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour);
+	if (error < 0)
+		goto error;
+
+	error = nfs_idmap_new(clp);
+	if (error < 0) {
+		dprintk("%s: failed to create idmapper. Error = %d\n",
+			__FUNCTION__, error);
+		goto error;
+	}
+	__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);	/* record that the idmapper now exists */
+
+	nfs_mark_client_ready(clp, NFS_CS_READY);
+	return 0;
+
+error:
+	nfs_mark_client_ready(clp, error);
+	dprintk("<-- nfs4_init_client() = xerror %d\n", error);
+	return error;
+}
+
+/* Set up an NFS4 client: look up or allocate the nfs_client for this server
+ * address, initialise it and attach it to the server record.  The client
+ * reference is dropped again if initialisation fails. */
+static int nfs4_set_client(struct nfs_server *server,
+		const char *hostname, const struct sockaddr_in *addr,
+		rpc_authflavor_t authflavour,
+		int proto, int timeo, int retrans)
+{
+	struct nfs_client *clp;
+	int error;
+
+	dprintk("--> nfs4_set_client()\n");
+
+	/* Allocate or find a client reference we can use */
+	clp = nfs_get_client(hostname, addr, 4);
+	if (IS_ERR(clp)) {
+		error = PTR_ERR(clp);
+		goto error;
+	}
+	error = nfs4_init_client(clp, proto, timeo, retrans, authflavour);
+	if (error < 0)
+		goto error_put;
+
+	server->nfs_client = clp;	/* the server record now owns the reference */
+	dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
+	return 0;
+
+error_put:
+	nfs_put_client(clp);
+error:
+	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+	return error;
+}
+
+/* Initialise a version 4 server record from the mount data: flags, optional
+ * rsize/wsize, attribute cache timeouts (mount values multiplied by HZ) and
+ * finally the RPC client.  Returns the result of nfs_init_server_rpcclient(). */
+static int nfs4_init_server(struct nfs_server *server,
+		const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
+{
+	int error;
+
+	dprintk("--> nfs4_init_server()\n");
+
+	/* Initialise the client representation from the mount data */
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+	server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+	if (data->rsize)	/* a zero size in the mount data leaves the field unchanged */
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+
+	server->acregmin = data->acregmin * HZ;
+	server->acregmax = data->acregmax * HZ;
+	server->acdirmin = data->acdirmin * HZ;
+	server->acdirmax = data->acdirmax * HZ;
+
+	error = nfs_init_server_rpcclient(server, authflavour);
+
+	/* Done */
+	dprintk("<-- nfs4_init_server() = %d\n", error);
+	return error;
+}
+
+/*
+ * Create a version 4 volume record, keyed on server and FSID
+ * - returns the new record, or an ERR_PTR() after freeing the partial record
+ */
+struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
+				      const char *hostname,
+				      const struct sockaddr_in *addr,
+				      const char *mntpath,
+				      const char *ip_addr,
+				      rpc_authflavor_t authflavour,
+				      struct nfs_fh *mntfh)
+{
+	struct nfs_fattr fattr;
+	struct nfs_server *server;
+	int error;
+
+	dprintk("--> nfs4_create_server()\n");
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Get a client record */
+	error = nfs4_set_client(server, hostname, addr, authflavour,
+			data->proto, data->timeo, data->retrans);
+	if (error < 0)
+		goto error;
+
+	/* set up the general RPC client */
+	error = nfs4_init_server(server, data, authflavour);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* Walk mntpath on the server (presumably filling in mntfh and the FSID - confirm) */
+	error = nfs4_path_walk(server, mntfh, mntpath);
+	if (error < 0)
+		goto error;
+
+	dprintk("Server FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
+	dprintk("Mount FH: %d\n", mntfh->size);
+
+	error = nfs_probe_fsinfo(server, mntfh, &fattr);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	spin_lock(&nfs_client_lock);	/* publish on the client's superblock list and the global volume list */
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+	dprintk("<-- nfs4_create_server() = %p\n", server);
+	return server;
+
+error:
+	nfs_free_server(server);
+	dprintk("<-- nfs4_create_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+/* Create an NFS4 referral server record for data->hostname/data->addr, taking
+ * transport protocol, timeouts and user settings from the parent superblock's
+ * server.  Returns the new record, or an ERR_PTR() after freeing it. */
+struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
+					       struct nfs_fh *fh)
+{
+	struct nfs_client *parent_client;
+	struct nfs_server *server, *parent_server;
+	struct nfs_fattr fattr;
+	int error;
+
+	dprintk("--> nfs4_create_referral_server()\n");
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	parent_server = NFS_SB(data->sb);
+	parent_client = parent_server->nfs_client;
+
+	/* Get a client representation.
+	 * Note: the transport protocol and retry settings come from the parent */
+	error = nfs4_set_client(server, data->hostname, data->addr,
+			data->authflavor,
+			parent_server->client->cl_xprt->prot,
+			parent_client->retrans_timeo,
+			parent_client->retrans_count);
+	if (error < 0)
+		goto error;
+
+	/* Initialise the client representation from the parent server */
+	nfs_server_copy_userdata(server, parent_server);
+	server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+	error = nfs_init_server_rpcclient(server, data->authflavor);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* probe the filesystem info for this server filesystem */
+	error = nfs_probe_fsinfo(server, fh, &fattr);
+	if (error < 0)
+		goto error;
+
+	dprintk("Referral FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+
+	dprintk("<-- nfs4_create_referral_server() = %p\n", server);
+	return server;
+
+error:
+	nfs_free_server(server);
+	dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+#endif /* CONFIG_NFS_V4 */
+
+/* Clone an NFS2, NFS3 or NFS4 server record: share the source's nfs_client
+ * (taking a reference), copy its user settings, adopt fattr's FSID, then set
+ * up RPC clients, probe fsinfo and start lockd.  Returns ERR_PTR() on failure. */
+struct nfs_server *nfs_clone_server(struct nfs_server *source,
+				    struct nfs_fh *fh,
+				    struct nfs_fattr *fattr)
+{
+	struct nfs_server *server;
+	struct nfs_fattr fattr_fsinfo;
+	int error;
+
+	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
+		(unsigned long long) fattr->fsid.major,
+		(unsigned long long) fattr->fsid.minor);
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy data from the source */
+	server->nfs_client = source->nfs_client;
+	atomic_inc(&server->nfs_client->cl_count);	/* the clone holds its own client reference */
+	nfs_server_copy_userdata(server, source);
+
+	server->fsid = fattr->fsid;
+
+	error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
+	if (error < 0)
+		goto out_free_server;
+	if (!IS_ERR(source->client_acl))	/* only set up an ACL client if the source has one */
+		nfs_init_server_aclclient(server);
+
+	/* probe the filesystem info for this server filesystem */
+	error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
+	if (error < 0)
+		goto out_free_server;
+
+	dprintk("Cloned FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
+
+	error = nfs_start_lockd(server);
+	if (error < 0)
+		goto out_free_server;
+
+	spin_lock(&nfs_client_lock);	/* publish on the client's superblock list and the global volume list */
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+
+	dprintk("<-- nfs_clone_server() = %p\n", server);
+	return server;
+
+out_free_server:
+	nfs_free_server(server);
+	dprintk("<-- nfs_clone_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_fs_nfs;
+
+static int nfs_server_list_open(struct inode *inode, struct file *file);
+static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_server_list_stop(struct seq_file *p, void *v);
+static int nfs_server_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_server_list_ops = {
+	.start	= nfs_server_list_start,
+	.next	= nfs_server_list_next,
+	.stop	= nfs_server_list_stop,
+	.show	= nfs_server_list_show,
+};
+
+static struct file_operations nfs_server_list_fops = {
+	.open		= nfs_server_list_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int nfs_volume_list_open(struct inode *inode, struct file *file);
+static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_volume_list_stop(struct seq_file *p, void *v);
+static int nfs_volume_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_volume_list_ops = {
+	.start	= nfs_volume_list_start,
+	.next	= nfs_volume_list_next,
+	.stop	= nfs_volume_list_stop,
+	.show	= nfs_volume_list_show,
+};
+
+static struct file_operations nfs_volume_list_fops = {
+	.open		= nfs_volume_list_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * Open handler for "/proc/fs/nfsfs/servers", the summary of servers with which
+ * we're dealing: attach the server list iterator and the proc entry's data
+ */
+static int nfs_server_list_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int err = seq_open(file, &nfs_server_list_ops);
+
+	/* on success the seq_file is found in file->private_data */
+	if (err >= 0) {
+		seq = file->private_data;
+		seq->private = PDE(inode)->data;
+		err = 0;
+	}
+
+	return err;
+}
+
+/* set up the iterator: take nfs_client_lock (released in the stop handler) and
+ * return the header token for position 0, otherwise client list entry number
+ * pos (counting from 1), or NULL when pos is past the end of the list */
+static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	spin_lock(&nfs_client_lock);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &nfs_client_list)
+		if (!pos--)
+			break;
+
+	return _p != &nfs_client_list ? _p : NULL;	/* back at the list head means we ran off the end */
+}
+
+/*
+ * advance the position and step to the following client, or NULL at the end
+ */
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *next;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		next = nfs_client_list.next;
+	else
+		next = ((struct list_head *) v)->next;
+	return (next == &nfs_client_list) ? NULL : next;
+}
+
+/*
+ * clean up after reading from the client list: drop the lock taken by the start handler
+ */
+static void nfs_server_list_stop(struct seq_file *p, void *v)
+{
+	spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line, then one line per client: version, address, port, use count, hostname
+ */
+static int nfs_server_list_show(struct seq_file *m, void *v)
+{
+	struct nfs_client *clp;
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "NV SERVER   PORT USE HOSTNAME\n");
+		return 0;
+	}
+
+	/* display one client per line on subsequent lines; v is its cl_share_link */
+	clp = list_entry(v, struct nfs_client, cl_share_link);
+
+	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
+		   clp->cl_nfsversion,
+		   NIPQUAD(clp->cl_addr.sin_addr),
+		   ntohs(clp->cl_addr.sin_port),
+		   atomic_read(&clp->cl_count),
+		   clp->cl_hostname);
+
+	return 0;
+}
+
+/*
+ * Open handler for "/proc/fs/nfsfs/volumes", the summary of extant volumes
+ */
+static int nfs_volume_list_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int err = seq_open(file, &nfs_volume_list_ops);
+
+	/* on success the seq_file is found in file->private_data */
+	if (err >= 0) {
+		seq = file->private_data;
+		seq->private = PDE(inode)->data;
+		err = 0;
+	}
+
+	return err;
+}
+
+/* set up the iterator: take nfs_client_lock (released in the stop handler) and
+ * return the header token for position 0, otherwise volume list entry number
+ * pos (counting from 1), or NULL when pos is past the end of the list */
+static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	spin_lock(&nfs_client_lock);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &nfs_volume_list)
+		if (!pos--)
+			break;
+
+	return _p != &nfs_volume_list ? _p : NULL;	/* back at the list head means we ran off the end */
+}
+
+/*
+ * advance the position and step to the following volume, or NULL at the end
+ */
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *next;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		next = nfs_volume_list.next;
+	else
+		next = ((struct list_head *) v)->next;
+	return (next == &nfs_volume_list) ? NULL : next;
+}
+
+/*
+ * clean up after reading from the volume list: drop the lock taken by the start handler
+ */
+static void nfs_volume_list_stop(struct seq_file *p, void *v)
+{
+	spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line, then one line per volume: version, server address, port, device, FSID
+ */
+static int nfs_volume_list_show(struct seq_file *m, void *v)
+{
+	struct nfs_server *server;
+	struct nfs_client *clp;
+	char dev[22], fsid[34];	/* "%u:%u" needs 10+1+10+NUL, "%llx:%llx" needs 16+1+16+NUL */
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "NV SERVER   PORT DEV     FSID\n");
+		return 0;
+	}
+	/* display one volume per line on subsequent lines; v is its master_link */
+	server = list_entry(v, struct nfs_server, master_link);
+	clp = server->nfs_client;
+
+	snprintf(dev, sizeof(dev), "%u:%u",
+		 MAJOR(server->s_dev), MINOR(server->s_dev));
+
+	snprintf(fsid, sizeof(fsid), "%llx:%llx",
+		 (unsigned long long) server->fsid.major,
+		 (unsigned long long) server->fsid.minor);
+
+	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
+		   clp->cl_nfsversion,
+		   NIPQUAD(clp->cl_addr.sin_addr),
+		   ntohs(clp->cl_addr.sin_port),
+		   dev,
+		   fsid);
+
+	return 0;
+}
+
+/* initialise the /proc/fs/nfsfs/ directory with its "servers" and "volumes"
+ * files; returns 0 on success, or -ENOMEM after removing whatever had already
+ * been created */
+int __init nfs_fs_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+	proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs);
+	if (!proc_fs_nfs)
+		goto error_0;
+
+	proc_fs_nfs->owner = THIS_MODULE;
+
+	/* a file of servers with which we're dealing */
+	p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs);
+	if (!p)
+		goto error_1;
+
+	p->proc_fops = &nfs_server_list_fops;
+	p->owner = THIS_MODULE;
+
+	/* a file of volumes that we have mounted */
+	p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs);
+	if (!p)
+		goto error_2;
+
+	p->proc_fops = &nfs_volume_list_fops;
+	p->owner = THIS_MODULE;
+	return 0;
+
+error_2:	/* "volumes" failed: remove "servers", then fall through to the directory */
+	remove_proc_entry("servers", proc_fs_nfs);
+error_1:
+	remove_proc_entry("nfsfs", proc_root_fs);
+error_0:
+	return -ENOMEM;
+}
+
+/*
+ * clean up the /proc/fs/nfsfs/ directory: remove both files, then the directory itself
+ */
+void nfs_fs_proc_exit(void)
+{
+	remove_proc_entry("volumes", proc_fs_nfs);
+	remove_proc_entry("servers", proc_fs_nfs);
+	remove_proc_entry("nfsfs", proc_root_fs);
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 9540a31..5713367 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -18,6 +18,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "internal.h"
 
 static struct nfs_delegation *nfs_alloc_delegation(void)
 {
@@ -52,7 +53,7 @@
 			case -NFS4ERR_EXPIRED:
 				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
 			case -NFS4ERR_STALE_CLIENTID:
-				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state);
+				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
 				goto out_err;
 		}
 	}
@@ -114,7 +115,7 @@
  */
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int status = 0;
@@ -145,7 +146,7 @@
 					sizeof(delegation->stateid)) != 0 ||
 				delegation->type != nfsi->delegation->type) {
 			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
-					__FUNCTION__, NIPQUAD(clp->cl_addr));
+					__FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr));
 			status = -EIO;
 		}
 	}
@@ -176,7 +177,7 @@
  */
 int __nfs_inode_return_delegation(struct inode *inode)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
@@ -208,7 +209,7 @@
  */
 void nfs_return_all_delegations(struct super_block *sb)
 {
-	struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
+	struct nfs_client *clp = NFS_SB(sb)->nfs_client;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -232,7 +233,7 @@
 
 int nfs_do_expire_all_delegations(void *ptr)
 {
-	struct nfs4_client *clp = ptr;
+	struct nfs_client *clp = ptr;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -254,11 +255,11 @@
 	}
 out:
 	spin_unlock(&clp->cl_lock);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put_and_exit(0);
 }
 
-void nfs_expire_all_delegations(struct nfs4_client *clp)
+void nfs_expire_all_delegations(struct nfs_client *clp)
 {
 	struct task_struct *task;
 
@@ -266,17 +267,17 @@
 	atomic_inc(&clp->cl_count);
 	task = kthread_run(nfs_do_expire_all_delegations, clp,
 			"%u.%u.%u.%u-delegreturn",
-			NIPQUAD(clp->cl_addr));
+			NIPQUAD(clp->cl_addr.sin_addr));
 	if (!IS_ERR(task))
 		return;
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put(THIS_MODULE);
 }
 
 /*
  * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
  */
-void nfs_handle_cb_pathdown(struct nfs4_client *clp)
+void nfs_handle_cb_pathdown(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
 	struct inode *inode;
@@ -299,7 +300,7 @@
 
 struct recall_threadargs {
 	struct inode *inode;
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	const nfs4_stateid *stateid;
 
 	struct completion started;
@@ -310,7 +311,7 @@
 {
 	struct recall_threadargs *args = (struct recall_threadargs *)data;
 	struct inode *inode = igrab(args->inode);
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
@@ -371,7 +372,7 @@
 /*
  * Retrieve the inode associated with a delegation
  */
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
 {
 	struct nfs_delegation *delegation;
 	struct inode *res = NULL;
@@ -389,7 +390,7 @@
 /*
  * Mark all delegations as needing to be reclaimed
  */
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
+void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
 	spin_lock(&clp->cl_lock);
@@ -401,7 +402,7 @@
 /*
  * Reap all unclaimed delegations after reboot recovery is done
  */
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation, *n;
 	LIST_HEAD(head);
@@ -423,7 +424,7 @@
 
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3858694..2cfd4b2 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,13 +29,13 @@
 int __nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 void nfs_return_all_delegations(struct super_block *sb);
-void nfs_expire_all_delegations(struct nfs4_client *clp);
-void nfs_handle_cb_pathdown(struct nfs4_client *clp);
+void nfs_expire_all_delegations(struct nfs_client *clp);
+void nfs_handle_cb_pathdown(struct nfs_client *clp);
 
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
+void nfs_delegation_mark_reclaim(struct nfs_client *clp);
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e7ffb4d..3419c2d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -30,7 +30,9 @@
 #include <linux/nfs_mount.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
+#include <linux/pagevec.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -870,14 +872,14 @@
 	return (nd->intent.open.flags & O_EXCL) != 0;
 }
 
-static inline int nfs_reval_fsid(struct inode *dir,
-		struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
+				 struct nfs_fh *fh, struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 
 	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
 		/* Revalidate fsid on root dir */
-		return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode);
+		return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
 	return 0;
 }
 
@@ -902,9 +904,15 @@
 
 	lock_kernel();
 
-	/* If we're doing an exclusive create, optimize away the lookup */
-	if (nfs_is_exclusive_create(dir, nd))
-		goto no_entry;
+	/*
+	 * If we're doing an exclusive create, optimize away the lookup
+	 * but don't hash the dentry.
+	 */
+	if (nfs_is_exclusive_create(dir, nd)) {
+		d_instantiate(dentry, NULL);
+		res = NULL;
+		goto out_unlock;
+	}
 
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error == -ENOENT)
@@ -913,7 +921,7 @@
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	error = nfs_reval_fsid(dir, &fhandle, &fattr);
+	error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
 	if (error < 0) {
 		res = ERR_PTR(error);
 		goto out_unlock;
@@ -922,8 +930,9 @@
 	res = (struct dentry *)inode;
 	if (IS_ERR(res))
 		goto out_unlock;
+
 no_entry:
-	res = d_add_unique(dentry, inode);
+	res = d_materialise_unique(dentry, inode);
 	if (res != NULL)
 		dentry = res;
 	nfs_renew_times(dentry);
@@ -1117,11 +1126,13 @@
 		dput(dentry);
 		return NULL;
 	}
-	alias = d_add_unique(dentry, inode);
+
+	alias = d_materialise_unique(dentry, inode);
 	if (alias != NULL) {
 		dput(dentry);
 		dentry = alias;
 	}
+
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	return dentry;
@@ -1143,23 +1154,22 @@
 		struct inode *dir = dentry->d_parent->d_inode;
 		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
 		if (error)
-			goto out_err;
+			return error;
 	}
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		struct nfs_server *server = NFS_SB(dentry->d_sb);
-		error = server->rpc_ops->getattr(server, fhandle, fattr);
+		error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
 		if (error < 0)
-			goto out_err;
+			return error;
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
 	error = PTR_ERR(inode);
 	if (IS_ERR(inode))
-		goto out_err;
+		return error;
 	d_instantiate(dentry, inode);
+	if (d_unhashed(dentry))
+		d_rehash(dentry);
 	return 0;
-out_err:
-	d_drop(dentry);
-	return error;
 }
 
 /*
@@ -1440,48 +1450,82 @@
 	return error;
 }
 
-static int
-nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+/*
+ * To create a symbolic link, most file systems instantiate a new inode,
+ * add a page to it containing the path, then write it out to the disk
+ * using prepare_write/commit_write.
+ *
+ * Unfortunately the NFS client can't create the in-core inode first
+ * because it needs a file handle to create an in-core inode (see
+ * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
+ * symlink request has completed on the server.
+ *
+ * So instead we allocate a raw page, copy the symname into it, then do
+ * the SYMLINK request with the page as the buffer.  If it succeeds, we
+ * now have a new file handle and can instantiate an in-core NFS inode
+ * and move the raw page into its mapping.
+ */
+static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
+	struct pagevec lru_pvec;
+	struct page *page;
+	char *kaddr;
 	struct iattr attr;
-	struct nfs_fattr sym_attr;
-	struct nfs_fh sym_fh;
-	struct qstr qsymname;
+	unsigned int pathlen = strlen(symname);
 	int error;
 
 	dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry->d_name.name, symname);
 
-#ifdef NFS_PARANOIA
-if (dentry->d_inode)
-printk("nfs_proc_symlink: %s/%s not negative!\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
-	/*
-	 * Fill in the sattr for the call.
- 	 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
-	 */
-	attr.ia_valid = ATTR_MODE;
-	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+	if (pathlen > PAGE_SIZE)
+		return -ENAMETOOLONG;
 
-	qsymname.name = symname;
-	qsymname.len  = strlen(symname);
+	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+	attr.ia_valid = ATTR_MODE;
 
 	lock_kernel();
-	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
-					  &attr, &sym_fh, &sym_attr);
-	nfs_end_data_update(dir);
-	if (!error) {
-		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
-	} else {
-		if (error == -EEXIST)
-			printk("nfs_proc_symlink: %s/%s already exists??\n",
-			       dentry->d_parent->d_name.name, dentry->d_name.name);
-		d_drop(dentry);
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		unlock_kernel();
+		return -ENOMEM;
 	}
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	memcpy(kaddr, symname, pathlen);
+	if (pathlen < PAGE_SIZE)
+		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	nfs_begin_data_update(dir);
+	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
+	nfs_end_data_update(dir);
+	if (error != 0) {
+		dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
+			dir->i_sb->s_id, dir->i_ino,
+			dentry->d_name.name, symname, error);
+		d_drop(dentry);
+		__free_page(page);
+		unlock_kernel();
+		return error;
+	}
+
+	/*
+	 * No big deal if we can't add this page to the page cache here.
+	 * READLINK will get the missing page from the server if needed.
+	 */
+	pagevec_init(&lru_pvec, 0);
+	if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
+							GFP_KERNEL)) {
+		if (!pagevec_add(&lru_pvec, page))
+			__pagevec_lru_add(&lru_pvec);
+		SetPageUptodate(page);
+		unlock_page(page);
+	} else
+		__free_page(page);
+
 	unlock_kernel();
-	return error;
+	return 0;
 }
 
 static int 
@@ -1638,35 +1682,211 @@
 	return error;
 }
 
+static DEFINE_SPINLOCK(nfs_access_lru_lock);	/* guards nfs_access_lru_list */
+static LIST_HEAD(nfs_access_lru_list);		/* inodes linked via access_cache_inode_lru */
+static atomic_long_t nfs_access_nr_entries;	/* access entries currently allocated */
+/* Free an access cache entry: drop its cred ref and the global entry count. */
+static void nfs_access_free_entry(struct nfs_access_entry *entry)
+{
+	put_rpccred(entry->cred);
+	kfree(entry);
+	smp_mb__before_atomic_dec();
+	atomic_long_dec(&nfs_access_nr_entries);
+	smp_mb__after_atomic_dec();
+}
+/* Shrinker: free at most nr_to_scan entries; returns entry count scaled by pressure. */
+int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+{
+	LIST_HEAD(head);
+	struct nfs_inode *nfsi;
+	struct nfs_access_entry *cache;
+	struct inode *inode;
+
+restart:
+	spin_lock(&nfs_access_lru_lock);
+	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+		if (nr_to_scan-- == 0)
+			break;
+		inode = igrab(&nfsi->vfs_inode);
+		if (inode == NULL)
+			continue;
+		spin_lock(&inode->i_lock);
+		if (list_empty(&nfsi->access_cache_entry_lru))
+			goto remove_lru_entry;
+		cache = list_entry(nfsi->access_cache_entry_lru.next,
+				struct nfs_access_entry, lru);
+		list_move(&cache->lru, &head);
+		rb_erase(&cache->rb_node, &nfsi->access_cache);
+		if (!list_empty(&nfsi->access_cache_entry_lru))
+			list_move_tail(&nfsi->access_cache_inode_lru,
+					&nfs_access_lru_list);
+		else {
+remove_lru_entry:
+			list_del_init(&nfsi->access_cache_inode_lru);
+			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+		}
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&nfs_access_lru_lock); /* iput() may need this lock */
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&nfs_access_lru_lock);
+	while (!list_empty(&head)) {
+		cache = list_entry(head.next, struct nfs_access_entry, lru);
+		list_del(&cache->lru);
+		nfs_access_free_entry(cache);
+	}
+	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+}
+/* Empty the inode's access cache. Entered with inode->i_lock held; drops it. */
+static void __nfs_access_zap_cache(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct rb_root *root_node = &nfsi->access_cache;
+	struct rb_node *n, *dispose = NULL;
+	struct nfs_access_entry *entry;
+
+	/* Unhook every entry, chaining the nodes through rb_left for disposal */
+	while ((n = rb_first(root_node)) != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+		rb_erase(n, root_node);
+		list_del(&entry->lru);
+		n->rb_left = dispose;
+		dispose = n;
+	}
+	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	spin_unlock(&inode->i_lock);
+
+	/* Free the unhooked entries now that i_lock has been released */
+	while (dispose != NULL) {
+		n = dispose;
+		dispose = n->rb_left;
+		nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+	}
+}
+/* Take the inode off the global LRU list, then free all its access entries. */
+void nfs_access_zap_cache(struct inode *inode)
+{
+	/* Remove the inode from the global LRU list, if it is on it */
+	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+		spin_unlock(&nfs_access_lru_lock);
+	}
+
+	spin_lock(&inode->i_lock);
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+}
+/* Find the entry for cred in the inode's rbtree (ordered by cred pointer), or NULL. */
+static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+{
+	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
+	struct nfs_access_entry *entry;
+
+	while (n != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+
+		if (cred < entry->cred)
+			n = n->rb_left;
+		else if (cred > entry->cred)
+			n = n->rb_right;
+		else
+			return entry;
+	}
+	return NULL;
+}
+
 int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
+	struct nfs_access_entry *cache;
+	int err = -ENOENT;
 
-	if (cache->cred != cred
-			|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
-			|| (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
-		return -ENOENT;
-	memcpy(res, cache, sizeof(*res));
-	return 0;
+	spin_lock(&inode->i_lock);
+	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+		goto out_zap;
+	cache = nfs_access_search_rbtree(inode, cred);
+	if (cache == NULL)
+		goto out;
+	if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
+		goto out_stale;
+	res->jiffies = cache->jiffies;
+	res->cred = cache->cred;
+	res->mask = cache->mask;
+	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
+	err = 0;
+out:
+	spin_unlock(&inode->i_lock);
+	return err;
+out_stale:
+	rb_erase(&cache->rb_node, &nfsi->access_cache);
+	list_del(&cache->lru);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(cache);
+	return -ENOENT;
+out_zap:
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+	return -ENOENT;
+}
+/* Insert set into the inode's cache; an entry with the same cred is replaced and freed. */
+static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct rb_root *root_node = &nfsi->access_cache;
+	struct rb_node **p = &root_node->rb_node;
+	struct rb_node *parent = NULL;
+	struct nfs_access_entry *entry;
+
+	spin_lock(&inode->i_lock);
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
+
+		if (set->cred < entry->cred)
+			p = &parent->rb_left;
+		else if (set->cred > entry->cred)
+			p = &parent->rb_right;
+		else
+			goto found;
+	}
+	rb_link_node(&set->rb_node, parent, p);
+	rb_insert_color(&set->rb_node, root_node);
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
+	spin_unlock(&inode->i_lock);
+	return;
+found:
+	rb_replace_node(parent, &set->rb_node, root_node);	/* parent is entry's node */
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
+	list_del(&entry->lru);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(entry);	/* freed only after i_lock is dropped */
 }
 
 void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
-
-	if (cache->cred != set->cred) {
-		if (cache->cred)
-			put_rpccred(cache->cred);
-		cache->cred = get_rpccred(set->cred);
-	}
-	/* FIXME: replace current access_cache BKL reliance with inode->i_lock */
-	spin_lock(&inode->i_lock);
-	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
-	spin_unlock(&inode->i_lock);
+	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+	if (cache == NULL)
+		return;
+	RB_CLEAR_NODE(&cache->rb_node);
 	cache->jiffies = set->jiffies;
+	cache->cred = get_rpccred(set->cred);
 	cache->mask = set->mask;
+
+	nfs_access_add_rbtree(inode, cache);
+
+	/* Update accounting */
+	smp_mb__before_atomic_inc();
+	atomic_long_inc(&nfs_access_nr_entries);
+	smp_mb__after_atomic_inc();
+
+	/* Add inode to global LRU list */
+	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
+		spin_unlock(&nfs_access_lru_lock);
+	}
 }
 
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 48e8928..be997d6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -111,7 +111,7 @@
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	lock_kernel();
-	res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
+	res = NFS_PROTO(inode)->file_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
@@ -157,7 +157,7 @@
 static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
 {
 	/* origin == SEEK_END => we must revalidate the cached file length */
-	if (origin == 2) {
+	if (origin == SEEK_END) {
 		struct inode *inode = filp->f_mapping->host;
 		int retval = nfs_revalidate_file_size(inode, filp);
 		if (retval < 0)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
new file mode 100644
index 0000000..76b08ae
--- /dev/null
+++ b/fs/nfs/getroot.c
@@ -0,0 +1,311 @@
+/* getroot.c: get the root dentry for an NFS mount
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/namei.h>
+#include <linux/namespace.h>
+#include <linux/security.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "nfs4_fs.h"
+#include "delegation.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY		NFSDBG_CLIENT
+#define NFS_PARANOIA 1
+
+/*
+ * Get an NFS2/NFS3 root dentry from root filehandle @mntfh; ERR_PTR() on error
+ */
+struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fsinfo fsinfo;
+	struct nfs_fattr fattr;
+	struct dentry *mntroot;
+	struct inode *inode;
+	int error;
+
+	/* no root yet: create a dummy root dentry and inode for this superblock */
+	if (!sb->s_root) {
+		struct nfs_fh dummyfh;
+		struct dentry *root;
+		struct inode *iroot;
+
+		memset(&dummyfh, 0, sizeof(dummyfh));
+		memset(&fattr, 0, sizeof(fattr));
+		nfs_fattr_init(&fattr);
+		fattr.valid = NFS_ATTR_FATTR;
+		fattr.type = NFDIR;
+		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+		fattr.nlink = 2;
+
+		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		if (IS_ERR(iroot))
+			return ERR_PTR(PTR_ERR(iroot));
+
+		root = d_alloc_root(iroot);
+		if (!root) {
+			iput(iroot);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		sb->s_root = root;
+	}
+
+	/* get the actual root for this mount; getroot is handed fattr via fsinfo */
+	fsinfo.fattr = &fattr;
+
+	error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+	if (error < 0) {
+		dprintk("nfs_get_root: getattr error = %d\n", -error);
+		return ERR_PTR(error);
+	}
+
+	inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+	if (IS_ERR(inode)) {
+		dprintk("nfs_get_root: get root inode failed\n");
+		return ERR_PTR(PTR_ERR(inode));
+	}
+
+	/* root dentries normally start off anonymous and get spliced in later
+	 * if the dentry tree reaches them; however if the dentry already
+	 * exists, we'll pick it up at this point and use it as the root
+	 */
+	mntroot = d_alloc_anon(inode);
+	if (!mntroot) {
+		iput(inode);
+		dprintk("nfs_get_root: get root dentry failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	security_d_instantiate(mntroot, inode);
+
+	if (!mntroot->d_op)	/* keep any dentry ops that are already set */
+		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+	return mntroot;
+}
+
+#ifdef CONFIG_NFS_V4
+
+/*
+ * Walk @path from the server's root FH, leaving the final FH in @mntfh and
+ * the last fsid in server->fsid.  Returns 0, a getroot/lookupfh error, or:
+ * - -EINVAL if @path lacks a leading '/' or a ".." component is seen
+ * - -ENOTDIR on any non-directory (so symlinks are refused)
+ * - -EREMOTE if a referral is returned
+ */
+int nfs4_path_walk(struct nfs_server *server,
+		   struct nfs_fh *mntfh,
+		   const char *path)
+{
+	struct nfs_fsinfo fsinfo;
+	struct nfs_fattr fattr;
+	struct nfs_fh lastfh;
+	struct qstr name;
+	int ret;
+	//int referral_count = 0;
+
+	dprintk("--> nfs4_path_walk(,,%s)\n", path);
+
+	fsinfo.fattr = &fattr;
+	nfs_fattr_init(&fattr);
+
+	if (*path++ != '/') {
+		dprintk("nfs4_get_root: Path does not begin with a slash\n");
+		return -EINVAL;
+	}
+
+	/* Start by getting the root filehandle from the server */
+	ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+	if (ret < 0) {
+		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+		return ret;
+	}
+
+	if (fattr.type != NFDIR) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " getroot encountered non-directory\n");
+		return -ENOTDIR;
+	}
+
+	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " getroot obtained referral\n");
+		return -EREMOTE;
+	}
+
+next_component:
+	dprintk("Next: %s\n", path);
+
+	/* extract the next component of the path */
+	if (!*path)
+		goto path_walk_complete;
+
+	name.name = path;
+	while (*path && *path != '/')
+		path++;
+	name.len = path - (const char *) name.name;
+
+eat_dot_dir:
+	while (*path == '/')
+		path++;
+
+	if (path[0] == '.' && (path[1] == '/' || !path[1])) {
+		path++;	/* only the '.': stepping by 2 could pass the NUL */
+		goto eat_dot_dir;
+	}
+
+	if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
+	    ) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " Mount path contains reference to \"..\"\n");
+		return -EINVAL;
+	}
+
+	/* look up the component: lastfh is the parent, mntfh gets the result */
+	memcpy(&lastfh, mntfh, sizeof(lastfh));
+
+	dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
+
+	ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
+						    mntfh, &fattr);
+	if (ret < 0) {
+		dprintk("nfs4_get_root: lookupfh error = %d\n", -ret);
+		return ret;
+	}
+
+	if (fattr.type != NFDIR) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " lookupfh encountered non-directory\n");
+		return -ENOTDIR;
+	}
+
+	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " lookupfh obtained referral\n");
+		return -EREMOTE;
+	}
+
+	goto next_component;
+
+path_walk_complete:
+	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+	dprintk("<-- nfs4_path_walk() = 0\n");
+	return 0;
+}
+
+/*
+ * Get an NFS4 root dentry from root filehandle @mntfh; ERR_PTR() on error
+ */
+struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fattr fattr;
+	struct dentry *mntroot;
+	struct inode *inode;
+	int error;
+
+	dprintk("--> nfs4_get_root()\n");
+
+	/* no root yet: create a dummy root dentry and inode for this superblock */
+	if (!sb->s_root) {
+		struct nfs_fh dummyfh;
+		struct dentry *root;
+		struct inode *iroot;
+
+		memset(&dummyfh, 0, sizeof(dummyfh));
+		memset(&fattr, 0, sizeof(fattr));
+		nfs_fattr_init(&fattr);
+		fattr.valid = NFS_ATTR_FATTR;
+		fattr.type = NFDIR;
+		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+		fattr.nlink = 2;
+
+		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		if (IS_ERR(iroot))
+			return ERR_PTR(PTR_ERR(iroot));
+
+		root = d_alloc_root(iroot);
+		if (!root) {
+			iput(iroot);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		sb->s_root = root;
+	}
+
+	/* get the info about the server and filesystem */
+	error = nfs4_server_capabilities(server, mntfh);
+	if (error < 0) {
+		dprintk("nfs4_get_root: getcaps error = %d\n",
+			-error);
+		return ERR_PTR(error);
+	}
+
+	/* get the actual root for this mount */
+	error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+	if (error < 0) {
+		dprintk("nfs4_get_root: getattr error = %d\n", -error);
+		return ERR_PTR(error);
+	}
+
+	inode = nfs_fhget(sb, mntfh, &fattr);
+	if (IS_ERR(inode)) {
+		dprintk("nfs4_get_root: get root inode failed\n");
+		return ERR_PTR(PTR_ERR(inode));
+	}
+
+	/* root dentries normally start off anonymous and get spliced in later
+	 * if the dentry tree reaches them; however if the dentry already
+	 * exists, we'll pick it up at this point and use it as the root
+	 */
+	mntroot = d_alloc_anon(inode);
+	if (!mntroot) {
+		iput(inode);
+		dprintk("nfs4_get_root: get root dentry failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	security_d_instantiate(mntroot, inode);
+
+	if (!mntroot->d_op)	/* keep any dentry ops that are already set */
+		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+	dprintk("<-- nfs4_get_root()\n");
+	return mntroot;
+}
+
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 07a5dd5..82ad711 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -57,6 +57,20 @@
 /* Default cache timeout is 10 minutes */
 unsigned int nfs_idmap_cache_timeout = 600 * HZ;
 
+static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	long num = simple_strtol(val, &endp, 0);
+	/* Seconds -> jiffies: reject junk, negatives and int overflow up front */
+	if (endp == val || *endp || num < 0 || num > INT_MAX / HZ)
+		return -EINVAL;
+	*((int *)kp->arg) = num * HZ;
+	return 0;
+}
+
+module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
+		 &nfs_idmap_cache_timeout, 0644);
+
 struct idmap_hashent {
 	unsigned long ih_expires;
 	__u32 ih_id;
@@ -70,7 +84,6 @@
 };
 
 struct idmap {
-	char                  idmap_path[48];
 	struct dentry        *idmap_dentry;
 	wait_queue_head_t     idmap_wq;
 	struct idmap_msg      idmap_im;
@@ -94,24 +107,23 @@
         .destroy_msg    = idmap_pipe_destroy_msg,
 };
 
-void
-nfs_idmap_new(struct nfs4_client *clp)
+int
+nfs_idmap_new(struct nfs_client *clp)
 {
 	struct idmap *idmap;
+	int error;
 
-	if (clp->cl_idmap != NULL)
-		return;
+	BUG_ON(clp->cl_idmap != NULL);
+
         if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
-                return;
+                return -ENOMEM;
 
-	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
-	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
-
-        idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
+        idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
 	    idmap, &idmap_upcall_ops, 0);
         if (IS_ERR(idmap->idmap_dentry)) {
+		error = PTR_ERR(idmap->idmap_dentry);
 		kfree(idmap);
-		return;
+		return error;
 	}
 
         mutex_init(&idmap->idmap_lock);
@@ -121,10 +133,11 @@
 	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
 
 	clp->cl_idmap = idmap;
+	return 0;
 }
 
 void
-nfs_idmap_delete(struct nfs4_client *clp)
+nfs_idmap_delete(struct nfs_client *clp)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
@@ -477,27 +490,27 @@
 	return (hash);
 }
 
-int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
 }
 
-int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
 }
 
-int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
 }
-int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d349fb2..e8c143d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -76,19 +76,14 @@
 
 void nfs_clear_inode(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct rpc_cred *cred;
-
 	/*
 	 * The following should never happen...
 	 */
 	BUG_ON(nfs_have_writebacks(inode));
-	BUG_ON (!list_empty(&nfsi->open_files));
+	BUG_ON(!list_empty(&NFS_I(inode)->open_files));
+	BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
 	nfs_zap_acl_cache(inode);
-	cred = nfsi->cache_access.cred;
-	if (cred)
-		put_rpccred(cred);
-	BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+	nfs_access_zap_cache(inode);
 }
 
 /**
@@ -242,13 +237,13 @@
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
-		inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
+		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = &nfs_file_operations;
 			inode->i_data.a_ops = &nfs_file_aops;
 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
 		} else if (S_ISDIR(inode->i_mode)) {
-			inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
+			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
 			inode->i_fop = &nfs_dir_operations;
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
 			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
@@ -290,7 +285,7 @@
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
-		nfsi->cache_access.cred = NULL;
+		nfsi->access_cache = RB_ROOT;
 
 		unlock_new_inode(inode);
 	} else
@@ -722,13 +717,11 @@
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
-	if (!nfs_have_delegation(inode, FMODE_READ)) {
-		/* Directories and symlinks: invalidate page cache */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
-			spin_lock(&inode->i_lock);
-			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
-			spin_unlock(&inode->i_lock);
-		}
+	/* Directories: invalidate page cache */
+	if (S_ISDIR(inode->i_mode)) {
+		spin_lock(&inode->i_lock);
+		nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+		spin_unlock(&inode->i_lock);
 	}
 	nfsi->cache_change_attribute = jiffies;
 	atomic_dec(&nfsi->data_updates);
@@ -847,6 +840,12 @@
  *
  * After an operation that has changed the inode metadata, mark the
  * attribute cache as being invalid, then try to update it.
+ *
+ * NB: if the server didn't return any post op attributes, this
+ * function will force the retrieval of attributes before the next
+ * NFS request.  Thus it should be used only for operations that
+ * are expected to change one or more attributes, to avoid
+ * unnecessary NFS requests and trips through nfs_update_inode().
  */
 int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
@@ -1025,7 +1024,7 @@
  out_fileid:
 	printk(KERN_ERR "NFS: server %s error: fileid changed\n"
 		"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
-		NFS_SERVER(inode)->hostname, inode->i_sb->s_id,
+		NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
 		(long long)nfsi->fileid, (long long)fattr->fileid);
 	goto out_err;
 }
@@ -1109,6 +1108,8 @@
 		INIT_LIST_HEAD(&nfsi->dirty);
 		INIT_LIST_HEAD(&nfsi->commit);
 		INIT_LIST_HEAD(&nfsi->open_files);
+		INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+		INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 		INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 		atomic_set(&nfsi->data_updates, 0);
 		nfsi->ndirty = 0;
@@ -1144,6 +1145,10 @@
 {
 	int err;
 
+	err = nfs_fs_proc_init();
+	if (err)
+		goto out5;
+
 	err = nfs_init_nfspagecache();
 	if (err)
 		goto out4;
@@ -1184,6 +1189,8 @@
 out3:
 	nfs_destroy_nfspagecache();
 out4:
+	nfs_fs_proc_exit();
+out5:
 	return err;
 }
 
@@ -1198,6 +1205,7 @@
 	rpc_proc_unregister("nfs");
 #endif
 	unregister_nfs_fs();
+	nfs_fs_proc_exit();
 }
 
 /* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e4f4e5d..bea0b01 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,6 +4,18 @@
 
 #include <linux/mount.h>
 
+struct nfs_string;
+struct nfs_mount_data;
+struct nfs4_mount_data;
+
+/* Maximum number of readahead requests
+ * FIXME: this should really be a sysctl so that users may tune it to suit
+ *        their needs. People who use NFS over a slow network might, for
+ *        instance, want to reduce it to something closer to 1 for improved
+ *        interactive response.
+ */
+#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
+
 struct nfs_clone_mount {
 	const struct super_block *sb;
 	const struct dentry *dentry;
@@ -15,7 +27,40 @@
 	rpc_authflavor_t authflavor;
 };
 
-/* namespace-nfs4.c */
+/* client.c */
+extern struct rpc_program nfs_program;
+
+extern void nfs_put_client(struct nfs_client *);
+extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
+extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
+					    struct nfs_fh *);
+extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
+					     const char *,
+					     const struct sockaddr_in *,
+					     const char *,
+					     const char *,
+					     rpc_authflavor_t,
+					     struct nfs_fh *);
+extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
+						      struct nfs_fh *);
+extern void nfs_free_server(struct nfs_server *server);
+extern struct nfs_server *nfs_clone_server(struct nfs_server *,
+					   struct nfs_fh *,
+					   struct nfs_fattr *);
+#ifdef CONFIG_PROC_FS
+extern int __init nfs_fs_proc_init(void);
+extern void nfs_fs_proc_exit(void);
+#else /* !CONFIG_PROC_FS: no-op stubs so callers need no #ifdef */
+static inline int nfs_fs_proc_init(void)
+{
+	return 0;
+}
+static inline void nfs_fs_proc_exit(void)
+{
+}
+#endif /* CONFIG_PROC_FS */
+
+/* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
 #else
@@ -46,6 +91,7 @@
 #endif
 
 /* nfs2xdr.c */
+extern int nfs_stat_to_errno(int);
 extern struct rpc_procinfo nfs_procedures[];
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
@@ -54,8 +100,9 @@
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
 /* nfs4xdr.c */
-extern int nfs_stat_to_errno(int);
+#ifdef CONFIG_NFS_V4
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+#endif
 
 /* nfs4proc.c */
 #ifdef CONFIG_NFS_V4
@@ -66,6 +113,9 @@
 				  struct page *page);
 #endif
 
+/* dir.c */
+extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+
 /* inode.c */
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
@@ -76,10 +126,10 @@
 #endif
 
 /* super.c */
-extern struct file_system_type nfs_referral_nfs4_fs_type;
-extern struct file_system_type clone_nfs_fs_type;
+extern struct file_system_type nfs_xdev_fs_type;
 #ifdef CONFIG_NFS_V4
-extern struct file_system_type clone_nfs4_fs_type;
+extern struct file_system_type nfs4_xdev_fs_type;
+extern struct file_system_type nfs4_referral_fs_type;
 #endif
 
 extern struct rpc_stat nfs_rpcstat;
@@ -88,30 +138,30 @@
 extern void __exit unregister_nfs_fs(void);
 
 /* namespace.c */
-extern char *nfs_path(const char *base, const struct dentry *dentry,
+extern char *nfs_path(const char *base,
+		      const struct dentry *droot,
+		      const struct dentry *dentry,
 		      char *buffer, ssize_t buflen);
 
-/*
- * Determine the mount path as a string
- */
-static inline char *
-nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
-{
+/* getroot.c */
+extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
 #ifdef CONFIG_NFS_V4
-	return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen);
-#else
-	return NULL;
+extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
+
+extern int nfs4_path_walk(struct nfs_server *server,
+			  struct nfs_fh *mntfh,
+			  const char *path);
 #endif
-}
 
 /*
  * Determine the device name as a string
  */
 static inline char *nfs_devname(const struct vfsmount *mnt_parent,
-			 const struct dentry *dentry,
-			 char *buffer, ssize_t buflen)
+				const struct dentry *dentry,
+				char *buffer, ssize_t buflen)
 {
-	return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen);
+	return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root,
+			dentry, buffer, buflen);
 }
 
 /*
@@ -167,20 +217,3 @@
 	if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
 		sb->s_maxbytes = MAX_LFS_FILESIZE;
 }
-
-/*
- * Check if the string represents a "valid" IPv4 address
- */
-static inline int valid_ipaddr4(const char *buf)
-{
-	int rc, count, in[4];
-
-	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
-	if (rc != 4)
-		return -EINVAL;
-	for (count = 0; count < 4; count++) {
-		if (in[count] > 255)
-			return -EINVAL;
-	}
-	return 0;
-}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 445abb4..d507b02 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -14,7 +14,6 @@
 #include <linux/net.h>
 #include <linux/in.h>
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/nfs_fs.h>
 
@@ -77,22 +76,19 @@
 mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
 		int protocol)
 {
-	struct rpc_xprt	*xprt;
-	struct rpc_clnt	*clnt;
+	struct rpc_create_args args = {
+		.protocol	= protocol,
+		.address	= (struct sockaddr *)srvaddr,
+		.addrsize	= sizeof(*srvaddr),
+		.servername	= hostname,
+		.program	= &mnt_program,
+		.version	= version,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT |
+				   RPC_CLNT_CREATE_INTR),
+	};
 
-	xprt = xprt_create_proto(protocol, srvaddr, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-
-	clnt = rpc_create_client(xprt, hostname,
-				&mnt_program, version,
-				RPC_AUTH_UNIX);
-	if (!IS_ERR(clnt)) {
-		clnt->cl_softrtry = 1;
-		clnt->cl_oneshot  = 1;
-		clnt->cl_intr = 1;
-	}
-	return clnt;
+	return rpc_create(&args);
 }
 
 /*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 86b3169..77b0068 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -2,6 +2,7 @@
  * linux/fs/nfs/namespace.c
  *
  * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
  *
  * NFS namespace
  */
@@ -28,6 +29,7 @@
 /*
  * nfs_path - reconstruct the path given an arbitrary dentry
  * @base - arbitrary string to prepend to the path
+ * @droot - pointer to root dentry for mountpoint
  * @dentry - pointer to dentry
  * @buffer - result buffer
  * @buflen - length of buffer
@@ -38,7 +40,9 @@
  * This is mainly for use in figuring out the path on the
  * server side when automounting on top of an existing partition.
  */
-char *nfs_path(const char *base, const struct dentry *dentry,
+char *nfs_path(const char *base,
+	       const struct dentry *droot,
+	       const struct dentry *dentry,
 	       char *buffer, ssize_t buflen)
 {
 	char *end = buffer+buflen;
@@ -47,7 +51,7 @@
 	*--end = '\0';
 	buflen--;
 	spin_lock(&dcache_lock);
-	while (!IS_ROOT(dentry)) {
+	while (!IS_ROOT(dentry) && dentry != droot) {
 		namelen = dentry->d_name.len;
 		buflen -= namelen + 1;
 		if (buflen < 0)
@@ -96,15 +100,18 @@
 	struct nfs_fattr fattr;
 	int err;
 
+	dprintk("--> nfs_follow_mountpoint()\n");
+
 	BUG_ON(IS_ROOT(dentry));
 	dprintk("%s: enter\n", __FUNCTION__);
 	dput(nd->dentry);
 	nd->dentry = dget(dentry);
-	if (d_mountpoint(nd->dentry))
-		goto out_follow;
+
 	/* Look it up again */
 	parent = dget_parent(nd->dentry);
-	err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr);
+	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
+						  &nd->dentry->d_name,
+						  &fh, &fattr);
 	dput(parent);
 	if (err != 0)
 		goto out_err;
@@ -132,6 +139,8 @@
 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
 out:
 	dprintk("%s: done, returned %d\n", __FUNCTION__, err);
+
+	dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
 	return ERR_PTR(err);
 out_err:
 	path_release(nd);
@@ -172,22 +181,23 @@
 /*
  * Clone a mountpoint of the appropriate type
  */
-static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname,
+static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
+					   const char *devname,
 					   struct nfs_clone_mount *mountdata)
 {
 #ifdef CONFIG_NFS_V4
 	struct vfsmount *mnt = NULL;
-	switch (server->rpc_ops->version) {
+	switch (server->nfs_client->cl_nfsversion) {
 		case 2:
 		case 3:
-			mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+			mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
 			break;
 		case 4:
-			mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata);
+			mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata);
 	}
 	return mnt;
 #else
-	return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+	return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
 #endif
 }
 
@@ -213,6 +223,8 @@
 	char *page = (char *) __get_free_page(GFP_USER);
 	char *devname;
 
+	dprintk("--> nfs_do_submount()\n");
+
 	dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name);
@@ -227,5 +239,7 @@
 	free_page((unsigned long)page);
 out:
 	dprintk("%s: done\n", __FUNCTION__);
+
+	dprintk("<-- nfs_do_submount() = %p\n", mnt);
 	return mnt;
 }
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 67391ee..b49501f 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -51,7 +51,7 @@
 #define NFS_createargs_sz	(NFS_diropargs_sz+NFS_sattr_sz)
 #define NFS_renameargs_sz	(NFS_diropargs_sz+NFS_diropargs_sz)
 #define NFS_linkargs_sz		(NFS_fhandle_sz+NFS_diropargs_sz)
-#define NFS_symlinkargs_sz	(NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
+#define NFS_symlinkargs_sz	(NFS_diropargs_sz+1+NFS_sattr_sz)
 #define NFS_readdirargs_sz	(NFS_fhandle_sz+2)
 
 #define NFS_attrstat_sz		(1+NFS_fattr_sz)
@@ -351,11 +351,26 @@
 static int
 nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
 {
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	size_t pad;
+
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
-	p = xdr_encode_array(p, args->topath, args->tolen);
+	*p++ = htonl(args->pathlen);
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+
+	xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
+
+	/*
+	 * xdr_encode_pages may have added a few bytes to ensure the
+	 * pathname ends on a 4-byte boundary.  Start encoding the
+	 * attributes after the pad bytes.
+	 */
+	pad = sndbuf->tail->iov_len;
+	if (pad > 0)
+		p++;
 	p = xdr_encode_sattr(p, args->sattr);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7143b1f..f8688ea 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -81,7 +81,7 @@
 }
 
 /*
- * Bare-bones access to getattr: this is for nfs_read_super.
+ * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb
  */
 static int
 nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -90,8 +90,8 @@
 	int	status;
 
 	status = do_proc_get_root(server->client, fhandle, info);
-	if (status && server->client_sys != server->client)
-		status = do_proc_get_root(server->client_sys, fhandle, info);
+	if (status && server->nfs_client->cl_rpcclient != server->client)
+		status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info);
 	return status;
 }
 
@@ -544,23 +544,23 @@
 }
 
 static int
-nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
-		  struct iattr *sattr, struct nfs_fh *fhandle,
-		  struct nfs_fattr *fattr)
+nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+		  unsigned int len, struct iattr *sattr)
 {
-	struct nfs_fattr	dir_attr;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_attr;
 	struct nfs3_symlinkargs	arg = {
 		.fromfh		= NFS_FH(dir),
-		.fromname	= name->name,
-		.fromlen	= name->len,
-		.topath		= path->name,
-		.tolen		= path->len,
+		.fromname	= dentry->d_name.name,
+		.fromlen	= dentry->d_name.len,
+		.pages		= &page,
+		.pathlen	= len,
 		.sattr		= sattr
 	};
 	struct nfs3_diropres	res = {
 		.dir_attr	= &dir_attr,
-		.fh		= fhandle,
-		.fattr		= fattr
+		.fh		= &fhandle,
+		.fattr		= &fattr
 	};
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_SYMLINK],
@@ -569,13 +569,19 @@
 	};
 	int			status;
 
-	if (path->len > NFS3_MAXPATHLEN)
+	if (len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+
+	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+
 	nfs_fattr_init(&dir_attr);
-	nfs_fattr_init(fattr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fhandle, &fattr);
+out:
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
@@ -785,7 +791,7 @@
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	return status;
 }
@@ -886,7 +892,7 @@
 	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
 }
 
-struct nfs_rpc_ops	nfs_v3_clientops = {
+const struct nfs_rpc_ops nfs_v3_clientops = {
 	.version	= 3,			/* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs3_dir_inode_operations,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0250269..16556fa 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -56,7 +56,7 @@
 #define NFS3_writeargs_sz	(NFS3_fh_sz+5)
 #define NFS3_createargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
 #define NFS3_mkdirargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
-#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz)
+#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+1+NFS3_sattr_sz)
 #define NFS3_mknodargs_sz	(NFS3_diropargs_sz+2+NFS3_sattr_sz)
 #define NFS3_renameargs_sz	(NFS3_diropargs_sz+NFS3_diropargs_sz)
 #define NFS3_linkargs_sz		(NFS3_fh_sz+NFS3_diropargs_sz)
@@ -398,8 +398,11 @@
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
 	p = xdr_encode_sattr(p, args->sattr);
-	p = xdr_encode_array(p, args->topath, args->tolen);
+	*p++ = htonl(args->pathlen);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Copy the page */
+	xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9a10286..61095fe 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -43,55 +43,6 @@
 };
 
 /*
- * The nfs4_client identifies our client state to the server.
- */
-struct nfs4_client {
-	struct list_head	cl_servers;	/* Global list of servers */
-	struct in_addr		cl_addr;	/* Server identifier */
-	u64			cl_clientid;	/* constant */
-	nfs4_verifier		cl_confirm;
-	unsigned long		cl_state;
-
-	u32			cl_lockowner_id;
-
-	/*
-	 * The following rwsem ensures exclusive access to the server
-	 * while we recover the state following a lease expiration.
-	 */
-	struct rw_semaphore	cl_sem;
-
-	struct list_head	cl_delegations;
-	struct list_head	cl_state_owners;
-	struct list_head	cl_unused;
-	int			cl_nunused;
-	spinlock_t		cl_lock;
-	atomic_t		cl_count;
-
-	struct rpc_clnt *	cl_rpcclient;
-
-	struct list_head	cl_superblocks;	/* List of nfs_server structs */
-
-	unsigned long		cl_lease_time;
-	unsigned long		cl_last_renewal;
-	struct work_struct	cl_renewd;
-	struct work_struct	cl_recoverd;
-
-	struct rpc_wait_queue	cl_rpcwaitq;
-
-	/* used for the setclientid verifier */
-	struct timespec		cl_boot_time;
-
-	/* idmapper */
-	struct idmap *		cl_idmap;
-
-	/* Our own IP address, as a null-terminated string.
-	 * This is used to generate the clientid, and the callback address.
-	 */
-	char			cl_ipaddr[16];
-	unsigned char		cl_id_uniquifier;
-};
-
-/*
  * struct rpc_sequence ensures that RPC calls are sent in the exact
  * order that they appear on the list.
  */
@@ -127,7 +78,7 @@
 struct nfs4_state_owner {
 	spinlock_t	     so_lock;
 	struct list_head     so_list;	 /* per-clientid list of state_owners */
-	struct nfs4_client   *so_client;
+	struct nfs_client    *so_client;
 	u32                  so_id;      /* 32-bit identifier, unique */
 	atomic_t	     so_count;
 
@@ -210,10 +161,10 @@
 
 /* nfs4proc.c */
 extern int nfs4_map_errors(int err);
-extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *);
-extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *);
+extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
+extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -231,19 +182,14 @@
 extern const u32 nfs4_fs_locations_bitmap[2];
 
 /* nfs4renewd.c */
-extern void nfs4_schedule_state_renewal(struct nfs4_client *);
+extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
-extern void nfs4_kill_renewd(struct nfs4_client *);
+extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(void *);
 
 /* nfs4state.c */
-extern void init_nfsv4_state(struct nfs_server *);
-extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs4_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs4_client *clp);
-extern struct nfs4_client *nfs4_find_client(struct in_addr *);
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
+extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
@@ -252,7 +198,7 @@
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
-extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
@@ -276,10 +222,6 @@
 
 #else
 
-#define init_nfsv4_state(server)  do { } while (0)
-#define destroy_nfsv4_state(server)       do { } while (0)
-#define nfs4_put_state_owner(inode, owner) do { } while (0)
-#define nfs4_put_open_state(state) do { } while (0)
 #define nfs4_close_state(a, b) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index ea38d27..24e47f3 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -2,6 +2,7 @@
  * linux/fs/nfs/nfs4namespace.c
  *
  * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
  *
  * NFSv4 namespace
  */
@@ -23,7 +24,7 @@
 /*
  * Check if fs_root is valid
  */
-static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
+static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
 					 char *buffer, ssize_t buflen)
 {
 	char *end = buffer + buflen;
@@ -34,7 +35,7 @@
 
 	n = pathname->ncomponents;
 	while (--n >= 0) {
-		struct nfs4_string *component = &pathname->components[n];
+		const struct nfs4_string *component = &pathname->components[n];
 		buflen -= component->len + 1;
 		if (buflen < 0)
 			goto Elong;
@@ -47,6 +48,68 @@
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
+/*
+ * Determine the mount path as a string: the parent mount's device name,
+ * taken from just after its first ':' (or in full when it contains no ':'),
+ * is handed to nfs_path() together with the parent's root and the dentry
+ */
+static char *nfs4_path(const struct vfsmount *mnt_parent,
+		       const struct dentry *dentry,
+		       char *buffer, ssize_t buflen)
+{
+	const char *devname = mnt_parent->mnt_devname;
+	const char *colon = strchr(devname, ':');
+
+	if (colon)
+		devname = colon + 1;
+
+	return nfs_path(devname, mnt_parent->mnt_root, dentry, buffer, buflen);
+}
+
+/*
+ * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
+ * believe to be the server path to this dentry: 0 if it is, -ENOENT if it
+ * is not, or the error from building either string in page / page2
+ */
+static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
+				const struct dentry *dentry,
+				const struct nfs4_fs_locations *locations,
+				char *page, char *page2)
+{
+	const char *dentry_path, *fs_root;
+
+	dentry_path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE);
+	if (IS_ERR(dentry_path))
+		return PTR_ERR(dentry_path);
+
+	fs_root = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
+	if (IS_ERR(fs_root))
+		return PTR_ERR(fs_root);
+
+	if (strncmp(dentry_path, fs_root, strlen(fs_root)) == 0)
+		return 0;
+
+	dprintk("%s: path %s does not begin with fsroot %s\n",
+		__FUNCTION__, dentry_path, fs_root);
+	return -ENOENT;
+}
+
+/*
+ * Check if the string begins with a "valid" IPv4 address: four '.'-separated
+ * decimal fields, each in 0-255 (trailing text is not examined); 0 or -EINVAL
+ */
+static inline int valid_ipaddr4(const char *buf)
+{
+	int rc, count, in[4];
+
+	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
+	if (rc != 4)
+		return -EINVAL;
+	for (count = 0; count < 4; count++)
+		if (in[count] < 0 || in[count] > 255)	/* %d also parses a sign */
+			return -EINVAL;
+	return 0;
+}
 
 /**
  * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
@@ -60,7 +123,7 @@
  */
 static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 					    const struct dentry *dentry,
-					    struct nfs4_fs_locations *locations)
+					    const struct nfs4_fs_locations *locations)
 {
 	struct vfsmount *mnt = ERR_PTR(-ENOENT);
 	struct nfs_clone_mount mountdata = {
@@ -68,10 +131,9 @@
 		.dentry = dentry,
 		.authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
 	};
-	char *page, *page2;
-	char *path, *fs_path;
+	char *page = NULL, *page2 = NULL;
 	char *devname;
-	int loc, s;
+	int loc, s, error;
 
 	if (locations == NULL || locations->nlocations <= 0)
 		goto out;
@@ -79,36 +141,30 @@
 	dprintk("%s: referral at %s/%s\n", __FUNCTION__,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	/* Ensure fs path is a prefix of current dentry path */
 	page = (char *) __get_free_page(GFP_USER);
-	if (page == NULL)
+	if (!page)
 		goto out;
+
 	page2 = (char *) __get_free_page(GFP_USER);
-	if (page2 == NULL)
+	if (!page2)
 		goto out;
 
-	path = nfs4_path(dentry, page, PAGE_SIZE);
-	if (IS_ERR(path))
-		goto out_free;
-
-	fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
-	if (IS_ERR(fs_path))
-		goto out_free;
-
-	if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
-		dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path);
-		goto out_free;
+	/* Ensure fs path is a prefix of current dentry path */
+	error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2);
+	if (error < 0) {
+		mnt = ERR_PTR(error);
+		goto out;
 	}
 
 	devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
 	if (IS_ERR(devname)) {
 		mnt = (struct vfsmount *)devname;
-		goto out_free;
+		goto out;
 	}
 
 	loc = 0;
 	while (loc < locations->nlocations && IS_ERR(mnt)) {
-		struct nfs4_fs_location *location = &locations->locations[loc];
+		const struct nfs4_fs_location *location = &locations->locations[loc];
 		char *mnt_path;
 
 		if (location == NULL || location->nservers <= 0 ||
@@ -140,7 +196,7 @@
 			addr.sin_port = htons(NFS_PORT);
 			mountdata.addr = &addr;
 
-			mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata);
+			mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata);
 			if (!IS_ERR(mnt)) {
 				break;
 			}
@@ -149,10 +205,9 @@
 		loc++;
 	}
 
-out_free:
-	free_page((unsigned long)page);
-	free_page((unsigned long)page2);
 out:
+	free_page((unsigned long) page);
+	free_page((unsigned long) page2);
 	dprintk("%s: done\n", __FUNCTION__);
 	return mnt;
 }
@@ -165,7 +220,7 @@
  */
 struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
 {
-	struct vfsmount *mnt = ERR_PTR(-ENOENT);
+	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
 	struct dentry *parent;
 	struct nfs4_fs_locations *fs_locations = NULL;
 	struct page *page;
@@ -183,11 +238,16 @@
 		goto out_free;
 
 	/* Get locations */
+	mnt = ERR_PTR(-ENOENT);
+
 	parent = dget_parent(dentry);
-	dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name);
+	dprintk("%s: getting locations for %s/%s\n",
+		__FUNCTION__, parent->d_name.name, dentry->d_name.name);
+
 	err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
 	dput(parent);
-	if (err != 0 || fs_locations->nlocations <= 0 ||
+	if (err != 0 ||
+	    fs_locations->nlocations <= 0 ||
 	    fs_locations->fs_path.ncomponents <= 0)
 		goto out_free;
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 153898e..47c7e6e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,7 +55,7 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
-#define NFS4_POLL_RETRY_MIN	(1*HZ)
+#define NFS4_POLL_RETRY_MIN	(HZ/10)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
 struct nfs4_opendata;
@@ -64,7 +64,7 @@
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp);
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
 
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
@@ -195,7 +195,7 @@
 
 static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	spin_lock(&clp->cl_lock);
 	if (time_before(clp->cl_last_renewal,timestamp))
 		clp->cl_last_renewal = timestamp;
@@ -252,7 +252,7 @@
 	atomic_inc(&sp->so_count);
 	p->o_arg.fh = NFS_FH(dir);
 	p->o_arg.open_flags = flags,
-	p->o_arg.clientid = server->nfs4_state->cl_clientid;
+	p->o_arg.clientid = server->nfs_client->cl_clientid;
 	p->o_arg.id = sp->so_id;
 	p->o_arg.name = &dentry->d_name;
 	p->o_arg.server = server;
@@ -550,7 +550,7 @@
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
 				/* Don't recall a delegation if it was lost */
-				nfs4_schedule_state_recovery(server->nfs4_state);
+				nfs4_schedule_state_recovery(server->nfs_client);
 				return err;
 		}
 		err = nfs4_handle_exception(server, err, &exception);
@@ -758,7 +758,7 @@
 	}
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
-		return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
+		return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 	return 0;
 }
 
@@ -792,11 +792,18 @@
 
 int nfs4_recover_expired_lease(struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
+	int ret;
 
-	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+	for (;;) {
+		ret = nfs4_wait_clnt_recover(server->client, clp);
+		if (ret != 0)
+			return ret;
+		if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+			break;
 		nfs4_schedule_state_recovery(clp);
-	return nfs4_wait_clnt_recover(server->client, clp);
+	}
+	return 0;
 }
 
 /*
@@ -867,7 +874,7 @@
 {
 	struct nfs_delegation *delegation;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs4_state_owner *sp = NULL;
 	struct nfs4_state *state = NULL;
@@ -953,7 +960,7 @@
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_opendata *opendata;
 	int                     status;
 
@@ -970,7 +977,7 @@
 	status = -ENOMEM;
 	opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
 	if (opendata == NULL)
-		goto err_put_state_owner;
+		goto err_release_rwsem;
 
 	status = _nfs4_proc_open(opendata);
 	if (status != 0)
@@ -989,11 +996,11 @@
 	return 0;
 err_opendata_free:
 	nfs4_opendata_free(opendata);
+err_release_rwsem:
+	up_read(&clp->cl_sem);
 err_put_state_owner:
 	nfs4_put_state_owner(sp);
 out_err:
-	/* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
-	up_read(&clp->cl_sem);
 	*res = NULL;
 	return status;
 }
@@ -1133,7 +1140,7 @@
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(server->nfs4_state);
+			nfs4_schedule_state_recovery(server->nfs_client);
 			break;
 		default:
 			if (nfs4_async_handle_error(task, server) == -EAGAIN) {
@@ -1268,7 +1275,7 @@
 		BUG_ON(nd->intent.open.flags & O_CREAT);
 	}
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
 	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
@@ -1291,7 +1298,7 @@
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
@@ -1393,70 +1400,19 @@
 	return err;
 }
 
+/*
+ * get the file handle for the "/" directory on the server
+ */
 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
-		struct nfs_fsinfo *info)
+			      struct nfs_fsinfo *info)
 {
-	struct nfs_fattr *	fattr = info->fattr;
-	unsigned char *		p;
-	struct qstr		q;
-	struct nfs4_lookup_arg args = {
-		.dir_fh = fhandle,
-		.name = &q,
-		.bitmask = nfs4_fattr_bitmap,
-	};
-	struct nfs4_lookup_res res = {
-		.server = server,
-		.fattr = fattr,
-		.fh = fhandle,
-	};
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
-		.rpc_argp = &args,
-		.rpc_resp = &res,
-	};
 	int status;
 
-	/*
-	 * Now we do a separate LOOKUP for each component of the mount path.
-	 * The LOOKUPs are done separately so that we can conveniently
-	 * catch an ERR_WRONGSEC if it occurs along the way...
-	 */
 	status = nfs4_lookup_root(server, fhandle, info);
-	if (status)
-		goto out;
-
-	p = server->mnt_path;
-	for (;;) {
-		struct nfs4_exception exception = { };
-
-		while (*p == '/')
-			p++;
-		if (!*p)
-			break;
-		q.name = p;
-		while (*p && (*p != '/'))
-			p++;
-		q.len = p - q.name;
-
-		do {
-			nfs_fattr_init(fattr);
-			status = nfs4_handle_exception(server,
-					rpc_call_sync(server->client, &msg, 0),
-					&exception);
-		} while (exception.retry);
-		if (status == 0)
-			continue;
-		if (status == -ENOENT) {
-			printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
-			printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
-		}
-		break;
-	}
 	if (status == 0)
 		status = nfs4_server_capabilities(server, fhandle);
 	if (status == 0)
 		status = nfs4_do_fsinfo(server, fhandle, info);
-out:
 	return nfs4_map_errors(status);
 }
 
@@ -1565,7 +1521,7 @@
 
 	nfs_fattr_init(fattr);
 	
-	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 
@@ -1583,6 +1539,52 @@
 	return status;
 }
 
+/*
+ * One synchronous LOOKUP of name in dirfh; NFS4ERR_MOVED becomes -EREMOTE
+ */
+static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+		struct qstr *name, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	struct nfs4_lookup_arg args = {
+		.dir_fh = dirfh,
+		.name = name,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs4_lookup_res res = {
+		.server = server,
+		.fh = fhandle,
+		.fattr = fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+	int status;
+
+	nfs_fattr_init(fattr);
+	dprintk("NFS call  lookupfh %s\n", name->name);
+	status = rpc_call_sync(server->client, &msg, 0);
+	dprintk("NFS reply lookupfh: %d\n", status);
+	return status == -NFS4ERR_MOVED ? -EREMOTE : status;
+}
+
+/* LOOKUP name in dirfh, retrying while nfs4_handle_exception() requests it */
+static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+			      struct qstr *name, struct nfs_fh *fhandle,
+			      struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
+		err = nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
@@ -1881,7 +1883,7 @@
 	struct rpc_cred *cred;
 	int status = 0;
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred)) {
 		status = PTR_ERR(cred);
 		goto out;
@@ -2089,24 +2091,24 @@
 	return err;
 }
 
-static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
-		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr)
+static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+		struct page *page, unsigned int len, struct iattr *sattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
-	struct nfs_fattr dir_fattr;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
-		.name = name,
+		.name = &dentry->d_name,
 		.attrs = sattr,
 		.ftype = NF4LNK,
 		.bitmask = server->attr_bitmask,
 	};
 	struct nfs4_create_res res = {
 		.server = server,
-		.fh = fhandle,
-		.fattr = fattr,
+		.fh = &fhandle,
+		.fattr = &fattr,
 		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
@@ -2116,29 +2118,32 @@
 	};
 	int			status;
 
-	if (path->len > NFS4_MAXPATHLEN)
+	if (len > NFS4_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	arg.u.symlink = path;
-	nfs_fattr_init(fattr);
+
+	arg.u.symlink.pages = &page;
+	arg.u.symlink.len = len;
+	nfs_fattr_init(&fattr);
 	nfs_fattr_init(&dir_fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	if (!status)
+	if (!status) {
 		update_changeattr(dir, &res.dir_cinfo);
-	nfs_post_op_update_inode(dir, res.dir_fattr);
+		nfs_post_op_update_inode(dir, res.dir_fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
 	return status;
 }
 
-static int nfs4_proc_symlink(struct inode *dir, struct qstr *name,
-		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr)
+static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+		struct page *page, unsigned int len, struct iattr *sattr)
 {
 	struct nfs4_exception exception = { };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_symlink(dir, name, path, sattr,
-					fhandle, fattr),
+				_nfs4_proc_symlink(dir, dentry, page,
+							len, sattr),
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -2521,7 +2526,7 @@
  */
 static void nfs4_renew_done(struct rpc_task *task, void *data)
 {
-	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+	struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp;
 	unsigned long timestamp = (unsigned long)data;
 
 	if (task->tk_status < 0) {
@@ -2543,7 +2548,7 @@
 	.rpc_call_done = nfs4_renew_done,
 };
 
-int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2555,7 +2560,7 @@
 			&nfs4_renew_ops, (void *)jiffies);
 }
 
-int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2770,7 +2775,7 @@
 		return -EOPNOTSUPP;
 	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
-	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (ret == 0)
 		nfs4_write_cached_acl(inode, buf, buflen);
 	return ret;
@@ -2791,7 +2796,7 @@
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (!clp || task->tk_status >= 0)
 		return 0;
@@ -2828,7 +2833,7 @@
 	return 0;
 }
 
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
 {
 	sigset_t oldset;
 	int res;
@@ -2871,7 +2876,7 @@
  */
 int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	int ret = errorcode;
 
 	exception->retry = 0;
@@ -2886,6 +2891,7 @@
 			if (ret == 0)
 				exception->retry = 1;
 			break;
+		case -NFS4ERR_FILE_OPEN:
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
 			ret = nfs4_delay(server->client, &exception->timeout);
@@ -2898,7 +2904,7 @@
 	return nfs4_map_errors(ret);
 }
 
-int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
+int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
 {
 	nfs4_verifier sc_verifier;
 	struct nfs4_setclientid setclientid = {
@@ -2922,7 +2928,7 @@
 	for(;;) {
 		setclientid.sc_name_len = scnprintf(setclientid.sc_name,
 				sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
-				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr),
+				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
 				cred->cr_ops->cr_name,
 				clp->cl_id_uniquifier);
 		setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
@@ -2945,7 +2951,7 @@
 	return status;
 }
 
-static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs_fsinfo fsinfo;
 	struct rpc_message msg = {
@@ -2969,7 +2975,7 @@
 	return status;
 }
 
-int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	long timeout;
 	int err;
@@ -3077,7 +3083,7 @@
 		switch (err) {
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
-				nfs4_schedule_state_recovery(server->nfs4_state);
+				nfs4_schedule_state_recovery(server->nfs_client);
 			case 0:
 				return 0;
 		}
@@ -3106,7 +3112,7 @@
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_lockt_args arg = {
 		.fh = NFS_FH(inode),
 		.fl = request,
@@ -3231,7 +3237,7 @@
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(calldata->server->nfs4_state);
+			nfs4_schedule_state_recovery(calldata->server->nfs_client);
 			break;
 		default:
 			if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
@@ -3343,7 +3349,7 @@
 	if (p->arg.lock_seqid == NULL)
 		goto out_free;
 	p->arg.lock_stateid = &lsp->ls_stateid;
-	p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid;
+	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
@@ -3513,7 +3519,7 @@
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs4_client *clp = state->owner->so_client;
+	struct nfs_client *clp = state->owner->so_client;
 	unsigned char fl_flags = request->fl_flags;
 	int status;
 
@@ -3715,7 +3721,7 @@
 	.listxattr	= nfs4_listxattr,
 };
 
-struct nfs_rpc_ops	nfs_v4_clientops = {
+const struct nfs_rpc_ops nfs_v4_clientops = {
 	.version	= 4,			/* protocol version */
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
@@ -3723,6 +3729,7 @@
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
+	.lookupfh	= nfs4_proc_lookupfh,
 	.lookup		= nfs4_proc_lookup,
 	.access		= nfs4_proc_access,
 	.readlink	= nfs4_proc_readlink,
@@ -3743,6 +3750,7 @@
 	.statfs		= nfs4_proc_statfs,
 	.fsinfo		= nfs4_proc_fsinfo,
 	.pathconf	= nfs4_proc_pathconf,
+	.set_capabilities = nfs4_server_capabilities,
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
 	.read_done	= nfs4_read_done,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 5d764d8..7b6df18 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -61,7 +61,7 @@
 void
 nfs4_renew_state(void *data)
 {
-	struct nfs4_client *clp = (struct nfs4_client *)data;
+	struct nfs_client *clp = (struct nfs_client *)data;
 	struct rpc_cred *cred;
 	long lease, timeout;
 	unsigned long last, now;
@@ -108,7 +108,7 @@
 
 /* Must be called with clp->cl_sem locked for writes */
 void
-nfs4_schedule_state_renewal(struct nfs4_client *clp)
+nfs4_schedule_state_renewal(struct nfs_client *clp)
 {
 	long timeout;
 
@@ -121,32 +121,20 @@
 			__FUNCTION__, (timeout + HZ - 1) / HZ);
 	cancel_delayed_work(&clp->cl_renewd);
 	schedule_delayed_work(&clp->cl_renewd, timeout);
+	set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
 	spin_unlock(&clp->cl_lock);
 }
 
 void
 nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
-
-	if (!clp)
-		return;
 	flush_scheduled_work();
-	down_write(&clp->cl_sem);
-	if (!list_empty(&server->nfs4_siblings))
-		list_del_init(&server->nfs4_siblings);
-	up_write(&clp->cl_sem);
 }
 
-/* Must be called with clp->cl_sem locked for writes */
 void
-nfs4_kill_renewd(struct nfs4_client *clp)
+nfs4_kill_renewd(struct nfs_client *clp)
 {
 	down_read(&clp->cl_sem);
-	if (!list_empty(&clp->cl_superblocks)) {
-		up_read(&clp->cl_sem);
-		return;
-	}
 	cancel_delayed_work(&clp->cl_renewd);
 	up_read(&clp->cl_sem);
 	flush_scheduled_work();
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 090a36b..5fffbdf 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -50,149 +50,15 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "internal.h"
 
 #define OPENOWNER_POOL_SIZE	8
 
 const nfs4_stateid zero_stateid;
 
-static DEFINE_SPINLOCK(state_spinlock);
 static LIST_HEAD(nfs4_clientid_list);
 
-void
-init_nfsv4_state(struct nfs_server *server)
-{
-	server->nfs4_state = NULL;
-	INIT_LIST_HEAD(&server->nfs4_siblings);
-}
-
-void
-destroy_nfsv4_state(struct nfs_server *server)
-{
-	kfree(server->mnt_path);
-	server->mnt_path = NULL;
-	if (server->nfs4_state) {
-		nfs4_put_client(server->nfs4_state);
-		server->nfs4_state = NULL;
-	}
-}
-
-/*
- * nfs4_get_client(): returns an empty client structure
- * nfs4_put_client(): drops reference to client structure
- *
- * Since these are allocated/deallocated very rarely, we don't
- * bother putting them in a slab cache...
- */
-static struct nfs4_client *
-nfs4_alloc_client(struct in_addr *addr)
-{
-	struct nfs4_client *clp;
-
-	if (nfs_callback_up() < 0)
-		return NULL;
-	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
-		nfs_callback_down();
-		return NULL;
-	}
-	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
-	init_rwsem(&clp->cl_sem);
-	INIT_LIST_HEAD(&clp->cl_delegations);
-	INIT_LIST_HEAD(&clp->cl_state_owners);
-	INIT_LIST_HEAD(&clp->cl_unused);
-	spin_lock_init(&clp->cl_lock);
-	atomic_set(&clp->cl_count, 1);
-	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
-	INIT_LIST_HEAD(&clp->cl_superblocks);
-	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
-	clp->cl_rpcclient = ERR_PTR(-EINVAL);
-	clp->cl_boot_time = CURRENT_TIME;
-	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
-	return clp;
-}
-
-static void
-nfs4_free_client(struct nfs4_client *clp)
-{
-	struct nfs4_state_owner *sp;
-
-	while (!list_empty(&clp->cl_unused)) {
-		sp = list_entry(clp->cl_unused.next,
-				struct nfs4_state_owner,
-				so_list);
-		list_del(&sp->so_list);
-		kfree(sp);
-	}
-	BUG_ON(!list_empty(&clp->cl_state_owners));
-	nfs_idmap_delete(clp);
-	if (!IS_ERR(clp->cl_rpcclient))
-		rpc_shutdown_client(clp->cl_rpcclient);
-	kfree(clp);
-	nfs_callback_down();
-}
-
-static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
-{
-	struct nfs4_client *clp;
-	list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
-		if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
-			atomic_inc(&clp->cl_count);
-			return clp;
-		}
-	}
-	return NULL;
-}
-
-struct nfs4_client *nfs4_find_client(struct in_addr *addr)
-{
-	struct nfs4_client *clp;
-	spin_lock(&state_spinlock);
-	clp = __nfs4_find_client(addr);
-	spin_unlock(&state_spinlock);
-	return clp;
-}
-
-struct nfs4_client *
-nfs4_get_client(struct in_addr *addr)
-{
-	struct nfs4_client *clp, *new = NULL;
-
-	spin_lock(&state_spinlock);
-	for (;;) {
-		clp = __nfs4_find_client(addr);
-		if (clp != NULL)
-			break;
-		clp = new;
-		if (clp != NULL) {
-			list_add(&clp->cl_servers, &nfs4_clientid_list);
-			new = NULL;
-			break;
-		}
-		spin_unlock(&state_spinlock);
-		new = nfs4_alloc_client(addr);
-		spin_lock(&state_spinlock);
-		if (new == NULL)
-			break;
-	}
-	spin_unlock(&state_spinlock);
-	if (new)
-		nfs4_free_client(new);
-	return clp;
-}
-
-void
-nfs4_put_client(struct nfs4_client *clp)
-{
-	if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
-		return;
-	list_del(&clp->cl_servers);
-	spin_unlock(&state_spinlock);
-	BUG_ON(!list_empty(&clp->cl_superblocks));
-	rpc_wake_up(&clp->cl_rpcwaitq);
-	nfs4_kill_renewd(clp);
-	nfs4_free_client(clp);
-}
-
-static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
+static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
 			nfs_callback_tcpport, cred);
@@ -204,13 +70,13 @@
 }
 
 u32
-nfs4_alloc_lockowner_id(struct nfs4_client *clp)
+nfs4_alloc_lockowner_id(struct nfs_client *clp)
 {
 	return clp->cl_lockowner_id ++;
 }
 
 static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_state_owner *sp = NULL;
 
@@ -224,7 +90,7 @@
 	return sp;
 }
 
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct rpc_cred *cred = NULL;
@@ -238,7 +104,7 @@
 	return cred;
 }
 
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 
@@ -251,7 +117,7 @@
 }
 
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_state_owner *sp, *res = NULL;
 
@@ -294,7 +160,7 @@
 void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs4_client *clp = sp->so_client;
+	struct nfs_client *clp = sp->so_client;
 	spin_lock(&clp->cl_lock);
 	list_del_init(&sp->so_list);
 	spin_unlock(&clp->cl_lock);
@@ -306,7 +172,7 @@
  */
 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 
 	get_rpccred(cred);
@@ -337,7 +203,7 @@
  */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs4_client *clp = sp->so_client;
+	struct nfs_client *clp = sp->so_client;
 	struct rpc_cred *cred = sp->so_cred;
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
@@ -540,7 +406,7 @@
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 {
 	struct nfs4_lock_state *lsp;
-	struct nfs4_client *clp = state->owner->so_client;
+	struct nfs_client *clp = state->owner->so_client;
 
 	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
 	if (lsp == NULL)
@@ -752,7 +618,7 @@
 
 static int reclaimer(void *);
 
-static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
+static inline void nfs4_clear_recover_bit(struct nfs_client *clp)
 {
 	smp_mb__before_clear_bit();
 	clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state);
@@ -764,25 +630,25 @@
 /*
  * State recovery routine
  */
-static void nfs4_recover_state(struct nfs4_client *clp)
+static void nfs4_recover_state(struct nfs_client *clp)
 {
 	struct task_struct *task;
 
 	__module_get(THIS_MODULE);
 	atomic_inc(&clp->cl_count);
 	task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
-			NIPQUAD(clp->cl_addr));
+			NIPQUAD(clp->cl_addr.sin_addr));
 	if (!IS_ERR(task))
 		return;
 	nfs4_clear_recover_bit(clp);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put(THIS_MODULE);
 }
 
 /*
  * Schedule a state recovery attempt
  */
-void nfs4_schedule_state_recovery(struct nfs4_client *clp)
+void nfs4_schedule_state_recovery(struct nfs_client *clp)
 {
 	if (!clp)
 		return;
@@ -879,7 +745,7 @@
 	return status;
 }
 
-static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
+static void nfs4_state_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct nfs4_state *state;
@@ -903,7 +769,7 @@
 
 static int reclaimer(void *ptr)
 {
-	struct nfs4_client *clp = ptr;
+	struct nfs_client *clp = ptr;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_recovery_ops *ops;
 	struct rpc_cred *cred;
@@ -970,12 +836,12 @@
 	if (status == -NFS4ERR_CB_PATH_DOWN)
 		nfs_handle_cb_pathdown(clp);
 	nfs4_clear_recover_bit(clp);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put_and_exit(0);
 	return 0;
 out_error:
 	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
-				NIPQUAD(clp->cl_addr.s_addr), -status);
+				NIPQUAD(clp->cl_addr.sin_addr), -status);
 	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 	goto out;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 730ec8f..3dd413f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -58,7 +58,7 @@
 /* Mapping from NFS error code to "errno" error code. */
 #define errno_NFSERR_IO		EIO
 
-static int nfs_stat_to_errno(int);
+static int nfs4_stat_to_errno(int);
 
 /* NFSv4 COMPOUND tags are only wanted for debugging purposes */
 #ifdef DEBUG
@@ -128,7 +128,7 @@
 #define decode_link_maxsz	(op_decode_hdr_maxsz + 5)
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
 				1 + nfs4_name_maxsz + \
-				nfs4_path_maxsz + \
+				1 + \
 				nfs4_fattr_maxsz)
 #define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
@@ -529,7 +529,7 @@
 	if (iap->ia_valid & ATTR_MODE)
 		len += 4;
 	if (iap->ia_valid & ATTR_UID) {
-		owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name);
+		owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
 		if (owner_namelen < 0) {
 			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
 			       iap->ia_uid);
@@ -541,7 +541,7 @@
 		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
 	}
 	if (iap->ia_valid & ATTR_GID) {
-		owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group);
+		owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
 		if (owner_grouplen < 0) {
 			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
 			       iap->ia_gid);
@@ -673,9 +673,9 @@
 
 	switch (create->ftype) {
 	case NF4LNK:
-		RESERVE_SPACE(4 + create->u.symlink->len);
-		WRITE32(create->u.symlink->len);
-		WRITEMEM(create->u.symlink->name, create->u.symlink->len);
+		RESERVE_SPACE(4);
+		WRITE32(create->u.symlink.len);
+		xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
 		break;
 
 	case NF4BLK: case NF4CHR:
@@ -1160,7 +1160,7 @@
 	return 0;
 }
 
-static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid)
+static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid)
 {
 	uint32_t *p;
 
@@ -1246,7 +1246,7 @@
 	return 0;
 }
 
-static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state)
+static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state)
 {
         uint32_t *p;
 
@@ -1945,7 +1945,7 @@
 /*
  * a RENEW request
  */
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1975,7 +1975,7 @@
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -2127,12 +2127,12 @@
 	}
 	READ32(nfserr);
 	if (nfserr != NFS_OK)
-		return -nfs_stat_to_errno(nfserr);
+		return -nfs4_stat_to_errno(nfserr);
 	return 0;
 }
 
 /* Dummy routine */
-static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
+static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
 {
 	uint32_t *p;
 	unsigned int strlen;
@@ -2636,7 +2636,7 @@
 	return 0;
 }
 
-static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid)
+static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid)
 {
 	uint32_t len, *p;
 
@@ -2660,7 +2660,7 @@
 	return 0;
 }
 
-static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid)
+static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid)
 {
 	uint32_t len, *p;
 
@@ -3051,9 +3051,9 @@
 	fattr->mode |= fmode;
 	if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
 		goto xdr_error;
-	if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0)
+	if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0)
 		goto xdr_error;
-	if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0)
+	if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0)
 		goto xdr_error;
 	if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
 		goto xdr_error;
@@ -3254,7 +3254,7 @@
 			if (decode_space_limit(xdr, &res->maxsize) < 0)
 				return -EIO;
 	}
-	return decode_ace(xdr, NULL, res->server->nfs4_state);
+	return decode_ace(xdr, NULL, res->server->nfs_client);
 }
 
 static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3565,7 +3565,7 @@
 	return 0;
 }
 
-static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
+static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
 {
 	uint32_t *p;
 	uint32_t opnum;
@@ -3598,7 +3598,7 @@
 		READ_BUF(len);
 		return -NFSERR_CLID_INUSE;
 	} else
-		return -nfs_stat_to_errno(nfserr);
+		return -nfs4_stat_to_errno(nfserr);
 
 	return 0;
 }
@@ -4256,7 +4256,7 @@
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4335,7 +4335,7 @@
  * a SETCLIENTID request
  */
 static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
-		struct nfs4_client *clp)
+		struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4346,7 +4346,7 @@
 	if (!status)
 		status = decode_setclientid(&xdr, clp);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4368,7 +4368,7 @@
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4521,7 +4521,7 @@
  * This one is used jointly by NFSv2 and NFSv3.
  */
 static int
-nfs_stat_to_errno(int stat)
+nfs4_stat_to_errno(int stat)
 {
 	int i;
 	for (i = 0; nfs_errtbl[i].stat != -1; i++) {
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b3899ea..630e506 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -66,14 +66,14 @@
 
 	dprintk("%s: call getattr\n", __FUNCTION__);
 	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
 	dprintk("%s: call statfs\n", __FUNCTION__);
 	msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
 	msg.rpc_resp = &fsinfo;
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
@@ -425,16 +425,17 @@
 }
 
 static int
-nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
-		 struct iattr *sattr, struct nfs_fh *fhandle,
-		 struct nfs_fattr *fattr)
+nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+		 unsigned int len, struct iattr *sattr)
 {
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
 	struct nfs_symlinkargs	arg = {
 		.fromfh		= NFS_FH(dir),
-		.fromname	= name->name,
-		.fromlen	= name->len,
-		.topath		= path->name,
-		.tolen		= path->len,
+		.fromname	= dentry->d_name.name,
+		.fromlen	= dentry->d_name.len,
+		.pages		= &page,
+		.pathlen	= len,
 		.sattr		= sattr
 	};
 	struct rpc_message msg = {
@@ -443,13 +444,25 @@
 	};
 	int			status;
 
-	if (path->len > NFS2_MAXPATHLEN)
+	if (len > NFS2_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
-	nfs_fattr_init(fattr);
-	fhandle->size = 0;
+
+	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
+
+	/*
+	 * V2 SYMLINK requests don't return any attributes.  Setting the
+	 * filehandle size to zero indicates to nfs_instantiate that it
+	 * should fill in the data with a LOOKUP call on the wire.
+	 */
+	if (status == 0) {
+		nfs_fattr_init(&fattr);
+		fhandle.size = 0;
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
+
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
@@ -671,7 +684,7 @@
 }
 
 
-struct nfs_rpc_ops	nfs_v2_clientops = {
+const struct nfs_rpc_ops nfs_v2_clientops = {
 	.version	= 2,		       /* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7a9ee00..69f1549 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -171,7 +171,7 @@
 		rdata->args.offset = page_offset(page) + rdata->args.pgbase;
 
 		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
-			NFS_SERVER(inode)->hostname,
+			NFS_SERVER(inode)->nfs_client->cl_hostname,
 			inode->i_sb->s_id,
 			(long long)NFS_FILEID(inode),
 			(unsigned long long)rdata->args.pgbase,
@@ -204,9 +204,11 @@
 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
 	spin_unlock(&inode->i_lock);
 
-	nfs_readpage_truncate_uninitialised_page(rdata);
-	if (rdata->res.eof || rdata->res.count == rdata->args.count)
+	if (rdata->res.eof || rdata->res.count == rdata->args.count) {
 		SetPageUptodate(page);
+		if (rdata->res.eof && count != 0)
+			memclear_highpage_flush(page, rdata->args.pgbase, count);
+	}
 	result = 0;
 
 io_error:
@@ -566,8 +568,13 @@
 
 	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
-	/* Is this a short read? */
-	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+	if (task->tk_status < 0) {
+		if (task->tk_status == -ESTALE) {
+			set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+			nfs_mark_for_revalidate(data->inode);
+		}
+	} else if (resp->count < argp->count && !resp->eof) {
+		/* This is a short read! */
 		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
@@ -614,6 +621,10 @@
 	if (error)
 		goto out_error;
 
+	error = -ESTALE;
+	if (NFS_STALE(inode))
+		goto out_error;
+
 	if (file == NULL) {
 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (ctx == NULL)
@@ -676,7 +687,7 @@
 	};
 	struct inode *inode = mapping->host;
 	struct nfs_server *server = NFS_SERVER(inode);
-	int ret;
+	int ret = -ESTALE;
 
 	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
 			inode->i_sb->s_id,
@@ -684,6 +695,9 @@
 			nr_pages);
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
+	if (NFS_STALE(inode))
+		goto out;
+
 	if (filp == NULL) {
 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (desc.ctx == NULL)
@@ -699,6 +713,7 @@
 			ret = err;
 	}
 	put_nfs_open_context(desc.ctx);
+out:
 	return ret;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e8a9bee..b99113b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -13,6 +13,11 @@
  *
  *  Split from inode.c by David Howells <dhowells@redhat.com>
  *
+ * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a
+ *   particular server are held in the same superblock
+ * - NFS superblocks can have several effective roots to the dentry tree
+ * - directory type roots are spliced into the tree when a path from one root reaches the root
+ *   of another (see nfs_lookup())
  */
 
 #include <linux/config.h>
@@ -52,66 +57,12 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-/* Maximum number of readahead requests
- * FIXME: this should really be a sysctl so that users may tune it to suit
- *        their needs. People that do NFS over a slow network, might for
- *        instance want to reduce it to something closer to 1 for improved
- *        interactive response.
- */
-#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
-
-/*
- * RPC cruft for NFS
- */
-static struct rpc_version * nfs_version[] = {
-	NULL,
-	NULL,
-	&nfs_version2,
-#if defined(CONFIG_NFS_V3)
-	&nfs_version3,
-#elif defined(CONFIG_NFS_V4)
-	NULL,
-#endif
-#if defined(CONFIG_NFS_V4)
-	&nfs_version4,
-#endif
-};
-
-static struct rpc_program nfs_program = {
-	.name			= "nfs",
-	.number			= NFS_PROGRAM,
-	.nrvers			= ARRAY_SIZE(nfs_version),
-	.version		= nfs_version,
-	.stats			= &nfs_rpcstat,
-	.pipe_dir_name		= "/nfs",
-};
-
-struct rpc_stat nfs_rpcstat = {
-	.program		= &nfs_program
-};
-
-
-#ifdef CONFIG_NFS_V3_ACL
-static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
-static struct rpc_version *	nfsacl_version[] = {
-	[3]			= &nfsacl_version3,
-};
-
-struct rpc_program		nfsacl_program = {
-	.name =			"nfsacl",
-	.number =		NFS_ACL_PROGRAM,
-	.nrvers =		ARRAY_SIZE(nfsacl_version),
-	.version =		nfsacl_version,
-	.stats =		&nfsacl_rpcstat,
-};
-#endif  /* CONFIG_NFS_V3_ACL */
-
 static void nfs_umount_begin(struct vfsmount *, int);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
+static int nfs_xdev_get_sb(struct file_system_type *fs_type,
 		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs_kill_super(struct super_block *);
 
@@ -123,10 +74,10 @@
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type clone_nfs_fs_type = {
+struct file_system_type nfs_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs",
-	.get_sb		= nfs_clone_nfs_sb,
+	.get_sb		= nfs_xdev_get_sb,
 	.kill_sb	= nfs_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -145,10 +96,10 @@
 #ifdef CONFIG_NFS_V4
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_referral_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs4_kill_super(struct super_block *sb);
 
 static struct file_system_type nfs4_fs_type = {
@@ -159,18 +110,18 @@
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type clone_nfs4_fs_type = {
+struct file_system_type nfs4_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs_clone_nfs4_sb,
+	.get_sb		= nfs4_xdev_get_sb,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type nfs_referral_nfs4_fs_type = {
+struct file_system_type nfs4_referral_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs_referral_nfs4_sb,
+	.get_sb		= nfs4_referral_get_sb,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -187,39 +138,7 @@
 };
 #endif
 
-#ifdef CONFIG_NFS_V4
-static const int nfs_set_port_min = 0;
-static const int nfs_set_port_max = 65535;
-
-static int param_set_port(const char *val, struct kernel_param *kp)
-{
-	char *endp;
-	int num = simple_strtol(val, &endp, 0);
-	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
-		return -EINVAL;
-	*((int *)kp->arg) = num;
-	return 0;
-}
-
-module_param_call(callback_tcpport, param_set_port, param_get_int,
-		 &nfs_callback_set_tcpport, 0644);
-#endif
-
-#ifdef CONFIG_NFS_V4
-static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
-{
-	char *endp;
-	int num = simple_strtol(val, &endp, 0);
-	int jif = num * HZ;
-	if (endp == val || *endp || num < 0 || jif < num)
-		return -EINVAL;
-	*((int *)kp->arg) = jif;
-	return 0;
-}
-
-module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
-		 &nfs_idmap_cache_timeout, 0644);
-#endif
+static struct shrinker *acl_shrinker;
 
 /*
  * Register the NFS filesystems
@@ -240,6 +159,7 @@
 	if (ret < 0)
 		goto error_2;
 #endif
+	acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
 	return 0;
 
 #ifdef CONFIG_NFS_V4
@@ -257,6 +177,8 @@
  */
 void __exit unregister_nfs_fs(void)
 {
+	if (acl_shrinker != NULL)
+		remove_shrinker(acl_shrinker);
 #ifdef CONFIG_NFS_V4
 	unregister_filesystem(&nfs4_fs_type);
 	nfs_unregister_sysctl();
@@ -269,11 +191,10 @@
  */
 static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct super_block *sb = dentry->d_sb;
-	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_server *server = NFS_SB(dentry->d_sb);
 	unsigned char blockbits;
 	unsigned long blockres;
-	struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+	struct nfs_fh *fh = NFS_FH(dentry->d_inode);
 	struct nfs_fattr fattr;
 	struct nfs_fsstat res = {
 			.fattr = &fattr,
@@ -282,7 +203,7 @@
 
 	lock_kernel();
 
-	error = server->rpc_ops->statfs(server, rootfh, &res);
+	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
 	buf->f_type = NFS_SUPER_MAGIC;
 	if (error < 0)
 		goto out_err;
@@ -292,7 +213,7 @@
 	 * case where f_frsize != f_bsize.  Eventually we want to
 	 * report the value of wtmult in this field.
 	 */
-	buf->f_frsize = sb->s_blocksize;
+	buf->f_frsize = dentry->d_sb->s_blocksize;
 
 	/*
 	 * On most *nix systems, f_blocks, f_bfree, and f_bavail
@@ -301,8 +222,8 @@
 	 * thus historically Linux's sys_statfs reports these
 	 * fields in units of f_bsize.
 	 */
-	buf->f_bsize = sb->s_blocksize;
-	blockbits = sb->s_blocksize_bits;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
+	blockbits = dentry->d_sb->s_blocksize_bits;
 	blockres = (1 << blockbits) - 1;
 	buf->f_blocks = (res.tbytes + blockres) >> blockbits;
 	buf->f_bfree = (res.fbytes + blockres) >> blockbits;
@@ -323,9 +244,12 @@
 
 }
 
+/*
+ * Map the security flavour number to a name
+ */
 static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
 {
-	static struct {
+	static const struct {
 		rpc_authflavor_t flavour;
 		const char *str;
 	} sec_flavours[] = {
@@ -356,10 +280,10 @@
  */
 static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
 {
-	static struct proc_nfs_info {
+	static const struct proc_nfs_info {
 		int flag;
-		char *str;
-		char *nostr;
+		const char *str;
+		const char *nostr;
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
@@ -369,11 +293,12 @@
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 	};
-	struct proc_nfs_info *nfs_infop;
+	const struct proc_nfs_info *nfs_infop;
+	struct nfs_client *clp = nfss->nfs_client;
 	char buf[12];
-	char *proto;
+	const char *proto;
 
-	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
+	seq_printf(m, ",vers=%d", clp->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
 	seq_printf(m, ",wsize=%d", nfss->wsize);
 	if (nfss->acregmin != 3*HZ || showdefaults)
@@ -402,8 +327,8 @@
 			proto = buf;
 	}
 	seq_printf(m, ",proto=%s", proto);
-	seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
-	seq_printf(m, ",retrans=%u", nfss->retrans_count);
+	seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
+	seq_printf(m, ",retrans=%u", clp->retrans_count);
 	seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
 }
 
@@ -417,7 +342,7 @@
 	nfs_show_mount_options(m, nfss, 0);
 
 	seq_puts(m, ",addr=");
-	seq_escape(m, nfss->hostname, " \t\n\\");
+	seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
 
 	return 0;
 }
@@ -454,7 +379,7 @@
 	seq_printf(m, ",namelen=%d", nfss->namelen);
 
 #ifdef CONFIG_NFS_V4
-	if (nfss->rpc_ops->version == 4) {
+	if (nfss->nfs_client->cl_nfsversion == 4) {
 		seq_printf(m, "\n\tnfsv4:\t");
 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
@@ -501,481 +426,30 @@
 
 /*
  * Begin unmount by attempting to remove all automounted mountpoints we added
- * in response to traversals
+ * in response to xdev traversals and referrals
  */
 static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
 {
-	struct nfs_server *server;
-	struct rpc_clnt	*rpc;
-
 	shrink_submounts(vfsmnt, &nfs_automount_list);
-	if (!(flags & MNT_FORCE))
-		return;
-	/* -EIO all pending I/O */
-	server = NFS_SB(vfsmnt->mnt_sb);
-	rpc = server->client;
-	if (!IS_ERR(rpc))
-		rpc_killall_tasks(rpc);
-	rpc = server->client_acl;
-	if (!IS_ERR(rpc))
-		rpc_killall_tasks(rpc);
 }
 
 /*
- * Obtain the root inode of the file system.
+ * Validate the NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
  */
-static struct inode *
-nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
+static int nfs_validate_mount_data(struct nfs_mount_data *data,
+				   struct nfs_fh *mntfh)
 {
-	struct nfs_server	*server = NFS_SB(sb);
-	int			error;
-
-	error = server->rpc_ops->getroot(server, rootfh, fsinfo);
-	if (error < 0) {
-		dprintk("nfs_get_root: getattr error = %d\n", -error);
-		return ERR_PTR(error);
-	}
-
-	server->fsid = fsinfo->fattr->fsid;
-	return nfs_fhget(sb, rootfh, fsinfo->fattr);
-}
-
-/*
- * Do NFS version-independent mount processing, and sanity checking
- */
-static int
-nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
-{
-	struct nfs_server	*server;
-	struct inode		*root_inode;
-	struct nfs_fattr	fattr;
-	struct nfs_fsinfo	fsinfo = {
-					.fattr = &fattr,
-				};
-	struct nfs_pathconf pathinfo = {
-			.fattr = &fattr,
-	};
-	int no_root_error = 0;
-	unsigned long max_rpc_payload;
-
-	/* We probably want something more informative here */
-	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
-
-	server = NFS_SB(sb);
-
-	sb->s_magic      = NFS_SUPER_MAGIC;
-
-	server->io_stats = nfs_alloc_iostats();
-	if (server->io_stats == NULL)
-		return -ENOMEM;
-
-	root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
-	/* Did getting the root inode fail? */
-	if (IS_ERR(root_inode)) {
-		no_root_error = PTR_ERR(root_inode);
-		goto out_no_root;
-	}
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root) {
-		no_root_error = -ENOMEM;
-		goto out_no_root;
-	}
-	sb->s_root->d_op = server->rpc_ops->dentry_ops;
-
-	/* mount time stamp, in seconds */
-	server->mount_time = jiffies;
-
-	/* Get some general file system info */
-	if (server->namelen == 0 &&
-	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
-		server->namelen = pathinfo.max_namelen;
-	/* Work out a lot of parameters */
-	if (server->rsize == 0)
-		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
-	if (server->wsize == 0)
-		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
-
-	if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
-		server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
-	if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
-		server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
-
-	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
-	if (server->rsize > max_rpc_payload)
-		server->rsize = max_rpc_payload;
-	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
-		server->rsize = NFS_MAX_FILE_IO_SIZE;
-	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	if (server->wsize > max_rpc_payload)
-		server->wsize = max_rpc_payload;
-	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
-		server->wsize = NFS_MAX_FILE_IO_SIZE;
-	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	if (sb->s_blocksize == 0)
-		sb->s_blocksize = nfs_block_bits(server->wsize,
-							 &sb->s_blocksize_bits);
-	server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
-
-	server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
-	if (server->dtsize > PAGE_CACHE_SIZE)
-		server->dtsize = PAGE_CACHE_SIZE;
-	if (server->dtsize > server->rsize)
-		server->dtsize = server->rsize;
-
-	if (server->flags & NFS_MOUNT_NOAC) {
-		server->acregmin = server->acregmax = 0;
-		server->acdirmin = server->acdirmax = 0;
-		sb->s_flags |= MS_SYNCHRONOUS;
-	}
-	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
-
-	nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
-
-	server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
-	server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
-
-	/* We're airborne Set socket buffersize */
-	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
-	return 0;
-	/* Yargs. It didn't work out. */
-out_no_root:
-	dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
-	if (!IS_ERR(root_inode))
-		iput(root_inode);
-	return no_root_error;
-}
-
-/*
- * Initialise the timeout values for a connection
- */
-static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
-{
-	to->to_initval = timeo * HZ / 10;
-	to->to_retries = retrans;
-	if (!to->to_retries)
-		to->to_retries = 2;
-
-	switch (proto) {
-	case IPPROTO_TCP:
-		if (!to->to_initval)
-			to->to_initval = 60 * HZ;
-		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
-			to->to_initval = NFS_MAX_TCP_TIMEOUT;
-		to->to_increment = to->to_initval;
-		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
-		to->to_exponential = 0;
-		break;
-	case IPPROTO_UDP:
-	default:
-		if (!to->to_initval)
-			to->to_initval = 11 * HZ / 10;
-		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
-			to->to_initval = NFS_MAX_UDP_TIMEOUT;
-		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
-		to->to_exponential = 1;
-		break;
-	}
-}
-
-/*
- * Create an RPC client handle.
- */
-static struct rpc_clnt *
-nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
-{
-	struct rpc_timeout	timeparms;
-	struct rpc_xprt		*xprt = NULL;
-	struct rpc_clnt		*clnt = NULL;
-	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
-
-	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
-
-	server->retrans_timeo = timeparms.to_initval;
-	server->retrans_count = timeparms.to_retries;
-
-	/* create transport and client */
-	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("%s: cannot create RPC transport. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		return (struct rpc_clnt *)xprt;
-	}
-	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				 server->rpc_ops->version, data->pseudoflavor);
-	if (IS_ERR(clnt)) {
-		dprintk("%s: cannot create RPC client. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		goto out_fail;
-	}
-
-	clnt->cl_intr     = 1;
-	clnt->cl_softrtry = 1;
-
-	return clnt;
-
-out_fail:
-	return clnt;
-}
-
-/*
- * Clone a server record
- */
-static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data)
-{
-	struct nfs_server *server = NFS_SB(sb);
-	struct nfs_server *parent = NFS_SB(data->sb);
-	struct inode *root_inode;
-	struct nfs_fsinfo fsinfo;
-	void *err = ERR_PTR(-ENOMEM);
-
-	sb->s_op = data->sb->s_op;
-	sb->s_blocksize = data->sb->s_blocksize;
-	sb->s_blocksize_bits = data->sb->s_blocksize_bits;
-	sb->s_maxbytes = data->sb->s_maxbytes;
-
-	server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
-	server->io_stats = nfs_alloc_iostats();
-	if (server->io_stats == NULL)
-		goto out;
-
-	server->client = rpc_clone_client(parent->client);
-	if (IS_ERR((err = server->client)))
-		goto out;
-
-	if (!IS_ERR(parent->client_sys)) {
-		server->client_sys = rpc_clone_client(parent->client_sys);
-		if (IS_ERR((err = server->client_sys)))
-			goto out;
-	}
-	if (!IS_ERR(parent->client_acl)) {
-		server->client_acl = rpc_clone_client(parent->client_acl);
-		if (IS_ERR((err = server->client_acl)))
-			goto out;
-	}
-	root_inode = nfs_fhget(sb, data->fh, data->fattr);
-	if (!root_inode)
-		goto out;
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root)
-		goto out_put_root;
-	fsinfo.fattr = data->fattr;
-	if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
-		nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
-	sb->s_root->d_op = server->rpc_ops->dentry_ops;
-	sb->s_flags |= MS_ACTIVE;
-	return server;
-out_put_root:
-	iput(root_inode);
-out:
-	return err;
-}
-
-/*
- * Copy an existing superblock and attach revised data
- */
-static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
-		struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
-		struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *),
-		struct vfsmount *mnt)
-{
-	struct nfs_server *server;
-	struct nfs_server *parent = NFS_SB(data->sb);
-	struct super_block *sb = ERR_PTR(-EINVAL);
-	char *hostname;
-	int error = -ENOMEM;
-	int len;
-
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (server == NULL)
-		goto out_err;
-	memcpy(server, parent, sizeof(*server));
-	hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
-	len = strlen(hostname) + 1;
-	server->hostname = kmalloc(len, GFP_KERNEL);
-	if (server->hostname == NULL)
-		goto free_server;
-	memcpy(server->hostname, hostname, len);
-	error = rpciod_up();
-	if (error != 0)
-		goto free_hostname;
-
-	sb = fill_sb(server, data);
-	if (IS_ERR(sb)) {
-		error = PTR_ERR(sb);
-		goto kill_rpciod;
-	}
-		
-	if (sb->s_root)
-		goto out_rpciod_down;
-
-	server = fill_server(sb, data);
-	if (IS_ERR(server)) {
-		error = PTR_ERR(server);
-		goto out_deactivate;
-	}
-	return simple_set_mnt(mnt, sb);
-out_deactivate:
-	up_write(&sb->s_umount);
-	deactivate_super(sb);
-	return error;
-out_rpciod_down:
-	rpciod_down();
-	kfree(server->hostname);
-	kfree(server);
-	return simple_set_mnt(mnt, sb);
-kill_rpciod:
-	rpciod_down();
-free_hostname:
-	kfree(server->hostname);
-free_server:
-	kfree(server);
-out_err:
-	return error;
-}
-
-/*
- * Set up an NFS2/3 superblock
- *
- * The way this works is that the mount process passes a structure
- * in the data argument which contains the server's IP address
- * and the root file handle obtained from the server's mount
- * daemon. We stash these away in the private superblock fields.
- */
-static int
-nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
-{
-	struct nfs_server	*server;
-	rpc_authflavor_t	authflavor;
-
-	server           = NFS_SB(sb);
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize = 0;
-	if (data->bsize)
-		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
-	if (data->rsize)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize)
-		server->wsize = nfs_block_size(data->wsize, NULL);
-	server->flags    = data->flags & NFS_MOUNT_FLAGMASK;
-
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
-
-	/* Start lockd here, before we might error out */
-	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_up();
-
-	server->namelen  = data->namlen;
-	server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
-	if (!server->hostname)
-		return -ENOMEM;
-	strcpy(server->hostname, data->hostname);
-
-	/* Check NFS protocol revision and initialize RPC op vector
-	 * and file handle pool. */
-#ifdef CONFIG_NFS_V3
-	if (server->flags & NFS_MOUNT_VER3) {
-		server->rpc_ops = &nfs_v3_clientops;
-		server->caps |= NFS_CAP_READDIRPLUS;
-	} else {
-		server->rpc_ops = &nfs_v2_clientops;
-	}
-#else
-	server->rpc_ops = &nfs_v2_clientops;
-#endif
-
-	/* Fill in pseudoflavor for mount version < 5 */
-	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
-		data->pseudoflavor = RPC_AUTH_UNIX;
-	authflavor = data->pseudoflavor;	/* save for sb_init() */
-	/* XXX maybe we want to add a server->pseudoflavor field */
-
-	/* Create RPC client handles */
-	server->client = nfs_create_client(server, data);
-	if (IS_ERR(server->client))
-		return PTR_ERR(server->client);
-	/* RFC 2623, sec 2.3.2 */
-	if (authflavor != RPC_AUTH_UNIX) {
-		struct rpc_auth *auth;
-
-		server->client_sys = rpc_clone_client(server->client);
-		if (IS_ERR(server->client_sys))
-			return PTR_ERR(server->client_sys);
-		auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
-		if (IS_ERR(auth))
-			return PTR_ERR(auth);
-	} else {
-		atomic_inc(&server->client->cl_count);
-		server->client_sys = server->client;
-	}
-	if (server->flags & NFS_MOUNT_VER3) {
-#ifdef CONFIG_NFS_V3_ACL
-		if (!(server->flags & NFS_MOUNT_NOACL)) {
-			server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
-			/* No errors! Assume that Sun nfsacls are supported */
-			if (!IS_ERR(server->client_acl))
-				server->caps |= NFS_CAP_ACLS;
-		}
-#else
-		server->flags &= ~NFS_MOUNT_NOACL;
-#endif /* CONFIG_NFS_V3_ACL */
-		/*
-		 * The VFS shouldn't apply the umask to mode bits. We will
-		 * do so ourselves when necessary.
-		 */
-		sb->s_flags |= MS_POSIXACL;
-		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
-			server->namelen = NFS3_MAXNAMLEN;
-		sb->s_time_gran = 1;
-	} else {
-		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
-			server->namelen = NFS2_MAXNAMLEN;
-	}
-
-	sb->s_op = &nfs_sops;
-	return nfs_sb_init(sb, authflavor);
-}
-
-static int nfs_set_super(struct super_block *s, void *data)
-{
-	s->s_fs_info = data;
-	return set_anon_super(s, data);
-}
-
-static int nfs_compare_super(struct super_block *sb, void *data)
-{
-	struct nfs_server *server = data;
-	struct nfs_server *old = NFS_SB(sb);
-
-	if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr)
-		return 0;
-	if (old->addr.sin_port != server->addr.sin_port)
-		return 0;
-	return !nfs_compare_fh(&old->fh, &server->fh);
-}
-
-static int nfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
-{
-	int error;
-	struct nfs_server *server = NULL;
-	struct super_block *s;
-	struct nfs_fh *root;
-	struct nfs_mount_data *data = raw_data;
-
-	error = -EINVAL;
 	if (data == NULL) {
 		dprintk("%s: missing data argument\n", __FUNCTION__);
-		goto out_err_noserver;
+		return -EINVAL;
 	}
+
 	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
 		dprintk("%s: bad mount version\n", __FUNCTION__);
-		goto out_err_noserver;
+		return -EINVAL;
 	}
+
 	switch (data->version) {
 		case 1:
 			data->namlen = 0;
@@ -986,7 +460,7 @@
 				dprintk("%s: mount structure version %d does not support NFSv3\n",
 						__FUNCTION__,
 						data->version);
-				goto out_err_noserver;
+				return -EINVAL;
 			}
 			data->root.size = NFS2_FHSIZE;
 			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
@@ -995,288 +469,310 @@
 				dprintk("%s: mount structure version %d does not support strong security\n",
 						__FUNCTION__,
 						data->version);
-				goto out_err_noserver;
+				return -EINVAL;
 			}
 		case 5:
 			memset(data->context, 0, sizeof(data->context));
 	}
+
+	/* Set the pseudoflavor */
+	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+		data->pseudoflavor = RPC_AUTH_UNIX;
+
 #ifndef CONFIG_NFS_V3
 	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
-	error = -EPROTONOSUPPORT;
 	if (data->flags & NFS_MOUNT_VER3) {
 		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
-		goto out_err_noserver;
+		return -EPROTONOSUPPORT;
 	}
 #endif /* CONFIG_NFS_V3 */
 
-	error = -ENOMEM;
-	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (!server)
-		goto out_err_noserver;
-	/* Zero out the NFS state stuff */
-	init_nfsv4_state(server);
-	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
-
-	root = &server->fh;
-	if (data->flags & NFS_MOUNT_VER3)
-		root->size = data->root.size;
-	else
-		root->size = NFS2_FHSIZE;
-	error = -EINVAL;
-	if (root->size > sizeof(root->data)) {
-		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
-		goto out_err;
-	}
-	memcpy(root->data, data->root.data, root->size);
-
 	/* We now require that the mount process passes the remote address */
-	memcpy(&server->addr, &data->addr, sizeof(server->addr));
-	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
+	if (data->addr.sin_addr.s_addr == INADDR_ANY) {
 		dprintk("%s: mount program didn't pass remote address!\n",
-				__FUNCTION__);
-		goto out_err;
+			__FUNCTION__);
+		return -EINVAL;
 	}
 
-	/* Fire up rpciod if not yet running */
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto out_err;
+	/* Prepare the root filehandle */
+	if (data->flags & NFS_MOUNT_VER3)
+		mntfh->size = data->root.size;
+	else
+		mntfh->size = NFS2_FHSIZE;
+
+	if (mntfh->size > sizeof(mntfh->data)) {
+		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+		return -EINVAL;
 	}
 
+	memcpy(mntfh->data, data->root.data, mntfh->size);
+	if (mntfh->size < sizeof(mntfh->data))
+		memset(mntfh->data + mntfh->size, 0,
+		       sizeof(mntfh->data) - mntfh->size);
+
+	return 0;
+}
+
+/*
+ * Initialise the common bits of the superblock
+ */
+static inline void nfs_initialise_sb(struct super_block *sb)
+{
+	struct nfs_server *server = NFS_SB(sb);
+
+	sb->s_magic = NFS_SUPER_MAGIC;
+
+	/* We probably want something more informative here */
+	snprintf(sb->s_id, sizeof(sb->s_id),
+		 "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
+
+	if (sb->s_blocksize == 0)
+		sb->s_blocksize = nfs_block_bits(server->wsize,
+						 &sb->s_blocksize_bits);
+
+	if (server->flags & NFS_MOUNT_NOAC)
+		sb->s_flags |= MS_SYNCHRONOUS;
+
+	nfs_super_set_maxbytes(sb, server->maxfilesize);
+}
+
+/*
+ * Finish setting up an NFS2/3 superblock
+ */
+static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
+{
+	struct nfs_server *server = NFS_SB(sb);
+
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	if (data->bsize)
+		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+
+	if (server->flags & NFS_MOUNT_VER3) {
+		/* The VFS shouldn't apply the umask to mode bits. We will do
+		 * so ourselves when necessary.
+		 */
+		sb->s_flags |= MS_POSIXACL;
+		sb->s_time_gran = 1;
+	}
+
+	sb->s_op = &nfs_sops;
+ 	nfs_initialise_sb(sb);
+}
+
+/*
+ * Finish setting up a cloned NFS2/3 superblock
+ */
+static void nfs_clone_super(struct super_block *sb,
+			    const struct super_block *old_sb)
+{
+	struct nfs_server *server = NFS_SB(sb);
+
+	sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+	sb->s_blocksize = old_sb->s_blocksize;
+	sb->s_maxbytes = old_sb->s_maxbytes;
+
+	if (server->flags & NFS_MOUNT_VER3) {
+		/* The VFS shouldn't apply the umask to mode bits. We will do
+		 * so ourselves when necessary.
+		 */
+		sb->s_flags |= MS_POSIXACL;
+		sb->s_time_gran = 1;
+	}
+
+	sb->s_op = old_sb->s_op;
+ 	nfs_initialise_sb(sb);
+}
+
+static int nfs_set_super(struct super_block *s, void *_server)
+{
+	struct nfs_server *server = _server;
+	int ret;
+
+	s->s_fs_info = server;
+	ret = set_anon_super(s, server);
+	if (ret == 0)
+		server->s_dev = s->s_dev;
+	return ret;
+}
+
+static int nfs_compare_super(struct super_block *sb, void *data)
+{
+	struct nfs_server *server = data, *old = NFS_SB(sb);
+
+	if (old->nfs_client != server->nfs_client)
+		return 0;
+	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
+		return 0;
+	return 1;
+}
+
+static int nfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+{
+	struct nfs_server *server = NULL;
+	struct super_block *s;
+	struct nfs_fh mntfh;
+	struct nfs_mount_data *data = raw_data;
+	struct dentry *mntroot;
+	int error;
+
+	/* Validate the mount data */
+	error = nfs_validate_mount_data(data, &mntfh);
+	if (error < 0)
+		return error;
+
+	/* Get a volume representation */
+	server = nfs_create_server(data, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
+	}
+
+	/* Get a superblock - note that we may end up sharing one that already exists */
 	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
-		goto out_err_rpciod;
+		goto out_err_nosb;
 	}
 
-	if (s->s_root)
-		goto out_rpciod_down;
-
-	s->s_flags = flags;
-
-	error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		up_write(&s->s_umount);
-		deactivate_super(s);
-		return error;
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
 	}
+
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs_fill_super(s, data);
+	}
+
+	mntroot = nfs_get_root(s, &mntfh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
+
 	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+	return 0;
 
-out_rpciod_down:
-	rpciod_down();
-	kfree(server);
-	return simple_set_mnt(mnt, s);
-
-out_err_rpciod:
-	rpciod_down();
-out_err:
-	kfree(server);
+out_err_nosb:
+	nfs_free_server(server);
 out_err_noserver:
 	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	return error;
 }
 
+/*
+ * Destroy an NFS2/3 superblock
+ */
 static void nfs_kill_super(struct super_block *s)
 {
 	struct nfs_server *server = NFS_SB(s);
 
 	kill_anon_super(s);
-
-	if (!IS_ERR(server->client))
-		rpc_shutdown_client(server->client);
-	if (!IS_ERR(server->client_sys))
-		rpc_shutdown_client(server->client_sys);
-	if (!IS_ERR(server->client_acl))
-		rpc_shutdown_client(server->client_acl);
-
-	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_down();	/* release rpc.lockd */
-
-	rpciod_down();		/* release rpciod */
-
-	nfs_free_iostats(server->io_stats);
-	kfree(server->hostname);
-	kfree(server);
-	nfs_release_automount_timer();
+	nfs_free_server(server);
 }
 
-static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	struct super_block *sb;
-
-	server->fsid = data->fattr->fsid;
-	nfs_copy_fh(&server->fh, data->fh);
-	sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
-	if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM))
-		lockd_up();
-	return sb;
-}
-
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+/*
+ * Clone an NFS2/3 server record on xdev traversal (FSID-change)
+ */
+static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+			   const char *dev_name, void *raw_data,
+			   struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt);
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	int error;
+
+	dprintk("--> nfs_xdev_get_sb()\n");
+
+	/* create a new volume representation */
+	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
+	}
+
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
+	}
+
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
+
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs_clone_super(s, data->sb);
+	}
+
+	mntroot = nfs_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
+
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+
+	dprintk("<-- nfs_xdev_get_sb() = 0\n");
+	return 0;
+
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
+	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
 #ifdef CONFIG_NFS_V4
-static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
-	struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
+
+/*
+ * Finish setting up a cloned NFS4 superblock
+ */
+static void nfs4_clone_super(struct super_block *sb,
+			    const struct super_block *old_sb)
 {
-	struct nfs4_client *clp;
-	struct rpc_xprt *xprt = NULL;
-	struct rpc_clnt *clnt = NULL;
-	int err = -EIO;
-
-	clp = nfs4_get_client(&server->addr.sin_addr);
-	if (!clp) {
-		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
-		return ERR_PTR(err);
-	}
-
-	/* Now create transport and client */
-	down_write(&clp->cl_sem);
-	if (IS_ERR(clp->cl_rpcclient)) {
-		xprt = xprt_create_proto(proto, &server->addr, timeparms);
-		if (IS_ERR(xprt)) {
-			up_write(&clp->cl_sem);
-			err = PTR_ERR(xprt);
-			dprintk("%s: cannot create RPC transport. Error = %d\n",
-					__FUNCTION__, err);
-			goto out_fail;
-		}
-		/* Bind to a reserved port! */
-		xprt->resvport = 1;
-		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				server->rpc_ops->version, flavor);
-		if (IS_ERR(clnt)) {
-			up_write(&clp->cl_sem);
-			err = PTR_ERR(clnt);
-			dprintk("%s: cannot create RPC client. Error = %d\n",
-					__FUNCTION__, err);
-			goto out_fail;
-		}
-		clnt->cl_intr     = 1;
-		clnt->cl_softrtry = 1;
-		clp->cl_rpcclient = clnt;
-		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
-		nfs_idmap_new(clp);
-	}
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
-	clnt = rpc_clone_client(clp->cl_rpcclient);
-	if (!IS_ERR(clnt))
-		server->nfs4_state = clp;
-	up_write(&clp->cl_sem);
-	clp = NULL;
-
-	if (IS_ERR(clnt)) {
-		dprintk("%s: cannot create RPC client. Error = %d\n",
-				__FUNCTION__, err);
-		return clnt;
-	}
-
-	if (server->nfs4_state->cl_idmap == NULL) {
-		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	if (clnt->cl_auth->au_flavor != flavor) {
-		struct rpc_auth *auth;
-
-		auth = rpcauth_create(flavor, clnt);
-		if (IS_ERR(auth)) {
-			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
-			return (struct rpc_clnt *)auth;
-		}
-	}
-	return clnt;
-
- out_fail:
-	if (clp)
-		nfs4_put_client(clp);
-	return ERR_PTR(err);
+	sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+	sb->s_blocksize = old_sb->s_blocksize;
+	sb->s_maxbytes = old_sb->s_maxbytes;
+	sb->s_time_gran = 1;
+	sb->s_op = old_sb->s_op;
+ 	nfs_initialise_sb(sb);
 }
 
 /*
  * Set up an NFS4 superblock
  */
-static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+static void nfs4_fill_super(struct super_block *sb)
 {
-	struct nfs_server *server;
-	struct rpc_timeout timeparms;
-	rpc_authflavor_t authflavour;
-	int err = -EIO;
-
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize = 0;
-	server = NFS_SB(sb);
-	if (data->rsize != 0)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize != 0)
-		server->wsize = nfs_block_size(data->wsize, NULL);
-	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
-	server->caps = NFS_CAP_ATOMIC_OPEN;
-
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
-
-	server->rpc_ops = &nfs_v4_clientops;
-
-	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
-
-	server->retrans_timeo = timeparms.to_initval;
-	server->retrans_count = timeparms.to_retries;
-
-	/* Now create transport and client */
-	authflavour = RPC_AUTH_UNIX;
-	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen != 1) {
-			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
-					__FUNCTION__, data->auth_flavourlen);
-			err = -EINVAL;
-			goto out_fail;
-		}
-		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
-			err = -EFAULT;
-			goto out_fail;
-		}
-	}
-
-	server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour);
-	if (IS_ERR(server->client)) {
-		err = PTR_ERR(server->client);
-			dprintk("%s: cannot create RPC client. Error = %d\n",
-					__FUNCTION__, err);
-			goto out_fail;
-	}
-
 	sb->s_time_gran = 1;
-
 	sb->s_op = &nfs4_sops;
-	err = nfs_sb_init(sb, authflavour);
-
- out_fail:
-	return err;
+	nfs_initialise_sb(sb);
 }
 
-static int nfs4_compare_super(struct super_block *sb, void *data)
-{
-	struct nfs_server *server = data;
-	struct nfs_server *old = NFS_SB(sb);
-
-	if (strcmp(server->hostname, old->hostname) != 0)
-		return 0;
-	if (strcmp(server->mnt_path, old->mnt_path) != 0)
-		return 0;
-	return 1;
-}
-
-static void *
-nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
 {
 	void *p = NULL;
 
@@ -1297,14 +793,22 @@
 	return dst;
 }
 
+/*
+ * Get the superblock for an NFS4 mountpoint
+ */
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	int error;
-	struct nfs_server *server;
-	struct super_block *s;
 	struct nfs4_mount_data *data = raw_data;
+	struct super_block *s;
+	struct nfs_server *server;
+	struct sockaddr_in addr;
+	rpc_authflavor_t authflavour;
+	struct nfs_fh mntfh;
+	struct dentry *mntroot;
+	char *mntpath = NULL, *hostname = NULL, ip_addr[16];
 	void *p;
+	int error;
 
 	if (data == NULL) {
 		dprintk("%s: missing data argument\n", __FUNCTION__);
@@ -1315,84 +819,112 @@
 		return -EINVAL;
 	}
 
-	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (!server)
-		return -ENOMEM;
-	/* Zero out the NFS state stuff */
-	init_nfsv4_state(server);
-	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	/* We now require that the mount process passes the remote address */
+	if (data->host_addrlen != sizeof(addr))
+		return -EINVAL;
+
+	if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
+		return -EFAULT;
+
+	if (addr.sin_family != AF_INET ||
+	    addr.sin_addr.s_addr == INADDR_ANY
+	    ) {
+		dprintk("%s: mount program didn't pass remote IP address!\n",
+				__FUNCTION__);
+		return -EINVAL;
+	}
+	/* RFC3530: The default port for NFS is 2049 */
+	if (addr.sin_port == 0)
+		addr.sin_port = NFS_PORT;
+
+	/* Grab the authentication type */
+	authflavour = RPC_AUTH_UNIX;
+	if (data->auth_flavourlen != 0) {
+		if (data->auth_flavourlen != 1) {
+			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+					__FUNCTION__, data->auth_flavourlen);
+			error = -EINVAL;
+			goto out_err_noserver;
+		}
+
+		if (copy_from_user(&authflavour, data->auth_flavours,
+				   sizeof(authflavour))) {
+			error = -EFAULT;
+			goto out_err_noserver;
+		}
+	}
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
 		goto out_err;
-	server->hostname = p;
+	hostname = p;
 
 	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
 	if (IS_ERR(p))
 		goto out_err;
-	server->mnt_path = p;
+	mntpath = p;
 
-	p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
-			sizeof(server->ip_addr) - 1);
+	dprintk("MNTPATH: %s\n", mntpath);
+
+	p = nfs_copy_user_string(ip_addr, &data->client_addr,
+				 sizeof(ip_addr) - 1);
 	if (IS_ERR(p))
 		goto out_err;
 
-	/* We now require that the mount process passes the remote address */
-	if (data->host_addrlen != sizeof(server->addr)) {
-		error = -EINVAL;
-		goto out_free;
-	}
-	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
-		error = -EFAULT;
-		goto out_free;
-	}
-	if (server->addr.sin_family != AF_INET ||
-	    server->addr.sin_addr.s_addr == INADDR_ANY) {
-		dprintk("%s: mount program didn't pass remote IP address!\n",
-				__FUNCTION__);
-		error = -EINVAL;
-		goto out_free;
+	/* Get a volume representation */
+	server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
+				    authflavour, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
 	}
 
-	/* Fire up rpciod if not yet running */
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto out_free;
-	}
-
-	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
-
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
 	}
 
-	if (s->s_root) {
-		kfree(server->mnt_path);
-		kfree(server->hostname);
-		kfree(server);
-		return simple_set_mnt(mnt, s);
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
 	}
 
-	s->s_flags = flags;
-
-	error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		up_write(&s->s_umount);
-		deactivate_super(s);
-		return error;
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_fill_super(s);
 	}
+
+	mntroot = nfs4_get_root(s, &mntfh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
+
 	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+	kfree(mntpath);
+	kfree(hostname);
+	return 0;
+
 out_err:
 	error = PTR_ERR(p);
+	goto out_err_noserver;
+
 out_free:
-	kfree(server->mnt_path);
-	kfree(server->hostname);
-	kfree(server);
+	nfs_free_server(server);
+out_err_noserver:
+	kfree(mntpath);
+	kfree(hostname);
 	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	goto out_err_noserver;
 }
 
 static void nfs4_kill_super(struct super_block *sb)
@@ -1403,135 +935,140 @@
 	kill_anon_super(sb);
 
 	nfs4_renewd_prepare_shutdown(server);
-
-	if (server->client != NULL && !IS_ERR(server->client))
-		rpc_shutdown_client(server->client);
-
-	destroy_nfsv4_state(server);
-
-	rpciod_down();
-
-	nfs_free_iostats(server->io_stats);
-	kfree(server->hostname);
-	kfree(server);
-	nfs_release_automount_timer();
+	nfs_free_server(server);
 }
 
 /*
- * Constructs the SERVER-side path
+ * Clone an NFS4 server record on xdev traversal (FSID-change)
  */
-static inline char *nfs4_dup_path(const struct dentry *dentry)
-{
-	char *page = (char *) __get_free_page(GFP_USER);
-	char *path;
-
-	path = nfs4_path(dentry, page, PAGE_SIZE);
-	if (!IS_ERR(path)) {
-		int len = PAGE_SIZE + page - path;
-		char *tmp = path;
-
-		path = kmalloc(len, GFP_KERNEL);
-		if (path)
-			memcpy(path, tmp, len);
-		else
-			path = ERR_PTR(-ENOMEM);
-	}
-	free_page((unsigned long)page);
-	return path;
-}
-
-static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	const struct dentry *dentry = data->dentry;
-	struct nfs4_client *clp = server->nfs4_state;
-	struct super_block *sb;
-
-	server->fsid = data->fattr->fsid;
-	nfs_copy_fh(&server->fh, data->fh);
-	server->mnt_path = nfs4_dup_path(dentry);
-	if (IS_ERR(server->mnt_path)) {
-		sb = (struct super_block *)server->mnt_path;
-		goto err;
-	}
-	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
-	if (IS_ERR(sb) || sb->s_root)
-		goto free_path;
-	nfs4_server_capabilities(server, &server->fh);
-
-	down_write(&clp->cl_sem);
-	atomic_inc(&clp->cl_count);
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
-	up_write(&clp->cl_sem);
-	return sb;
-free_path:
-	kfree(server->mnt_path);
-err:
-	server->mnt_path = NULL;
-	return sb;
-}
-
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
+			    const char *dev_name, void *raw_data,
+			    struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt);
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	int error;
+
+	dprintk("--> nfs4_xdev_get_sb()\n");
+
+	/* create a new volume representation */
+	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
+	}
+
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
+	}
+
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
+
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_clone_super(s, data->sb);
+	}
+
+	mntroot = nfs4_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
+
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+
+	dprintk("<-- nfs4_xdev_get_sb() = 0\n");
+	return 0;
+
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
+	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
-static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	struct super_block *sb = ERR_PTR(-ENOMEM);
-	int len;
-
-	len = strlen(data->mnt_path) + 1;
-	server->mnt_path = kmalloc(len, GFP_KERNEL);
-	if (server->mnt_path == NULL)
-		goto err;
-	memcpy(server->mnt_path, data->mnt_path, len);
-	memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in));
-
-	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
-	if (IS_ERR(sb) || sb->s_root)
-		goto free_path;
-	return sb;
-free_path:
-	kfree(server->mnt_path);
-err:
-	server->mnt_path = NULL;
-	return sb;
-}
-
-static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data)
-{
-	struct nfs_server *server = NFS_SB(sb);
-	struct rpc_timeout timeparms;
-	int proto, timeo, retrans;
-	void *err;
-
-	proto = IPPROTO_TCP;
-	/* Since we are following a referral and there may be alternatives,
-	   set the timeouts and retries to low values */
-	timeo = 2;
-	retrans = 1;
-	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
-
-	server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
-	if (IS_ERR((err = server->client)))
-		goto out_err;
-
-	sb->s_time_gran = 1;
-	sb->s_op = &nfs4_sops;
-	err = ERR_PTR(nfs_sb_init(sb, data->authflavor));
-	if (!IS_ERR(err))
-		return server;
-out_err:
-	return (struct nfs_server *)err;
-}
-
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+/*
+ * Create an NFS4 server record on referral traversal
+ */
+static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *raw_data,
+				struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt);
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	struct nfs_fh mntfh;
+	int error;
+
+	dprintk("--> nfs4_referral_get_sb()\n");
+
+	/* create a new volume representation */
+	server = nfs4_create_referral_server(data, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
+	}
+
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
+	}
+
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
+
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_fill_super(s);
+	}
+
+	mntroot = nfs4_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
+
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+
+	dprintk("<-- nfs4_referral_get_sb() = 0\n");
+	return 0;
+
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
+	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
-#endif
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8ab3cf1..c12effb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -396,6 +396,7 @@
 out:
 	clear_bit(BDI_write_congested, &bdi->state);
 	wake_up_all(&nfs_write_congestion);
+	writeback_congestion_end();
 	return err;
 }
 
@@ -590,8 +591,8 @@
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_inode_remove_request(req);
-		nfs_clear_page_writeback(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+		nfs_clear_page_writeback(req);
 	}
 }
 
@@ -1252,7 +1253,13 @@
 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
 		task->tk_pid, task->tk_status);
 
-	/* Call the NFS version-specific code */
+	/*
+	 * ->write_done will attempt to use post-op attributes to detect
+	 * conflicting writes by other clients.  A strict interpretation
+	 * of close-to-open would allow us to continue caching even if
+	 * another writer had changed the file, but some applications
+	 * depend on tighter cache coherency when writing.
+	 */
 	status = NFS_PROTO(data->inode)->write_done(task, data);
 	if (status != 0)
 		return status;
@@ -1273,7 +1280,7 @@
 		if (time_before(complain, jiffies)) {
 			dprintk("NFS: faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
-				NFS_SERVER(data->inode)->hostname,
+				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
 				resp->verf->committed, argp->stable);
 			complain = jiffies + 300 * HZ;
 		}
@@ -1386,8 +1393,8 @@
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
-		nfs_clear_page_writeback(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+		nfs_clear_page_writeback(req);
 	}
 	return -ENOMEM;
 }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 54b37b1..8583d99 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -375,16 +375,28 @@
 {
 	struct sockaddr_in	addr;
 	struct nfs4_callback    *cb = &clp->cl_callback;
-	struct rpc_timeout	timeparms;
-	struct rpc_xprt *	xprt;
+	struct rpc_timeout	timeparms = {
+		.to_initval	= (NFSD_LEASE_TIME/4) * HZ,
+		.to_retries	= 5,
+		.to_maxval	= (NFSD_LEASE_TIME/2) * HZ,
+		.to_exponential	= 1,
+	};
 	struct rpc_program *	program = &cb->cb_program;
-	struct rpc_stat *	stat = &cb->cb_stat;
-	struct rpc_clnt *	clnt;
+	struct rpc_create_args args = {
+		.protocol	= IPPROTO_TCP,
+		.address	= (struct sockaddr *)&addr,
+		.addrsize	= sizeof(addr),
+		.timeout	= &timeparms,
+		.servername	= clp->cl_name.data,
+		.program	= program,
+		.version	= nfs_cb_version[1]->number,
+		.authflavor	= RPC_AUTH_UNIX,	/* XXX: need AUTH_GSS... */
+		.flags		= (RPC_CLNT_CREATE_NOPING),
+	};
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
 	};
-	char                    hostname[32];
 	int status;
 
 	if (atomic_read(&cb->cb_set))
@@ -396,51 +408,27 @@
 	addr.sin_port = htons(cb->cb_port);
 	addr.sin_addr.s_addr = htonl(cb->cb_addr);
 
-	/* Initialize timeout */
-	timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
-	timeparms.to_retries = 0;
-	timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
-	timeparms.to_exponential = 1;
-
-	/* Create RPC transport */
-	xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("NFSD: couldn't create callback transport!\n");
-		goto out_err;
-	}
-
 	/* Initialize rpc_program */
 	program->name = "nfs4_cb";
 	program->number = cb->cb_prog;
 	program->nrvers = ARRAY_SIZE(nfs_cb_version);
 	program->version = nfs_cb_version;
-	program->stats = stat;
+	program->stats = &cb->cb_stat;
 
 	/* Initialize rpc_stat */
-	memset(stat, 0, sizeof(struct rpc_stat));
-	stat->program = program;
+	memset(program->stats, 0, sizeof(cb->cb_stat));
+	program->stats->program = program;
 
-	/* Create RPC client
- 	 *
-	 * XXX AUTH_UNIX only - need AUTH_GSS....
-	 */
-	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
-	clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
-	if (IS_ERR(clnt)) {
+	/* Create RPC client */
+	cb->cb_client = rpc_create(&args);
+	if (!cb->cb_client) {
 		dprintk("NFSD: couldn't create callback client\n");
 		goto out_err;
 	}
-	clnt->cl_intr = 0;
-	clnt->cl_softrtry = 1;
 
 	/* Kick rpciod, put the call on the wire. */
-
-	if (rpciod_up() != 0) {
-		dprintk("nfsd: couldn't start rpciod for callbacks!\n");
+	if (rpciod_up() != 0)
 		goto out_clnt;
-	}
-
-	cb->cb_client = clnt;
 
 	/* the task holds a reference to the nfs4_client struct */
 	atomic_inc(&clp->cl_count);
@@ -448,7 +436,7 @@
 	msg.rpc_cred = nfsd4_lookupcred(clp,0);
 	if (IS_ERR(msg.rpc_cred))
 		goto out_rpciod;
-	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
+	status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
 	put_rpccred(msg.rpc_cred);
 
 	if (status != 0) {
@@ -462,7 +450,7 @@
 	rpciod_down();
 	cb->cb_client = NULL;
 out_clnt:
-	rpc_shutdown_client(clnt);
+	rpc_shutdown_client(cb->cb_client);
 out_err:
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 		(int)clp->cl_name.len, clp->cl_name.data);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 06da750..e35d7e5 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,7 +33,7 @@
 *
 */
 
-
+#include <linux/err.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfs4.h>
@@ -87,34 +87,35 @@
 nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
 {
 	struct xdr_netobj cksum;
-	struct crypto_tfm *tfm;
+	struct hash_desc desc;
 	struct scatterlist sg[1];
 	int status = nfserr_resource;
 
 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 			clname->len, clname->data);
-	tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (tfm == NULL)
-		goto out;
-	cksum.len = crypto_tfm_alg_digestsize(tfm);
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(desc.tfm))
+		goto out_no_tfm;
+	cksum.len = crypto_hash_digestsize(desc.tfm);
 	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
 	if (cksum.data == NULL)
  		goto out;
-	crypto_digest_init(tfm);
 
 	sg[0].page = virt_to_page(clname->data);
 	sg[0].offset = offset_in_page(clname->data);
 	sg[0].length = clname->len;
 
-	crypto_digest_update(tfm, sg, 1);
-	crypto_digest_final(tfm, cksum.data);
+	if (crypto_hash_digest(&desc, sg, sg->length, cksum.data))
+		goto out;
 
 	md5_to_hex(dname, cksum.data);
 
 	kfree(cksum.data);
 	status = nfs_ok;
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_hash(desc.tfm);
+out_no_tfm:
 	return status;
 }
 
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 7d3be84..9fb8132 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -16,6 +16,7 @@
 	file.o 			\
 	heartbeat.o 		\
 	inode.o 		\
+	ioctl.o 		\
 	journal.o 		\
 	localalloc.o 		\
 	mmap.o 			\
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index edaab05..f43bc5f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1717,17 +1717,29 @@
 
 			ocfs2_remove_from_cache(inode, eb_bh);
 
-			BUG_ON(eb->h_suballoc_slot);
 			BUG_ON(el->l_recs[0].e_clusters);
 			BUG_ON(el->l_recs[0].e_cpos);
 			BUG_ON(el->l_recs[0].e_blkno);
-			status = ocfs2_free_extent_block(handle,
-							 tc->tc_ext_alloc_inode,
-							 tc->tc_ext_alloc_bh,
-							 eb);
-			if (status < 0) {
-				mlog_errno(status);
-				goto bail;
+			if (eb->h_suballoc_slot == 0) {
+				/*
+				 * This code only understands how to
+				 * lock the suballocator in slot 0,
+				 * which is fine because allocation is
+				 * only ever done out of that
+				 * suballocator too. A future version
+				 * might change that however, so avoid
+				 * a free if we don't know how to
+				 * handle it. This way an fs incompat
+				 * bit will not be necessary.
+				 */
+				status = ocfs2_free_extent_block(handle,
+								 tc->tc_ext_alloc_inode,
+								 tc->tc_ext_alloc_bh,
+								 eb);
+				if (status < 0) {
+					mlog_errno(status);
+					goto bail;
+				}
 			}
 		}
 		brelse(eb_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1d1c34..3d7c082 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -391,31 +391,28 @@
 static int ocfs2_commit_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
-	int ret, extending = 0, locklevel = 0;
-	loff_t new_i_size;
+	int ret;
 	struct buffer_head *di_bh = NULL;
 	struct inode *inode = page->mapping->host;
 	struct ocfs2_journal_handle *handle = NULL;
+	struct ocfs2_dinode *di;
 
 	mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
 
 	/* NOTE: ocfs2_file_aio_write has ensured that it's safe for
-	 * us to sample inode->i_size here without the metadata lock:
+	 * us to continue here without rechecking the I/O against
+	 * changed inode values.
 	 *
 	 * 1) We're currently holding the inode alloc lock, so no
 	 *    nodes can change it underneath us.
 	 *
 	 * 2) We've had to take the metadata lock at least once
-	 *    already to check for extending writes, hence insuring
-	 *    that our current copy is also up to date.
+	 *    already to check for extending writes, suid removal, etc.
+	 *    The metadata update code then ensures that we don't get a
+	 *    stale inode allocation image (i_size, i_clusters, etc).
 	 */
-	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-	if (new_i_size > i_size_read(inode)) {
-		extending = 1;
-		locklevel = 1;
-	}
 
-	ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page);
+	ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page);
 	if (ret != 0) {
 		mlog_errno(ret);
 		goto out;
@@ -427,23 +424,20 @@
 		goto out_unlock_meta;
 	}
 
-	if (extending) {
-		handle = ocfs2_start_walk_page_trans(inode, page, from, to);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			handle = NULL;
-			goto out_unlock_data;
-		}
+	handle = ocfs2_start_walk_page_trans(inode, page, from, to);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out_unlock_data;
+	}
 
-		/* Mark our buffer early. We'd rather catch this error up here
-		 * as opposed to after a successful commit_write which would
-		 * require us to set back inode->i_size. */
-		ret = ocfs2_journal_access(handle, inode, di_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	/* Mark our buffer early. We'd rather catch this error up here
+	 * as opposed to after a successful commit_write which would
+	 * require us to set back inode->i_size. */
+	ret = ocfs2_journal_access(handle, inode, di_bh,
+				   OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
 	/* might update i_size */
@@ -453,37 +447,28 @@
 		goto out_commit;
 	}
 
-	if (extending) {
-		loff_t size = (u64) i_size_read(inode);
-		struct ocfs2_dinode *di =
-			(struct ocfs2_dinode *)di_bh->b_data;
+	di = (struct ocfs2_dinode *)di_bh->b_data;
 
-		/* ocfs2_mark_inode_dirty is too heavy to use here. */
-		inode->i_blocks = ocfs2_align_bytes_to_sectors(size);
-		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	/* ocfs2_mark_inode_dirty() is too heavy to use here. */
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+	di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 
-		di->i_size = cpu_to_le64(size);
-		di->i_ctime = di->i_mtime = 
-				cpu_to_le64(inode->i_mtime.tv_sec);
-		di->i_ctime_nsec = di->i_mtime_nsec = 
-				cpu_to_le32(inode->i_mtime.tv_nsec);
+	inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
+	di->i_size = cpu_to_le64((u64)i_size_read(inode));
 
-		ret = ocfs2_journal_dirty(handle, di_bh);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	ret = ocfs2_journal_dirty(handle, di_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
-	BUG_ON(extending && (i_size_read(inode) != new_i_size));
-
 out_commit:
-	if (handle)
-		ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(handle);
 out_unlock_data:
 	ocfs2_data_unlock(inode, 1);
 out_unlock_meta:
-	ocfs2_meta_unlock(inode, locklevel);
+	ocfs2_meta_unlock(inode, 1);
 out:
 	if (di_bh)
 		brelse(di_bh);
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 9a24adf..c903741 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -100,6 +100,9 @@
 	mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
 		   (unsigned long long)block, nr, flags, inode);
 
+	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
+	       (!inode || !(flags & OCFS2_BH_CACHED)));
+
 	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
 		status = -EINVAL;
 		mlog_errno(status);
@@ -140,6 +143,30 @@
 		bh = bhs[i];
 		ignore_cache = 0;
 
+		/* There are three read-ahead cases here which we need to
+		 * be concerned with. All three assume a buffer has
+		 * previously been submitted with OCFS2_BH_READAHEAD
+		 * and it hasn't yet completed I/O.
+		 *
+		 * 1) The current request is sync to disk. This rarely
+		 *    happens these days, and never when performance
+		 *    matters - the code can just wait on the buffer
+		 *    lock and re-submit.
+		 *
+		 * 2) The current request is cached, but not
+		 *    readahead. ocfs2_buffer_uptodate() will return
+		 *    false anyway, so we'll wind up waiting on the
+		 *    buffer lock to do I/O. We re-check the request
+		 *    after getting the lock to avoid a re-submit.
+		 *
+		 * 3) The current request is readahead (and so must
+		 *    also be a caching one). We short circuit if the
+		 *    buffer is locked (under I/O) and if it's in the
+		 *    uptodate cache. The re-check from #2 catches the
+		 *    case that the previous read-ahead completes just
+		 *    before our is-it-in-flight check.
+		 */
+
 		if (flags & OCFS2_BH_CACHED &&
 		    !ocfs2_buffer_uptodate(inode, bh)) {
 			mlog(ML_UPTODATE,
@@ -169,6 +196,14 @@
 				continue;
 			}
 
+			/* A read-ahead request was made - if the
+			 * buffer is already under read-ahead from a
+			 * previously submitted request then we are
+			 * done here. */
+			if ((flags & OCFS2_BH_READAHEAD)
+			    && ocfs2_buffer_read_ahead(inode, bh))
+				continue;
+
 			lock_buffer(bh);
 			if (buffer_jbd(bh)) {
 #ifdef CATCH_BH_JBD_RACES
@@ -181,13 +216,22 @@
 				continue;
 #endif
 			}
+
+			/* Re-check ocfs2_buffer_uptodate() as a
+			 * previously read-ahead buffer may have
+			 * completed I/O while we were waiting for the
+			 * buffer lock. */
+			if ((flags & OCFS2_BH_CACHED)
+			    && !(flags & OCFS2_BH_READAHEAD)
+			    && ocfs2_buffer_uptodate(inode, bh)) {
+				unlock_buffer(bh);
+				continue;
+			}
+
 			clear_buffer_uptodate(bh);
 			get_bh(bh); /* for end_buffer_read_sync() */
 			bh->b_end_io = end_buffer_read_sync;
-			if (flags & OCFS2_BH_READAHEAD)
-				submit_bh(READA, bh);
-			else
-				submit_bh(READ, bh);
+			submit_bh(READ, bh);
 			continue;
 		}
 	}
@@ -197,34 +241,39 @@
 	for (i = (nr - 1); i >= 0; i--) {
 		bh = bhs[i];
 
-		/* We know this can't have changed as we hold the
-		 * inode sem. Avoid doing any work on the bh if the
-		 * journal has it. */
-		if (!buffer_jbd(bh))
-			wait_on_buffer(bh);
+		if (!(flags & OCFS2_BH_READAHEAD)) {
+			/* We know this can't have changed as we hold the
+			 * inode sem. Avoid doing any work on the bh if the
+			 * journal has it. */
+			if (!buffer_jbd(bh))
+				wait_on_buffer(bh);
 
-		if (!buffer_uptodate(bh)) {
-			/* Status won't be cleared from here on out,
-			 * so we can safely record this and loop back
-			 * to cleanup the other buffers. Don't need to
-			 * remove the clustered uptodate information
-			 * for this bh as it's not marked locally
-			 * uptodate. */
-			status = -EIO;
-			brelse(bh);
-			bhs[i] = NULL;
-			continue;
+			if (!buffer_uptodate(bh)) {
+				/* Status won't be cleared from here on out,
+				 * so we can safely record this and loop back
+				 * to cleanup the other buffers. Don't need to
+				 * remove the clustered uptodate information
+				 * for this bh as it's not marked locally
+				 * uptodate. */
+				status = -EIO;
+				brelse(bh);
+				bhs[i] = NULL;
+				continue;
+			}
 		}
 
+		/* Always set the buffer in the cache, even if it was
+		 * a forced read, or read-ahead which hasn't yet
+		 * completed. */
 		if (inode)
 			ocfs2_set_buffer_uptodate(inode, bh);
 	}
 	if (inode)
 		mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
 
-	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 
+	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
 	     (unsigned long long)block, nr,
-	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
+	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
 
 bail:
 
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 6ecb909..6cc2093 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -49,7 +49,7 @@
 
 
 #define OCFS2_BH_CACHED            1
-#define OCFS2_BH_READAHEAD         8	/* use this to pass READA down to submit_bh */
+#define OCFS2_BH_READAHEAD         8
 
 static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
 				   struct buffer_head **bh, int flags,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 504595d..305cba3 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -320,8 +320,12 @@
 		max_pages = q->max_hw_segments;
 	max_pages--; /* Handle I/Os that straddle a page */
 
-	max_sectors = max_pages << (PAGE_SHIFT - 9);
-
+	if (max_pages) {
+		max_sectors = max_pages << (PAGE_SHIFT - 9);
+	} else {
+		/* If the BIO contains one page or less. */
+		max_sectors = q->max_sectors;
+	}
 	/* Why is fls() 1-based???? */
 	pow_two_sectors = 1 << (fls(max_sectors) - 1);
 
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 3d494d1..04e0191 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -74,14 +74,14 @@
 int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	int error = 0;
-	unsigned long offset, blk;
-	int i, num, stored;
+	unsigned long offset, blk, last_ra_blk = 0;
+	int i, stored;
 	struct buffer_head * bh, * tmp;
 	struct ocfs2_dir_entry * de;
 	int err;
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct super_block * sb = inode->i_sb;
-	int have_disk_lock = 0;
+	unsigned int ra_sectors = 16;
 
 	mlog_entry("dirino=%llu\n",
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -95,9 +95,8 @@
 			mlog_errno(error);
 		/* we haven't got any yet, so propagate the error. */
 		stored = error;
-		goto bail;
+		goto bail_nolock;
 	}
-	have_disk_lock = 1;
 
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 
@@ -113,16 +112,21 @@
 			continue;
 		}
 
-		/*
-		 * Do the readahead (8k)
-		 */
-		if (!offset) {
-			for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
+		/* The idea here is to begin with 8k read-ahead and to stay
+		 * 4k ahead of our current position.
+		 *
+		 * TODO: Use the pagecache for this. We just need to
+		 * make sure it's cluster-safe... */
+		if (!last_ra_blk
+		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
+			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
 			     i > 0; i--) {
 				tmp = ocfs2_bread(inode, ++blk, &err, 1);
 				if (tmp)
 					brelse(tmp);
 			}
+			last_ra_blk = blk;
+			ra_sectors = 8;
 		}
 
 revalidate:
@@ -194,9 +198,9 @@
 
 	stored = 0;
 bail:
-	if (have_disk_lock)
-		ocfs2_meta_unlock(inode, 0);
+	ocfs2_meta_unlock(inode, 0);
 
+bail_nolock:
 	mlog_exit(stored);
 
 	return stored;
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 42775e2..f13a4ba 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -367,12 +367,10 @@
 			goto do_ast;
 	}
 
-	mlog(ML_ERROR, "got %sast for unknown lock!  cookie=%u:%llu, "
-		       "name=%.*s, namelen=%u\n", 
-		       past->type == DLM_AST ? "" : "b", 
-		       dlm_get_lock_cookie_node(cookie),
-		       dlm_get_lock_cookie_seq(cookie),
-		       locklen, name, locklen);
+	mlog(0, "got %sast for unknown lock!  cookie=%u:%llu, "
+	     "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", 
+	     dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie),
+	     locklen, name, locklen);
 
 	ret = DLM_NORMAL;
 unlock_out:
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 762eb1f..151b417 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1330,6 +1330,7 @@
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
 	lvb->lvb_imtime_packed =
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
+	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
 
 	mlog_meta_lvb(0, lockres);
 
@@ -1360,6 +1361,9 @@
 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
 
+	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
+	ocfs2_set_inode_flags(inode);
+
 	/* fast-symlinks are a special case */
 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
 		inode->i_blocks = 0;
@@ -2899,8 +2903,9 @@
 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
 	     be16_to_cpu(lvb->lvb_imode));
 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
-	     "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink),
+	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
-	     (long long)be64_to_cpu(lvb->lvb_imtime_packed));
+	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
+	     be32_to_cpu(lvb->lvb_iattr));
 }
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 8f2d1db..243ae86 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,7 +27,7 @@
 #ifndef DLMGLUE_H
 #define DLMGLUE_H
 
-#define OCFS2_LVB_VERSION 2
+#define OCFS2_LVB_VERSION 3
 
 struct ocfs2_meta_lvb {
 	__be32       lvb_version;
@@ -40,7 +40,8 @@
 	__be64       lvb_isize;
 	__be16       lvb_imode;
 	__be16       lvb_inlink;
-	__be32       lvb_reserved[3];
+	__be32       lvb_iattr;
+	__be32       lvb_reserved[2];
 };
 
 /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a9559c8..2bbfa17 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -44,6 +44,7 @@
 #include "file.h"
 #include "sysfile.h"
 #include "inode.h"
+#include "ioctl.h"
 #include "journal.h"
 #include "mmap.h"
 #include "suballoc.h"
@@ -1227,10 +1228,12 @@
 	.open		= ocfs2_file_open,
 	.aio_read	= ocfs2_file_aio_read,
 	.aio_write	= ocfs2_file_aio_write,
+	.ioctl		= ocfs2_ioctl,
 };
 
 const struct file_operations ocfs2_dops = {
 	.read		= generic_read_dir,
 	.readdir	= ocfs2_readdir,
 	.fsync		= ocfs2_sync_file,
+	.ioctl		= ocfs2_ioctl,
 };
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 327a5b7..7bcf691 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -71,6 +71,26 @@
 				    struct inode *inode,
 				    struct buffer_head *fe_bh);
 
+void ocfs2_set_inode_flags(struct inode *inode)
+{
+	unsigned int flags = OCFS2_I(inode)->ip_attr;
+
+	inode->i_flags &= ~(S_IMMUTABLE |
+		S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
+
+	if (flags & OCFS2_IMMUTABLE_FL)
+		inode->i_flags |= S_IMMUTABLE;
+
+	if (flags & OCFS2_SYNC_FL)
+		inode->i_flags |= S_SYNC;
+	if (flags & OCFS2_APPEND_FL)
+		inode->i_flags |= S_APPEND;
+	if (flags & OCFS2_NOATIME_FL)
+		inode->i_flags |= S_NOATIME;
+	if (flags & OCFS2_DIRSYNC_FL)
+		inode->i_flags |= S_DIRSYNC;
+}
+
 struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
 				     u64 blkno,
 				     int delete_vote)
@@ -260,7 +280,6 @@
 		inode->i_blocks =
 			ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size));
 	inode->i_mapping->a_ops = &ocfs2_aops;
-	inode->i_flags |= S_NOATIME;
 	inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
 	inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
 	inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
@@ -276,6 +295,7 @@
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
 	OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
+	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
 
 	if (create_ino)
 		inode->i_ino = ino_from_blkno(inode->i_sb,
@@ -330,6 +350,9 @@
 	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
 				  OCFS2_LOCK_TYPE_DATA, inode);
 
+	ocfs2_set_inode_flags(inode);
+	inode->i_flags |= S_NOATIME;
+
 	status = 0;
 bail:
 	mlog_exit(status);
@@ -1027,12 +1050,8 @@
 	u64 p_blkno;
 	int readflags = OCFS2_BH_CACHED;
 
-#if 0
-	/* only turn this on if we know we can deal with read_block
-	 * returning nothing */
 	if (reada)
 		readflags |= OCFS2_BH_READAHEAD;
-#endif
 
 	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
 	    i_size_read(inode)) {
@@ -1131,6 +1150,7 @@
 
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
+	fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
 	fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1169,6 +1189,8 @@
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
+	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
+	ocfs2_set_inode_flags(inode);
 	i_size_write(inode, le64_to_cpu(fe->i_size));
 	inode->i_nlink = le16_to_cpu(fe->i_links_count);
 	inode->i_uid = le32_to_cpu(fe->i_uid);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 35140f6..4d1e539 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -56,6 +56,7 @@
 	struct ocfs2_journal_handle	*ip_handle;
 
 	u32				ip_flags; /* see below */
+	u32				ip_attr; /* inode attributes */
 
 	/* protected by recovery_lock. */
 	struct inode			*ip_next_orphan;
@@ -142,4 +143,6 @@
 int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
 int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
 
+void ocfs2_set_inode_flags(struct inode *inode);
+
 #endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
new file mode 100644
index 0000000..3663cef
--- /dev/null
+++ b/fs/ocfs2/ioctl.c
@@ -0,0 +1,136 @@
+/*
+ * linux/fs/ocfs2/ioctl.c
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ * adapted from Remy Card's ext2/ioctl.c
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "alloc.h"
+#include "dlmglue.h"
+#include "inode.h"
+#include "journal.h"
+
+#include "ocfs2_fs.h"
+#include "ioctl.h"
+
+#include <linux/ext2_fs.h>
+
+/* Copy the inode's ip_attr into *flags while holding its meta lock. */
+static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
+{
+	int status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+
+	if (status < 0) {
+		mlog_errno(status);
+		return status;
+	}
+
+	*flags = OCFS2_I(inode)->ip_attr;
+	ocfs2_meta_unlock(inode, 0);
+	mlog_exit(status);
+	return status;
+}
+
+/* Replace the attribute bits selected by @mask with those in @flags and log
+ * the inode update in a transaction.  Returns a negative errno on failure. */
+static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
+				unsigned mask)
+{
+	struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_journal_handle *handle = NULL;
+	struct buffer_head *bh = NULL;
+	unsigned oldflags;
+	int status;
+
+	mutex_lock(&inode->i_mutex);
+
+	status = ocfs2_meta_lock(inode, NULL, &bh, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	status = -EROFS;
+	if (IS_RDONLY(inode))
+		goto bail_unlock;
+
+	status = -EACCES;
+	if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+		goto bail_unlock;
+
+	if (!S_ISDIR(inode->i_mode))
+		flags &= ~OCFS2_DIRSYNC_FL;
+
+	oldflags = ocfs2_inode->ip_attr;
+	flags = (flags & mask) | (oldflags & ~mask);
+
+	/*
+	 * IMMUTABLE and APPEND_ONLY may only be changed with the relevant
+	 * capability; check before a transaction exists that we would leak.
+	 */
+	status = -EPERM;
+	if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
+		(OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
+		if (!capable(CAP_LINUX_IMMUTABLE))
+			goto bail_unlock;
+	}
+
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto bail_unlock;
+	}
+
+	ocfs2_inode->ip_attr = flags;
+	ocfs2_set_inode_flags(inode);
+
+	status = ocfs2_mark_inode_dirty(handle, inode, bh);
+	if (status < 0)
+		mlog_errno(status);
+
+	ocfs2_commit_trans(handle);
+bail_unlock:
+	ocfs2_meta_unlock(inode, 1);
+bail:
+	mutex_unlock(&inode->i_mutex);
+	if (bh)
+		brelse(bh);
+
+	mlog_exit(status);
+	return status;
+}
+
+/* Handle OCFS2_IOC_GETFLAGS/SETFLAGS; any other command gets -ENOTTY. */
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+	unsigned int cmd, unsigned long arg)
+{
+	int __user *uflags = (int __user *) arg;
+	unsigned int flags;
+	int status;
+
+	if (cmd == OCFS2_IOC_GETFLAGS) {
+		status = ocfs2_get_inode_attr(inode, &flags);
+		if (status < 0)
+			return status;
+		flags &= OCFS2_FL_VISIBLE;
+		return put_user(flags, uflags);
+	}
+
+	if (cmd == OCFS2_IOC_SETFLAGS) {
+		if (get_user(flags, uflags))
+			return -EFAULT;
+		return ocfs2_set_inode_attr(inode, flags, OCFS2_FL_MODIFIABLE);
+	}
+
+	return -ENOTTY;
+}
+
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
new file mode 100644
index 0000000..4a7c829
--- /dev/null
+++ b/fs/ocfs2/ioctl.h
@@ -0,0 +1,16 @@
+/*
+ * ioctl.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ *
+ */
+
+#ifndef OCFS2_IOCTL_H
+#define OCFS2_IOCTL_H
+
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+	unsigned int cmd, unsigned long arg);
+
+#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0673862..0d3e939 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -56,6 +56,7 @@
 #include "journal.h"
 #include "namei.h"
 #include "suballoc.h"
+#include "super.h"
 #include "symlink.h"
 #include "sysfile.h"
 #include "uptodate.h"
@@ -310,13 +311,6 @@
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
 
-	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
-		mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
-		     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
-		status = -EMLINK;
-		goto leave;
-	}
-
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
 		status = -ENOMEM;
@@ -331,6 +325,11 @@
 		goto leave;
 	}
 
+	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
+		status = -EMLINK;
+		goto leave;
+	}
+
 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
 	if (!dirfe->i_links_count) {
 		/* can't make a file in a deleted directory. */
@@ -643,11 +642,6 @@
 		goto bail;
 	}
 
-	if (inode->i_nlink >= OCFS2_LINK_MAX) {
-		err = -EMLINK;
-		goto bail;
-	}
-
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
 		err = -ENOMEM;
@@ -661,6 +655,11 @@
 		goto bail;
 	}
 
+	if (!dir->i_nlink) {
+		err = -ENOENT;
+		goto bail;
+	}
+
 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
 					dentry->d_name.len);
 	if (err)
@@ -1964,13 +1963,8 @@
 				}
 				num++;
 
-				/* XXX: questionable readahead stuff here */
 				bh = ocfs2_bread(dir, b++, &err, 1);
 				bh_use[ra_max] = bh;
-#if 0		// ???
-				if (bh)
-					ll_rw_block(READ, 1, &bh);
-#endif
 			}
 		}
 		if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1978,6 +1972,10 @@
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
+			ocfs2_error(dir->i_sb, "reading directory %llu, "
+				    "offset %lu\n",
+				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
+				    block);
 			brelse(bh);
 			goto next;
 		}
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c5b1ac5..3330a5d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -114,6 +114,26 @@
 #define OCFS2_CHAIN_FL		(0x00000400)	/* Chain allocator */
 #define OCFS2_DEALLOC_FL	(0x00000800)	/* Truncate log */
 
+/* Inode attributes, keep in sync with EXT2 */
+#define OCFS2_SECRM_FL		(0x00000001)	/* Secure deletion */
+#define OCFS2_UNRM_FL		(0x00000002)	/* Undelete */
+#define OCFS2_COMPR_FL		(0x00000004)	/* Compress file */
+#define OCFS2_SYNC_FL		(0x00000008)	/* Synchronous updates */
+#define OCFS2_IMMUTABLE_FL	(0x00000010)	/* Immutable file */
+#define OCFS2_APPEND_FL		(0x00000020)	/* writes to file may only append */
+#define OCFS2_NODUMP_FL		(0x00000040)	/* do not dump file */
+#define OCFS2_NOATIME_FL	(0x00000080)	/* do not update atime */
+#define OCFS2_DIRSYNC_FL	(0x00010000)	/* dirsync behaviour (directories only) */
+
+#define OCFS2_FL_VISIBLE	(0x000100FF)	/* User visible flags */
+#define OCFS2_FL_MODIFIABLE	(0x000100FF)	/* User modifiable flags */
+
+/*
+ * ioctl commands
+ */
+#define OCFS2_IOC_GETFLAGS	_IOR('f', 1, long)
+#define OCFS2_IOC_SETFLAGS	_IOW('f', 2, long)
+
 /*
  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
  */
@@ -399,7 +419,9 @@
 	__le32 i_atime_nsec;
 	__le32 i_ctime_nsec;
 	__le32 i_mtime_nsec;
-/*70*/	__le64 i_reserved1[9];
+	__le32 i_attr;
+	__le32 i_reserved1;
+/*70*/	__le64 i_reserved2[8];
 /*B8*/	union {
 		__le64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index b8a00a7..9707ed7 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -206,7 +206,10 @@
 }
 
 /* Warning: even if it returns true, this does *not* guarantee that
- * the block is stored in our inode metadata cache. */
+ * the block is stored in our inode metadata cache. 
+ * 
+ * This can be called under lock_buffer()
+ */
 int ocfs2_buffer_uptodate(struct inode *inode,
 			  struct buffer_head *bh)
 {
@@ -226,6 +229,16 @@
 	return ocfs2_buffer_cached(OCFS2_I(inode), bh);
 }
 
+/*
+ * Report whether bh is still out on a read-ahead request: it is locked and
+ * cached for this inode.  Hold ip_io_sem to serialize against submitters.
+ */
+int ocfs2_buffer_read_ahead(struct inode *inode, struct buffer_head *bh)
+{
+	return !buffer_locked(bh) ? 0 :
+		ocfs2_buffer_cached(OCFS2_I(inode), bh) != 0;
+}
+
 /* Requires ip_lock */
 static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
 				     sector_t block)
@@ -403,7 +416,11 @@
  *
  * Note that this function may actually fail to insert the block if
  * memory cannot be allocated. This is not fatal however (but may
- * result in a performance penalty) */
+ * result in a performance penalty)
+ *
+ * Readahead buffers can be passed in here before the I/O request is
+ * completed.
+ */
 void ocfs2_set_buffer_uptodate(struct inode *inode,
 			       struct buffer_head *bh)
 {
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 01cd32d..2e73206 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,5 +40,7 @@
 				   struct buffer_head *bh);
 void ocfs2_remove_from_cache(struct inode *inode,
 			     struct buffer_head *bh);
+int ocfs2_buffer_read_ahead(struct inode *inode,
+			    struct buffer_head *bh);
 
 #endif /* OCFS2_UPTODATE_H */
diff --git a/include/Kbuild b/include/Kbuild
index cb25348..2d03f99 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -1,2 +1,9 @@
-header-y += asm-generic/ linux/ scsi/ sound/ mtd/ rdma/ video/
-header-y += asm-$(ARCH)/ 
+header-y += asm-generic/
+header-y += linux/
+header-y += scsi/
+header-y += sound/
+header-y += mtd/
+header-y += rdma/
+header-y += video/
+
+header-y += asm-$(ARCH)/
diff --git a/include/asm-alpha/Kbuild b/include/asm-alpha/Kbuild
index 2b06b3b..b7c8f18 100644
--- a/include/asm-alpha/Kbuild
+++ b/include/asm-alpha/Kbuild
@@ -1,5 +1,11 @@
 include include/asm-generic/Kbuild.asm
 
-unifdef-y += console.h fpu.h sysinfo.h compiler.h
+header-y += gentrap.h
+header-y += regdef.h
+header-y += pal.h
+header-y += reg.h
 
-header-y += gentrap.h regdef.h pal.h reg.h
+unifdef-y += console.h
+unifdef-y += fpu.h
+unifdef-y += sysinfo.h
+unifdef-y += compiler.h
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 70594b2..3c06be3 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -1,3 +1,12 @@
-header-y += atomic.h errno-base.h errno.h fcntl.h ioctl.h ipc.h mman.h \
-	signal.h statfs.h
-unifdef-y := resource.h siginfo.h
+header-y += atomic.h
+header-y += errno-base.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += ioctl.h
+header-y += ipc.h
+header-y += mman.h
+header-y += signal.h
+header-y += statfs.h
+
+unifdef-y += resource.h
+unifdef-y += siginfo.h
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
index c00de60..a84c3d8 100644
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -1,8 +1,34 @@
-unifdef-y += a.out.h auxvec.h byteorder.h errno.h fcntl.h ioctl.h	\
-	ioctls.h ipcbuf.h mman.h msgbuf.h param.h poll.h		\
-	posix_types.h ptrace.h resource.h sembuf.h shmbuf.h shmparam.h	\
-	sigcontext.h siginfo.h signal.h socket.h sockios.h stat.h	\
-	statfs.h termbits.h termios.h types.h unistd.h user.h
+unifdef-y += a.out.h
+unifdef-y += auxvec.h
+unifdef-y += byteorder.h
+unifdef-y += errno.h
+unifdef-y += fcntl.h
+unifdef-y += ioctl.h
+unifdef-y += ioctls.h
+unifdef-y += ipcbuf.h
+unifdef-y += mman.h
+unifdef-y += msgbuf.h
+unifdef-y += param.h
+unifdef-y += poll.h
+unifdef-y += posix_types.h
+unifdef-y += ptrace.h
+unifdef-y += resource.h
+unifdef-y += sembuf.h
+unifdef-y += shmbuf.h
+unifdef-y += sigcontext.h
+unifdef-y += siginfo.h
+unifdef-y += signal.h
+unifdef-y += socket.h
+unifdef-y += sockios.h
+unifdef-y += stat.h
+unifdef-y += statfs.h
+unifdef-y += termbits.h
+unifdef-y += termios.h
+unifdef-y += types.h
+unifdef-y += unistd.h
+unifdef-y += user.h
 
 # These probably shouldn't be exported
-unifdef-y += elf.h page.h
+unifdef-y += shmparam.h
+unifdef-y += elf.h
+unifdef-y += page.h
diff --git a/include/asm-generic/audit_change_attr.h b/include/asm-generic/audit_change_attr.h
index cb05bf6..5076455 100644
--- a/include/asm-generic/audit_change_attr.h
+++ b/include/asm-generic/audit_change_attr.h
@@ -1,16 +1,20 @@
 __NR_chmod,
 __NR_fchmod,
+#ifdef __NR_chown
 __NR_chown,
 __NR_fchown,
 __NR_lchown,
+#endif
 __NR_setxattr,
 __NR_lsetxattr,
 __NR_fsetxattr,
 __NR_removexattr,
 __NR_lremovexattr,
 __NR_fremovexattr,
+#ifdef __NR_fchownat
 __NR_fchownat,
 __NR_fchmodat,
+#endif
 #ifdef __NR_chown32
 __NR_chown32,
 __NR_fchown32,
diff --git a/include/asm-generic/audit_dir_write.h b/include/asm-generic/audit_dir_write.h
index 161a7a5..6621bd8 100644
--- a/include/asm-generic/audit_dir_write.h
+++ b/include/asm-generic/audit_dir_write.h
@@ -1,14 +1,18 @@
 __NR_rename,
 __NR_mkdir,
 __NR_rmdir,
+#ifdef __NR_creat
 __NR_creat,
+#endif
 __NR_link,
 __NR_unlink,
 __NR_symlink,
 __NR_mknod,
+#ifdef __NR_mkdirat
 __NR_mkdirat,
 __NR_mknodat,
 __NR_unlinkat,
 __NR_renameat,
 __NR_linkat,
 __NR_symlinkat,
+#endif
diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild
index 2308190..b75a348 100644
--- a/include/asm-i386/Kbuild
+++ b/include/asm-i386/Kbuild
@@ -1,5 +1,10 @@
 include include/asm-generic/Kbuild.asm
 
-header-y += boot.h debugreg.h ldt.h ucontext.h
+header-y += boot.h
+header-y += debugreg.h
+header-y += ldt.h
+header-y += ucontext.h
 
-unifdef-y += mtrr.h setup.h vm86.h
+unifdef-y += mtrr.h
+unifdef-y += setup.h
+unifdef-y += vm86.h
diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild
index f1cb00f..15818a1 100644
--- a/include/asm-ia64/Kbuild
+++ b/include/asm-ia64/Kbuild
@@ -1,7 +1,17 @@
 include include/asm-generic/Kbuild.asm
 
-header-y += break.h fpu.h fpswa.h gcc_intrin.h ia64regs.h		\
-	 intel_intrin.h intrinsics.h perfmon_default_smpl.h	\
-	 ptrace_offsets.h rse.h setup.h ucontext.h
+header-y += break.h
+header-y += fpu.h
+header-y += fpswa.h
+header-y += gcc_intrin.h
+header-y += ia64regs.h
+header-y += intel_intrin.h
+header-y += intrinsics.h
+header-y += perfmon_default_smpl.h
+header-y += ptrace_offsets.h
+header-y += rse.h
+header-y += setup.h
+header-y += ucontext.h
 
-unifdef-y += perfmon.h ustack.h
+unifdef-y += perfmon.h
+unifdef-y += ustack.h
diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h
index db017f8..fcc165d 100644
--- a/include/asm-m68k/page.h
+++ b/include/asm-m68k/page.h
@@ -2,6 +2,8 @@
 #define _M68K_PAGE_H
 
 
+#ifdef __KERNEL__
+
 /* PAGE_SHIFT determines the page size */
 #ifndef CONFIG_SUN3
 #define PAGE_SHIFT	(12)
@@ -15,8 +17,6 @@
 #endif
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
-#ifdef __KERNEL__
-
 #include <asm/setup.h>
 
 #if PAGE_SHIFT < 13
@@ -175,8 +175,8 @@
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#endif /* __KERNEL__ */
-
 #include <asm-generic/page.h>
 
+#endif /* __KERNEL__ */
+
 #endif /* _M68K_PAGE_H */
diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h
index 0695bc9..57d6d82 100644
--- a/include/asm-parisc/page.h
+++ b/include/asm-parisc/page.h
@@ -1,22 +1,14 @@
 #ifndef _PARISC_PAGE_H
 #define _PARISC_PAGE_H
 
-#if !defined(__KERNEL__)
-/* this is for userspace applications (4k page size) */
-# define PAGE_SHIFT	12	/* 4k */
-# define PAGE_SIZE	(1UL << PAGE_SHIFT)
-# define PAGE_MASK	(~(PAGE_SIZE-1))
-#endif
-
-
 #ifdef __KERNEL__
 
 #if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-# define PAGE_SHIFT	12	/* 4k */
+# define PAGE_SHIFT	12
 #elif defined(CONFIG_PARISC_PAGE_SIZE_16KB)
-# define PAGE_SHIFT	14	/* 16k */
+# define PAGE_SHIFT	14
 #elif defined(CONFIG_PARISC_PAGE_SIZE_64KB)
-# define PAGE_SHIFT	16	/* 64k */
+# define PAGE_SHIFT	16
 #else
 # error "unknown default kernel page size"
 #endif
@@ -188,9 +180,9 @@
 #define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#endif /* __KERNEL__ */
-
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
+#endif /* __KERNEL__ */
+
 #endif /* _PARISC_PAGE_H */
diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild
index ac61d7e..9827849 100644
--- a/include/asm-powerpc/Kbuild
+++ b/include/asm-powerpc/Kbuild
@@ -1,10 +1,41 @@
 include include/asm-generic/Kbuild.asm
 
-unifdef-y += a.out.h asm-compat.h bootx.h byteorder.h cputable.h elf.h	\
-	nvram.h param.h posix_types.h ptrace.h seccomp.h signal.h	\
-	termios.h types.h unistd.h
+header-y += auxvec.h
+header-y += ioctls.h
+header-y += mman.h
+header-y += sembuf.h
+header-y += siginfo.h
+header-y += stat.h
+header-y += errno.h
+header-y += ipcbuf.h
+header-y += msgbuf.h
+header-y += shmbuf.h
+header-y += socket.h
+header-y += termbits.h
+header-y += fcntl.h
+header-y += ipc.h
+header-y += poll.h
+header-y += shmparam.h
+header-y += sockios.h
+header-y += ucontext.h
+header-y += ioctl.h
+header-y += linkage.h
+header-y += resource.h
+header-y += sigcontext.h
+header-y += statfs.h
 
-header-y += auxvec.h ioctls.h mman.h sembuf.h siginfo.h stat.h errno.h	\
-	ipcbuf.h msgbuf.h shmbuf.h socket.h termbits.h fcntl.h ipc.h	\
-	poll.h shmparam.h sockios.h ucontext.h ioctl.h linkage.h	\
-	resource.h sigcontext.h statfs.h
+unifdef-y += a.out.h
+unifdef-y += asm-compat.h
+unifdef-y += bootx.h
+unifdef-y += byteorder.h
+unifdef-y += cputable.h
+unifdef-y += elf.h
+unifdef-y += nvram.h
+unifdef-y += param.h
+unifdef-y += posix_types.h
+unifdef-y += ptrace.h
+unifdef-y += seccomp.h
+unifdef-y += signal.h
+unifdef-y += termios.h
+unifdef-y += types.h
+unifdef-y += unistd.h
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index 1ba3c99..12707ab 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -23,6 +23,7 @@
 #define PPC_FEATURE_SMT			0x00004000
 #define PPC_FEATURE_ICACHE_SNOOP	0x00002000
 #define PPC_FEATURE_ARCH_2_05		0x00001000
+#define PPC_FEATURE_PA6T		0x00000800
 
 #define PPC_FEATURE_TRUE_LE		0x00000002
 #define PPC_FEATURE_PPC_LE		0x00000001
@@ -36,6 +37,7 @@
 struct cpu_spec;
 
 typedef	void (*cpu_setup_t)(unsigned long offset, struct cpu_spec* spec);
+typedef	void (*cpu_restore_t)(void);
 
 enum powerpc_oprofile_type {
 	PPC_OPROFILE_INVALID = 0,
@@ -65,6 +67,8 @@
 	 * BHT, SPD, etc... from head.S before branching to identify_machine
 	 */
 	cpu_setup_t	cpu_setup;
+	/* Used to restore cpu setup on secondary processors and at resume */
+	cpu_restore_t	cpu_restore;
 
 	/* Used by oprofile userspace to select the right counters */
 	char		*oprofile_cpu_type;
@@ -145,7 +149,7 @@
 
 #define CPU_FTR_PPCAS_ARCH_V2_BASE (CPU_FTR_SLB | \
 					CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \
-					CPU_FTR_NODSISRALIGN | CPU_FTR_CTRL)
+					CPU_FTR_NODSISRALIGN)
 
 /* iSeries doesn't support large pages */
 #ifdef CONFIG_PPC_ISERIES
@@ -310,24 +314,29 @@
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | \
 	    CPU_FTR_MMCRA | CPU_FTR_CTRL)
 #define CPU_FTRS_POWER4	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA)
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+	    CPU_FTR_MMCRA)
 #define CPU_FTRS_PPC970	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA)
 #define CPU_FTRS_POWER5	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
 	    CPU_FTR_PURR)
 #define CPU_FTRS_POWER6 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
 	    CPU_FTR_PURR | CPU_FTR_CI_LARGE_PAGE | CPU_FTR_REAL_LE)
 #define CPU_FTRS_CELL	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
-	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_CTRL | CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE)
+	    CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE)
+#define CPU_FTRS_PA6T (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
+	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \
+	    CPU_FTR_PURR | CPU_FTR_REAL_LE)
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2)
 #endif
@@ -336,7 +345,7 @@
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
 	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
-	    CPU_FTRS_CELL | CPU_FTR_CI_LARGE_PAGE)
+	    CPU_FTRS_CELL | CPU_FTRS_PA6T)
 #else
 enum {
 	CPU_FTRS_POSSIBLE =
@@ -375,7 +384,7 @@
 #define CPU_FTRS_ALWAYS		\
 	    (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 &	\
 	    CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 &	\
-	    CPU_FTRS_CELL & CPU_FTRS_POSSIBLE)
+	    CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
 #else
 enum {
 	CPU_FTRS_ALWAYS =
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h
index 0d3c4e8..257d1ce 100644
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -164,9 +164,15 @@
 #define H_VIO_SIGNAL		0x104
 #define H_SEND_CRQ		0x108
 #define H_COPY_RDMA		0x110
+#define H_REGISTER_LOGICAL_LAN	0x114
+#define H_FREE_LOGICAL_LAN	0x118
+#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
+#define H_SEND_LOGICAL_LAN	0x120
+#define H_MULTICAST_CTRL	0x130
 #define H_SET_XDABR		0x134
 #define H_STUFF_TCE		0x138
 #define H_PUT_TCE_INDIRECT	0x13C
+#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
 #define H_VTERM_PARTNER_INFO	0x150
 #define H_REGISTER_VTERM	0x154
 #define H_FREE_VTERM		0x158
@@ -196,102 +202,59 @@
 #define H_GET_HCA_INFO          0x1B8
 #define H_GET_PERF_COUNT        0x1BC
 #define H_MANAGE_TRACE          0x1C0
+#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING		0x1D8
 #define H_JOIN			0x298
 #define H_VASI_STATE            0x2A4
 #define H_ENABLE_CRQ		0x2B0
+#define MAX_HCALL_OPCODE	H_ENABLE_CRQ
 
 #ifndef __ASSEMBLY__
 
-/* plpar_hcall() -- Generic call interface using above opcodes
+/**
+ * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
+ * @opcode: The hypervisor call to make.
  *
- * The actual call interface is a hypervisor call instruction with
- * the opcode in R3 and input args in R4-R7.
- * Status is returned in R3 with variable output values in R4-R11.
- * Only H_PTE_READ with H_READ_4 uses R6-R11 so we ignore it for now
- * and return only two out args which MUST ALWAYS BE PROVIDED.
- */
-long plpar_hcall(unsigned long opcode,
-		 unsigned long arg1,
-		 unsigned long arg2,
-		 unsigned long arg3,
-		 unsigned long arg4,
-		 unsigned long *out1,
-		 unsigned long *out2,
-		 unsigned long *out3);
-
-/* Same as plpar_hcall but for those opcodes that return no values
- * other than status.  Slightly more efficient.
+ * This call supports up to 7 arguments and only returns the status of
+ * the hcall. Use this version where possible, it's slightly faster than
+ * the other plpar_hcalls.
  */
 long plpar_hcall_norets(unsigned long opcode, ...);
 
-/*
- * Special hcall interface for ibmveth support.
- * Takes 8 input parms. Returns a rc and stores the
- * R4 return value in *out1.
- */
-long plpar_hcall_8arg_2ret(unsigned long opcode,
-			   unsigned long arg1,
-			   unsigned long arg2,
-			   unsigned long arg3,
-			   unsigned long arg4,
-			   unsigned long arg5,
-			   unsigned long arg6,
-			   unsigned long arg7,
-			   unsigned long arg8,
-			   unsigned long *out1);
-
-/* plpar_hcall_4out()
+/**
+ * plpar_hcall: - Make a pseries hypervisor call
+ * @opcode: The hypervisor call to make.
+ * @retbuf: Buffer to store up to 4 return arguments in.
  *
- * same as plpar_hcall except with 4 output arguments.
+ * This call supports up to 6 arguments and 4 return arguments. Use
+ * PLPAR_HCALL_BUFSIZE to size the return argument buffer.
  *
+ * Used for all but the craziest of phyp interfaces (see plpar_hcall9)
  */
-long plpar_hcall_4out(unsigned long opcode,
-		      unsigned long arg1,
-		      unsigned long arg2,
-		      unsigned long arg3,
-		      unsigned long arg4,
-		      unsigned long *out1,
-		      unsigned long *out2,
-		      unsigned long *out3,
-		      unsigned long *out4);
+#define PLPAR_HCALL_BUFSIZE 4
+long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...);
 
-long plpar_hcall_7arg_7ret(unsigned long opcode,
-			   unsigned long arg1,
-			   unsigned long arg2,
-			   unsigned long arg3,
-			   unsigned long arg4,
-			   unsigned long arg5,
-			   unsigned long arg6,
-			   unsigned long arg7,
-			   unsigned long *out1,
-			   unsigned long *out2,
-			   unsigned long *out3,
-			   unsigned long *out4,
-			   unsigned long *out5,
-			   unsigned long *out6,
-			   unsigned long *out7);
+/**
+ * plpar_hcall9: - Make a pseries hypervisor call with up to 9 return arguments
+ * @opcode: The hypervisor call to make.
+ * @retbuf: Buffer to store up to 9 return arguments in.
+ *
+ * This call supports up to 9 arguments and 9 return arguments. Use
+ * PLPAR_HCALL9_BUFSIZE to size the return argument buffer.
+ */
+#define PLPAR_HCALL9_BUFSIZE 9
+long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
 
-long plpar_hcall_9arg_9ret(unsigned long opcode,
-			   unsigned long arg1,
-			   unsigned long arg2,
-			   unsigned long arg3,
-			   unsigned long arg4,
-			   unsigned long arg5,
-			   unsigned long arg6,
-			   unsigned long arg7,
-			   unsigned long arg8,
-			   unsigned long arg9,
-			   unsigned long *out1,
-			   unsigned long *out2,
-			   unsigned long *out3,
-			   unsigned long *out4,
-			   unsigned long *out5,
-			   unsigned long *out6,
-			   unsigned long *out7,
-			   unsigned long *out8,
-			   unsigned long *out9);
+/* For hcall instrumentation.  One structure per-hcall, per-CPU */
+struct hcall_stats {
+	unsigned long	num_calls;	/* number of calls (on this CPU) */
+	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
+	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+};
+void update_hcall_stats(unsigned long opcode, unsigned long tb_delta,
+			unsigned long purr_delta);
+#define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
 
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff --git a/include/asm-powerpc/ibmebus.h b/include/asm-powerpc/ibmebus.h
index 7a42723..7ab195a 100644
--- a/include/asm-powerpc/ibmebus.h
+++ b/include/asm-powerpc/ibmebus.h
@@ -48,7 +48,7 @@
 extern struct bus_type ibmebus_bus_type;
 
 struct ibmebus_dev {	
-	char *name;
+	const char *name;
 	struct of_device ofdev;
 };
 
diff --git a/include/asm-powerpc/ide.h b/include/asm-powerpc/ide.h
index b09b42a..c8390f9 100644
--- a/include/asm-powerpc/ide.h
+++ b/include/asm-powerpc/ide.h
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <asm/mpc8xx.h>
 #endif
+#include <asm/io.h>
 
 #ifndef MAX_HWIFS
 #ifdef __powerpc64__
@@ -21,15 +22,14 @@
 #endif
 #endif
 
+#define __ide_mm_insw(p, a, c)	_insw_ns((volatile u16 __iomem *)(p), (a), (c))
+#define __ide_mm_insl(p, a, c)	_insl_ns((volatile u32 __iomem *)(p), (a), (c))
+#define __ide_mm_outsw(p, a, c)	_outsw_ns((volatile u16 __iomem *)(p), (a), (c))
+#define __ide_mm_outsl(p, a, c)	_outsl_ns((volatile u32 __iomem *)(p), (a), (c))
+
 #ifndef  __powerpc64__
 #include <linux/hdreg.h>
 #include <linux/ioport.h>
-#include <asm/io.h>
-
-extern void __ide_mm_insw(void __iomem *port, void *addr, u32 count);
-extern void __ide_mm_outsw(void __iomem *port, void *addr, u32 count);
-extern void __ide_mm_insl(void __iomem *port, void *addr, u32 count);
-extern void __ide_mm_outsl(void __iomem *port, void *addr, u32 count);
 
 struct ide_machdep_calls {
         int         (*default_irq)(unsigned long base);
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 212428d..46bae1c 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -20,20 +20,11 @@
 #include <asm/page.h>
 #include <asm/byteorder.h>
 #include <asm/paca.h>
-#ifdef CONFIG_PPC_ISERIES 
-#include <asm/iseries/iseries_io.h>
-#endif  
 #include <asm/synch.h>
 #include <asm/delay.h>
 
 #include <asm-generic/iomap.h>
 
-#define __ide_mm_insw(p, a, c) _insw_ns((volatile u16 __iomem *)(p), (a), (c))
-#define __ide_mm_insl(p, a, c) _insl_ns((volatile u32 __iomem *)(p), (a), (c))
-#define __ide_mm_outsw(p, a, c) _outsw_ns((volatile u16 __iomem *)(p), (a), (c))
-#define __ide_mm_outsl(p, a, c) _outsl_ns((volatile u32 __iomem *)(p), (a), (c))
-
-
 #define SIO_CONFIG_RA	0x398
 #define SIO_CONFIG_RD	0x399
 
@@ -43,42 +34,53 @@
 extern unsigned long pci_io_base;
 
 #ifdef CONFIG_PPC_ISERIES
-/* __raw_* accessors aren't supported on iSeries */
-#define __raw_readb(addr)	{ BUG(); 0; }
-#define __raw_readw(addr)       { BUG(); 0; }
-#define __raw_readl(addr)       { BUG(); 0; }
-#define __raw_readq(addr)       { BUG(); 0; }
-#define __raw_writeb(v, addr)   { BUG(); 0; }
-#define __raw_writew(v, addr)   { BUG(); 0; }
-#define __raw_writel(v, addr)   { BUG(); 0; }
-#define __raw_writeq(v, addr)   { BUG(); 0; }
-#define readb(addr)		iSeries_Read_Byte(addr)
-#define readw(addr)		iSeries_Read_Word(addr)
-#define readl(addr)		iSeries_Read_Long(addr)
-#define writeb(data, addr)	iSeries_Write_Byte((data),(addr))
-#define writew(data, addr)	iSeries_Write_Word((data),(addr))
-#define writel(data, addr)	iSeries_Write_Long((data),(addr))
-#define memset_io(a,b,c)	iSeries_memset_io((a),(b),(c))
-#define memcpy_fromio(a,b,c)	iSeries_memcpy_fromio((a), (b), (c))
-#define memcpy_toio(a,b,c)	iSeries_memcpy_toio((a), (b), (c))
 
-#define inb(addr)		readb(((void __iomem *)(long)(addr)))
-#define inw(addr)		readw(((void __iomem *)(long)(addr)))
-#define inl(addr)		readl(((void __iomem *)(long)(addr)))
-#define outb(data,addr)		writeb(data,((void __iomem *)(long)(addr)))
-#define outw(data,addr)		writew(data,((void __iomem *)(long)(addr)))
-#define outl(data,addr)		writel(data,((void __iomem *)(long)(addr)))
-/*
- * The *_ns versions below don't do byte-swapping.
- * Neither do the standard versions now, these are just here
- * for older code.
- */
-#define insb(port, buf, ns)	_insb((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define insw(port, buf, ns)	_insw_ns((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define insl(port, buf, nl)	_insl_ns((u8 __iomem *)((port)+pci_io_base), (buf), (nl))
-#define insw_ns(port, buf, ns)	_insw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define insl_ns(port, buf, nl)	_insl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
-#else
+extern int in_8(const volatile unsigned char __iomem *addr);
+extern void out_8(volatile unsigned char __iomem *addr, int val);
+extern int in_le16(const volatile unsigned short __iomem *addr);
+extern int in_be16(const volatile unsigned short __iomem *addr);
+extern void out_le16(volatile unsigned short __iomem *addr, int val);
+extern void out_be16(volatile unsigned short __iomem *addr, int val);
+extern unsigned in_le32(const volatile unsigned __iomem *addr);
+extern unsigned in_be32(const volatile unsigned __iomem *addr);
+extern void out_le32(volatile unsigned __iomem *addr, int val);
+extern void out_be32(volatile unsigned __iomem *addr, int val);
+extern unsigned long in_le64(const volatile unsigned long __iomem *addr);
+extern unsigned long in_be64(const volatile unsigned long __iomem *addr);
+extern void out_le64(volatile unsigned long __iomem *addr, unsigned long val);
+extern void out_be64(volatile unsigned long __iomem *addr, unsigned long val);
+
+extern unsigned char __raw_readb(const volatile void __iomem *addr);
+extern unsigned short __raw_readw(const volatile void __iomem *addr);
+extern unsigned int __raw_readl(const volatile void __iomem *addr);
+extern unsigned long __raw_readq(const volatile void __iomem *addr);
+extern void __raw_writeb(unsigned char v, volatile void __iomem *addr);
+extern void __raw_writew(unsigned short v, volatile void __iomem *addr);
+extern void __raw_writel(unsigned int v, volatile void __iomem *addr);
+extern void __raw_writeq(unsigned long v, volatile void __iomem *addr);
+
+extern void memset_io(volatile void __iomem *addr, int c, unsigned long n);
+extern void memcpy_fromio(void *dest, const volatile void __iomem *src,
+                                 unsigned long n);
+extern void memcpy_toio(volatile void __iomem *dest, const void *src,
+                                 unsigned long n);
+
+#else /* CONFIG_PPC_ISERIES */
+
+#define in_8(addr)		__in_8((addr))
+#define out_8(addr, val)	__out_8((addr), (val))
+#define in_le16(addr)		__in_le16((addr))
+#define in_be16(addr)		__in_be16((addr))
+#define out_le16(addr, val)	__out_le16((addr), (val))
+#define out_be16(addr, val)	__out_be16((addr), (val))
+#define in_le32(addr)		__in_le32((addr))
+#define in_be32(addr)		__in_be32((addr))
+#define out_le32(addr, val)	__out_le32((addr), (val))
+#define out_be32(addr, val)	__out_be32((addr), (val))
+#define in_le64(addr)		__in_le64((addr))
+#define in_be64(addr)		__in_be64((addr))
+#define out_le64(addr, val)	__out_le64((addr), (val))
+#define out_be64(addr, val)	__out_be64((addr), (val))
 
 static inline unsigned char __raw_readb(const volatile void __iomem *addr)
 {
@@ -112,23 +114,11 @@
 {
 	*(volatile unsigned long __force *)addr = v;
 }
-#define readb(addr)		eeh_readb(addr)
-#define readw(addr)		eeh_readw(addr)
-#define readl(addr)		eeh_readl(addr)
-#define readq(addr)		eeh_readq(addr)
-#define writeb(data, addr)	eeh_writeb((data), (addr))
-#define writew(data, addr)	eeh_writew((data), (addr))
-#define writel(data, addr)	eeh_writel((data), (addr))
-#define writeq(data, addr)	eeh_writeq((data), (addr))
 #define memset_io(a,b,c)	eeh_memset_io((a),(b),(c))
 #define memcpy_fromio(a,b,c)	eeh_memcpy_fromio((a),(b),(c))
 #define memcpy_toio(a,b,c)	eeh_memcpy_toio((a),(b),(c))
-#define inb(port)		eeh_inb((unsigned long)port)
-#define outb(val, port)		eeh_outb(val, (unsigned long)port)
-#define inw(port)		eeh_inw((unsigned long)port)
-#define outw(val, port)		eeh_outw(val, (unsigned long)port)
-#define inl(port)		eeh_inl((unsigned long)port)
-#define outl(val, port)		eeh_outl(val, (unsigned long)port)
+
+#endif /* CONFIG_PPC_ISERIES */
 
 /*
  * The insw/outsw/insl/outsl macros don't do byte-swapping.
@@ -138,30 +128,37 @@
 #define insb(port, buf, ns)	eeh_insb((port), (buf), (ns))
 #define insw(port, buf, ns)	eeh_insw_ns((port), (buf), (ns))
 #define insl(port, buf, nl)	eeh_insl_ns((port), (buf), (nl))
-#define insw_ns(port, buf, ns)	eeh_insw_ns((port), (buf), (ns))
-#define insl_ns(port, buf, nl)	eeh_insl_ns((port), (buf), (nl))
-
-#endif
 
 #define outsb(port, buf, ns)  _outsb((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
 #define outsw(port, buf, ns)  _outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
 #define outsl(port, buf, nl)  _outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
 
+#define readb(addr)		eeh_readb(addr)
+#define readw(addr)		eeh_readw(addr)
+#define readl(addr)		eeh_readl(addr)
+#define readq(addr)		eeh_readq(addr)
+#define writeb(data, addr)	eeh_writeb((data), (addr))
+#define writew(data, addr)	eeh_writew((data), (addr))
+#define writel(data, addr)	eeh_writel((data), (addr))
+#define writeq(data, addr)	eeh_writeq((data), (addr))
+#define inb(port)		eeh_inb((unsigned long)port)
+#define outb(val, port)		eeh_outb(val, (unsigned long)port)
+#define inw(port)		eeh_inw((unsigned long)port)
+#define outw(val, port)		eeh_outw(val, (unsigned long)port)
+#define inl(port)		eeh_inl((unsigned long)port)
+#define outl(val, port)		eeh_outl(val, (unsigned long)port)
+
 #define readb_relaxed(addr) readb(addr)
 #define readw_relaxed(addr) readw(addr)
 #define readl_relaxed(addr) readl(addr)
 #define readq_relaxed(addr) readq(addr)
 
-extern void _insb(volatile u8 __iomem *port, void *buf, int ns);
-extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns);
-extern void _insw(volatile u16 __iomem *port, void *buf, int ns);
-extern void _outsw(volatile u16 __iomem *port, const void *buf, int ns);
-extern void _insl(volatile u32 __iomem *port, void *buf, int nl);
-extern void _outsl(volatile u32 __iomem *port, const void *buf, int nl);
-extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns);
-extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns);
-extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl);
-extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl);
+extern void _insb(volatile u8 __iomem *port, void *buf, long count);
+extern void _outsb(volatile u8 __iomem *port, const void *buf, long count);
+extern void _insw_ns(volatile u16 __iomem *port, void *buf, long count);
+extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count);
+extern void _insl_ns(volatile u32 __iomem *port, void *buf, long count);
+extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count);
 
 static inline void mmiowb(void)
 {
@@ -180,14 +177,6 @@
 #define inl_p(port)             inl(port)
 #define outl_p(val, port)       (udelay(1), outl((val), (port)))
 
-/*
- * The *_ns versions below don't do byte-swapping.
- * Neither do the standard versions now, these are just here
- * for older code.
- */
-#define outsw_ns(port, buf, ns)	_outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
-#define outsl_ns(port, buf, nl)	_outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
-
 
 #define IO_SPACE_LIMIT ~(0UL)
 
@@ -279,7 +268,7 @@
  * and should not be used directly by device drivers.  Use inb/readb
  * instead.
  */
-static inline int in_8(const volatile unsigned char __iomem *addr)
+static inline int __in_8(const volatile unsigned char __iomem *addr)
 {
 	int ret;
 
@@ -288,14 +277,14 @@
 	return ret;
 }
 
-static inline void out_8(volatile unsigned char __iomem *addr, int val)
+static inline void __out_8(volatile unsigned char __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; stb%U0%X0 %1,%0"
 			     : "=m" (*addr) : "r" (val));
 	get_paca()->io_sync = 1;
 }
 
-static inline int in_le16(const volatile unsigned short __iomem *addr)
+static inline int __in_le16(const volatile unsigned short __iomem *addr)
 {
 	int ret;
 
@@ -304,7 +293,7 @@
 	return ret;
 }
 
-static inline int in_be16(const volatile unsigned short __iomem *addr)
+static inline int __in_be16(const volatile unsigned short __iomem *addr)
 {
 	int ret;
 
@@ -313,21 +302,21 @@
 	return ret;
 }
 
-static inline void out_le16(volatile unsigned short __iomem *addr, int val)
+static inline void __out_le16(volatile unsigned short __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; sthbrx %1,0,%2"
 			     : "=m" (*addr) : "r" (val), "r" (addr));
 	get_paca()->io_sync = 1;
 }
 
-static inline void out_be16(volatile unsigned short __iomem *addr, int val)
+static inline void __out_be16(volatile unsigned short __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; sth%U0%X0 %1,%0"
 			     : "=m" (*addr) : "r" (val));
 	get_paca()->io_sync = 1;
 }
 
-static inline unsigned in_le32(const volatile unsigned __iomem *addr)
+static inline unsigned __in_le32(const volatile unsigned __iomem *addr)
 {
 	unsigned ret;
 
@@ -336,7 +325,7 @@
 	return ret;
 }
 
-static inline unsigned in_be32(const volatile unsigned __iomem *addr)
+static inline unsigned __in_be32(const volatile unsigned __iomem *addr)
 {
 	unsigned ret;
 
@@ -345,21 +334,21 @@
 	return ret;
 }
 
-static inline void out_le32(volatile unsigned __iomem *addr, int val)
+static inline void __out_le32(volatile unsigned __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr)
 			     : "r" (val), "r" (addr));
 	get_paca()->io_sync = 1;
 }
 
-static inline void out_be32(volatile unsigned __iomem *addr, int val)
+static inline void __out_be32(volatile unsigned __iomem *addr, int val)
 {
 	__asm__ __volatile__("sync; stw%U0%X0 %1,%0"
 			     : "=m" (*addr) : "r" (val));
 	get_paca()->io_sync = 1;
 }
 
-static inline unsigned long in_le64(const volatile unsigned long __iomem *addr)
+static inline unsigned long __in_le64(const volatile unsigned long __iomem *addr)
 {
 	unsigned long tmp, ret;
 
@@ -379,7 +368,7 @@
 	return ret;
 }
 
-static inline unsigned long in_be64(const volatile unsigned long __iomem *addr)
+static inline unsigned long __in_be64(const volatile unsigned long __iomem *addr)
 {
 	unsigned long ret;
 
@@ -388,7 +377,7 @@
 	return ret;
 }
 
-static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long val)
+static inline void __out_le64(volatile unsigned long __iomem *addr, unsigned long val)
 {
 	unsigned long tmp;
 
@@ -406,15 +395,13 @@
 	get_paca()->io_sync = 1;
 }
 
-static inline void out_be64(volatile unsigned long __iomem *addr, unsigned long val)
+static inline void __out_be64(volatile unsigned long __iomem *addr, unsigned long val)
 {
 	__asm__ __volatile__("sync; std%U0%X0 %1,%0" : "=m" (*addr) : "r" (val));
 	get_paca()->io_sync = 1;
 }
 
-#ifndef CONFIG_PPC_ISERIES 
 #include <asm/eeh.h>
-#endif
 
 /**
  *	check_signature		-	find BIOS signatures
@@ -430,7 +417,6 @@
 	const unsigned char *signature, int length)
 {
 	int retval = 0;
-#ifndef CONFIG_PPC_ISERIES 
 	do {
 		if (readb(io_addr) != *signature)
 			goto out;
@@ -440,7 +426,6 @@
 	} while (length);
 	retval = 1;
 out:
-#endif
 	return retval;
 }
 
diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h
index d903a62..4da41ef 100644
--- a/include/asm-powerpc/irq.h
+++ b/include/asm-powerpc/irq.h
@@ -137,7 +137,7 @@
 extern struct irq_map_entry irq_map[NR_IRQS];
 
 
-/***
+/**
  * irq_alloc_host - Allocate a new irq_host data structure
  * @node: device-tree node of the interrupt controller
  * @revmap_type: type of reverse mapping to use
@@ -159,14 +159,14 @@
 				       irq_hw_number_t inval_irq);
 
 
-/***
+/**
  * irq_find_host - Locates a host for a given device node
  * @node: device-tree node of the interrupt controller
  */
 extern struct irq_host *irq_find_host(struct device_node *node);
 
 
-/***
+/**
  * irq_set_default_host - Set a "default" host
  * @host: default host pointer
  *
@@ -178,7 +178,7 @@
 extern void irq_set_default_host(struct irq_host *host);
 
 
-/***
+/**
  * irq_set_virq_count - Set the maximum number of virt irqs
  * @count: number of linux virtual irqs, capped with NR_IRQS
  *
@@ -188,7 +188,7 @@
 extern void irq_set_virq_count(unsigned int count);
 
 
-/***
+/**
  * irq_create_mapping - Map a hardware interrupt into linux virq space
  * @host: host owning this hardware interrupt or NULL for default host
  * @hwirq: hardware irq number in that host space
@@ -202,13 +202,13 @@
 				       irq_hw_number_t hwirq);
 
 
-/***
+/**
  * irq_dispose_mapping - Unmap an interrupt
  * @virq: linux virq number of the interrupt to unmap
  */
 extern void irq_dispose_mapping(unsigned int virq);
 
-/***
+/**
  * irq_find_mapping - Find a linux virq from an hw irq number.
  * @host: host owning this hardware interrupt
  * @hwirq: hardware irq number in that host space
@@ -221,7 +221,7 @@
 				     irq_hw_number_t hwirq);
 
 
-/***
+/**
  * irq_radix_revmap - Find a linux virq from a hw irq number.
  * @host: host owning this hardware interrupt
  * @hwirq: hardware irq number in that host space
@@ -232,7 +232,7 @@
 extern unsigned int irq_radix_revmap(struct irq_host *host,
 				     irq_hw_number_t hwirq);
 
-/***
+/**
  * irq_linear_revmap - Find a linux virq from a hw irq number.
  * @host: host owning this hardware interrupt
  * @hwirq: hardware irq number in that host space
@@ -247,7 +247,7 @@
 
 
 
-/***
+/**
  * irq_alloc_virt - Allocate virtual irq numbers
  * @host: host owning these new virtual irqs
  * @count: number of consecutive numbers to allocate
@@ -261,7 +261,7 @@
 				   unsigned int count,
 				   unsigned int hint);
 
-/***
+/**
  * irq_free_virt - Free virtual irq numbers
  * @virq: virtual irq number of the first interrupt to free
  * @count: number of interrupts to free
@@ -300,7 +300,7 @@
 
 /* -- End OF helpers -- */
 
-/***
+/**
  * irq_early_init - Init irq remapping subsystem
  */
 extern void irq_early_init(void);
diff --git a/include/asm-powerpc/iseries/hv_call_xm.h b/include/asm-powerpc/iseries/hv_call_xm.h
index ca9202c..392ac3f 100644
--- a/include/asm-powerpc/iseries/hv_call_xm.h
+++ b/include/asm-powerpc/iseries/hv_call_xm.h
@@ -16,23 +16,6 @@
 #define HvCallXmSetTce			HvCallXm + 11
 #define HvCallXmSetTces			HvCallXm + 13
 
-/*
- * Structure passed to HvCallXm_getTceTableParms
- */
-struct iommu_table_cb {
-	unsigned long	itc_busno;	/* Bus number for this tce table */
-	unsigned long	itc_start;	/* Will be NULL for secondary */
-	unsigned long	itc_totalsize;	/* Size (in pages) of whole table */
-	unsigned long	itc_offset;	/* Index into real tce table of the
-					   start of our section */
-	unsigned long	itc_size;	/* Size (in pages) of our section */
-	unsigned long	itc_index;	/* Index of this tce table */
-	unsigned short	itc_maxtables;	/* Max num of tables for partition */
-	unsigned char	itc_virtbus;	/* Flag to indicate virtual bus */
-	unsigned char	itc_slotno;	/* IOA Tce Slot Index */
-	unsigned char	itc_rsvd[4];
-};
-
 static inline void HvCallXm_getTceTableParms(u64 cb)
 {
 	HvCall1(HvCallXmGetTceTableParms, cb);
diff --git a/include/asm-powerpc/iseries/hv_lp_config.h b/include/asm-powerpc/iseries/hv_lp_config.h
index df8b207..a006fd1 100644
--- a/include/asm-powerpc/iseries/hv_lp_config.h
+++ b/include/asm-powerpc/iseries/hv_lp_config.h
@@ -25,7 +25,6 @@
 
 #include <asm/iseries/hv_call_sc.h>
 #include <asm/iseries/hv_types.h>
-#include <asm/iseries/it_lp_naca.h>
 
 enum {
 	HvCallCfg_Cur	= 0,
@@ -44,16 +43,8 @@
 #define HvCallCfgGetHostingLpIndex			HvCallCfg + 32
 
 extern HvLpIndex HvLpConfig_getLpIndex_outline(void);
-
-static inline HvLpIndex	HvLpConfig_getLpIndex(void)
-{
-	return itLpNaca.xLpIndex;
-}
-
-static inline HvLpIndex	HvLpConfig_getPrimaryLpIndex(void)
-{
-	return itLpNaca.xPrimaryLpIndex;
-}
+extern HvLpIndex HvLpConfig_getLpIndex(void);
+extern HvLpIndex HvLpConfig_getPrimaryLpIndex(void);
 
 static inline u64 HvLpConfig_getMsChunks(void)
 {
diff --git a/include/asm-powerpc/iseries/iseries_io.h b/include/asm-powerpc/iseries/iseries_io.h
deleted file mode 100644
index f29009b..0000000
--- a/include/asm-powerpc/iseries/iseries_io.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _ASM_POWERPC_ISERIES_ISERIES_IO_H
-#define _ASM_POWERPC_ISERIES_ISERIES_IO_H
-
-
-#ifdef CONFIG_PPC_ISERIES
-#include <linux/types.h>
-/*
- * Created by Allan Trautman on Thu Dec 28 2000.
- *
- * Remaps the io.h for the iSeries Io
- * Copyright (C) 2000  Allan H Trautman, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the:
- * Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330,
- * Boston, MA  02111-1307  USA
- *
- * Change Activity:
- *   Created December 28, 2000
- * End Change Activity
- */
-
-#ifdef CONFIG_PCI
-extern u8   iSeries_Read_Byte(const volatile void __iomem * IoAddress);
-extern u16  iSeries_Read_Word(const volatile void __iomem * IoAddress);
-extern u32  iSeries_Read_Long(const volatile void __iomem * IoAddress);
-extern void iSeries_Write_Byte(u8  IoData, volatile void __iomem * IoAddress);
-extern void iSeries_Write_Word(u16 IoData, volatile void __iomem * IoAddress);
-extern void iSeries_Write_Long(u32 IoData, volatile void __iomem * IoAddress);
-
-extern void iSeries_memset_io(volatile void __iomem *dest, char x, size_t n);
-extern void iSeries_memcpy_toio(volatile void __iomem *dest, void *source,
-		size_t n);
-extern void iSeries_memcpy_fromio(void *dest,
-		const volatile void __iomem *source, size_t n);
-#else
-static inline u8 iSeries_Read_Byte(const volatile void __iomem *IoAddress)
-{
-	return 0xff;
-}
-
-static inline void iSeries_Write_Byte(u8 IoData,
-		volatile void __iomem *IoAddress)
-{
-}
-#endif	/* CONFIG_PCI */
-
-#endif /* CONFIG_PPC_ISERIES */
-#endif /* _ASM_POWERPC_ISERIES_ISERIES_IO_H */
diff --git a/include/asm-powerpc/iseries/it_lp_queue.h b/include/asm-powerpc/iseries/it_lp_queue.h
index 284c5a7..3f68147 100644
--- a/include/asm-powerpc/iseries/it_lp_queue.h
+++ b/include/asm-powerpc/iseries/it_lp_queue.h
@@ -27,8 +27,6 @@
 #include <asm/types.h>
 #include <asm/ptrace.h>
 
-struct HvLpEvent;
-
 #define IT_LP_MAX_QUEUES	8
 
 #define IT_LP_NOT_USED		0	/* Queue will not be used by PLIC */
diff --git a/include/asm-powerpc/iseries/vio.h b/include/asm-powerpc/iseries/vio.h
index 72a97d3..7a95d29 100644
--- a/include/asm-powerpc/iseries/vio.h
+++ b/include/asm-powerpc/iseries/vio.h
@@ -122,6 +122,34 @@
 	viorc_openRejected = 0x0301
 };
 
+/*
+ * Layout of the events exchanged between us and OS/400 for chario events.
+ * It must not be changed unless the OS/400 side changes too.
+ */
+struct viocharlpevent {
+	struct HvLpEvent event;
+	u32 reserved;
+	u16 version;
+	u16 subtype_result_code;
+	u8 virtual_device;
+	u8 len;
+	u8 data[VIOCHAR_MAX_DATA];
+};
+
+#define VIOCHAR_WINDOW		10
+
+enum viocharsubtype {
+	viocharopen = 0x0001,
+	viocharclose = 0x0002,
+	viochardata = 0x0003,
+	viocharack = 0x0004,
+	viocharconfig = 0x0005
+};
+
+enum viochar_rc {
+	viochar_rc_ebusy = 1
+};
+
 struct device;
 
 extern struct device *iSeries_vio_dev;
diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h
index 4dc514a..821ea0c 100644
--- a/include/asm-powerpc/lppaca.h
+++ b/include/asm-powerpc/lppaca.h
@@ -27,7 +27,9 @@
 //
 //
 //----------------------------------------------------------------------------
+#include <linux/cache.h>
 #include <asm/types.h>
+#include <asm/mmu.h>
 
 /* The Hypervisor barfs if the lppaca crosses a page boundary.  A 1k
  * alignment is sufficient to prevent this */
@@ -114,7 +116,7 @@
 
 
 //=============================================================================
-// CACHE_LINE_3 0x0100 - 0x007F: This line is shared with other processors
+// CACHE_LINE_3 0x0100 - 0x017F: This line is shared with other processors
 //=============================================================================
 	// This is the yield_count.  An "odd" value (low bit on) means that
 	// the processor is yielded (either because of an OS yield or a PLIC
@@ -126,12 +128,29 @@
 	u8	reserved6[124];		// Reserved                     x04-x7F
 
 //=============================================================================
-// CACHE_LINE_4-5 0x0100 - 0x01FF Contains PMC interrupt data
+// CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data
 //=============================================================================
 	u8	pmc_save_area[256];	// PMC interrupt Area           x00-xFF
 } __attribute__((__aligned__(0x400)));
 
 extern struct lppaca lppaca[];
 
+/*
+ * SLB shadow buffer structure as defined in the PAPR.  The save_area
+ * holds one adjacent ESID/VSID pair for each shadowed SLB entry: the
+ * ESID occupies the first 64 bits of a pair, followed by the VSID.
+ */
+struct slb_shadow {
+	u32	persistent;		// Number of persistent SLBs	x00-x03
+	u32	buffer_length;		// Total shadow buffer length	x04-x07
+	u64	reserved;		// Alignment			x08-x0f
+	struct	{
+		u64     esid;
+		u64	vsid;
+	} save_area[SLB_NUM_BOLTED];	//				x10-x40
+} ____cacheline_aligned;
+
+extern struct slb_shadow slb_shadow[];
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_LPPACA_H */
diff --git a/include/asm-powerpc/paca.h b/include/asm-powerpc/paca.h
index 3d5d590..0a4e5c9 100644
--- a/include/asm-powerpc/paca.h
+++ b/include/asm-powerpc/paca.h
@@ -23,6 +23,7 @@
 register struct paca_struct *local_paca asm("r13");
 #define get_paca()	local_paca
 #define get_lppaca()	(get_paca()->lppaca_ptr)
+#define get_slb_shadow()	(get_paca()->slb_shadow_ptr)
 
 struct task_struct;
 
@@ -99,6 +100,8 @@
 	u64 user_time;			/* accumulated usermode TB ticks */
 	u64 system_time;		/* accumulated system TB ticks */
 	u64 startpurr;			/* PURR/TB value snapshot */
+
+	struct slb_shadow *slb_shadow_ptr;
 };
 
 extern struct paca_struct paca[];
diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h
index fb597b3..b4d38b0 100644
--- a/include/asm-powerpc/page.h
+++ b/include/asm-powerpc/page.h
@@ -55,12 +55,6 @@
 #define PAGE_OFFSET     ASM_CONST(CONFIG_KERNEL_START)
 #define KERNELBASE      (PAGE_OFFSET + PHYSICAL_START)
 
-#ifdef CONFIG_DISCONTIGMEM
-#define page_to_pfn(page)	discontigmem_page_to_pfn(page)
-#define pfn_to_page(pfn)	discontigmem_pfn_to_page(pfn)
-#define pfn_valid(pfn)		discontigmem_pfn_valid(pfn)
-#endif
-
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif
diff --git a/include/asm-powerpc/ppc-pci.h b/include/asm-powerpc/ppc-pci.h
index cf79bc7..1115756 100644
--- a/include/asm-powerpc/ppc-pci.h
+++ b/include/asm-powerpc/ppc-pci.h
@@ -69,6 +69,17 @@
 void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
 
 /**
+ * rtas_pci_enable - enable IO transfers for this slot
+ * @pdn:       pci device node
+ * @function:  either EEH_THAW_MMIO or EEH_THAW_DMA
+ *
+ * Enable I/O transfers to this slot
+ */
+#define EEH_THAW_MMIO 2
+#define EEH_THAW_DMA  3
+int rtas_pci_enable(struct pci_dn *pdn, int function);
+
+/**
  * rtas_set_slot_reset -- unfreeze a frozen slot
  *
  * Clear the EEH-frozen condition on a slot.  This routine
diff --git a/include/asm-powerpc/processor.h b/include/asm-powerpc/processor.h
index 22e54a2..6cb6fb1 100644
--- a/include/asm-powerpc/processor.h
+++ b/include/asm-powerpc/processor.h
@@ -32,6 +32,7 @@
 #define _CHRP_Motorola	0x04	/* motorola chrp, the cobra */
 #define _CHRP_IBM	0x05	/* IBM chrp, the longtrail and longtrail 2 */
 #define _CHRP_Pegasos	0x06	/* Genesi/bplan's Pegasos and Pegasos2 */
+#define _CHRP_briq	0x07	/* TotalImpact's briQ */
 
 #if defined(__KERNEL__) && defined(CONFIG_PPC32)
 
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index d0fa1b9..5246297 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -72,8 +72,8 @@
 };
 
 struct device_node {
-	char	*name;
-	char	*type;
+	const char *name;
+	const char *type;
 	phandle	node;
 	phandle linux_phandle;
 	char	*full_name;
@@ -160,7 +160,7 @@
 extern void early_init_devtree(void *);
 extern int device_is_compatible(struct device_node *device, const char *);
 extern int machine_is_compatible(const char *compat);
-extern void *get_property(struct device_node *node, const char *name,
+extern const void *get_property(struct device_node *node, const char *name,
 		int *lenp);
 extern void print_properties(struct device_node *node);
 extern int prom_n_addr_cells(struct device_node* np);
@@ -197,8 +197,8 @@
  */
 
 
-/* Helper to read a big number */
-static inline u64 of_read_number(u32 *cell, int size)
+/* Helper to read a big number; size is in cells (not bytes) */
+static inline u64 of_read_number(const u32 *cell, int size)
 {
 	u64 r = 0;
 	while (size--)
@@ -206,18 +206,28 @@
 	return r;
 }
 
+/* Like of_read_number, but unsigned long; size == 0 reads as 0 too */
+#ifdef CONFIG_PPC32
+static inline unsigned long of_read_ulong(const u32 *cell, int size)
+{
+	return size > 0 ? cell[size-1] : 0;	/* never index cell[-1] */
+}
+#else
+#define of_read_ulong(cell, size)	of_read_number(cell, size)
+#endif
+
 /* Translate an OF address block into a CPU physical address
  */
 #define OF_BAD_ADDR	((u64)-1)
-extern u64 of_translate_address(struct device_node *np, u32 *addr);
+extern u64 of_translate_address(struct device_node *np, const u32 *addr);
 
 /* Extract an address from a device, returns the region size and
  * the address space flags too. The PCI version uses a BAR number
  * instead of an absolute index
  */
-extern u32 *of_get_address(struct device_node *dev, int index,
+extern const u32 *of_get_address(struct device_node *dev, int index,
 			   u64 *size, unsigned int *flags);
-extern u32 *of_get_pci_address(struct device_node *dev, int bar_no,
+extern const u32 *of_get_pci_address(struct device_node *dev, int bar_no,
 			       u64 *size, unsigned int *flags);
 
 /* Get an address as a resource. Note that if your address is
@@ -234,7 +244,7 @@
 /* Parse the ibm,dma-window property of an OF node into the busno, phys and
  * size parameters.
  */
-void of_parse_dma_window(struct device_node *dn, unsigned char *dma_window_prop,
+void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
 		unsigned long *busno, unsigned long *phys, unsigned long *size);
 
 extern void kdump_move_device_tree(void);
@@ -259,7 +269,7 @@
 	u32 specifier[OF_MAX_IRQ_SPEC];	/* Specifier copy */
 };
 
-/***
+/**
  * of_irq_map_init - Initialize the irq remapper
  * @flags:	flags defining workarounds to enable
  *
@@ -272,7 +282,7 @@
 
 extern void of_irq_map_init(unsigned int flags);
 
-/***
+/**
  * of_irq_map_raw - Low level interrupt tree parsing
  * @parent:	the device interrupt parent
  * @intspec:	interrupt specifier ("interrupts" property of the device)
@@ -289,12 +299,12 @@
  *
  */
 
-extern int of_irq_map_raw(struct device_node *parent, u32 *intspec,
-			  u32 ointsize, u32 *addr,
+extern int of_irq_map_raw(struct device_node *parent, const u32 *intspec,
+			  u32 ointsize, const u32 *addr,
 			  struct of_irq *out_irq);
 
 
-/***
+/**
  * of_irq_map_one - Resolve an interrupt for a device
  * @device:	the device whose interrupt is to be resolved
  * @index:     	index of the interrupt to resolve
@@ -307,7 +317,7 @@
 extern int of_irq_map_one(struct device_node *device, int index,
 			  struct of_irq *out_irq);
 
-/***
+/**
  * of_irq_map_pci - Resolve the interrupt for a PCI device
  * @pdev:	the device whose interrupt is to be resolved
  * @out_irq:	structure of_irq filled by this function
diff --git a/include/asm-powerpc/ptrace.h b/include/asm-powerpc/ptrace.h
index dc4cb9c..4435efe 100644
--- a/include/asm-powerpc/ptrace.h
+++ b/include/asm-powerpc/ptrace.h
@@ -215,12 +215,10 @@
 #define PTRACE_GETVRREGS	18
 #define PTRACE_SETVRREGS	19
 
-#ifndef __powerpc64__
 /* Get/set all the upper 32-bits of the SPE registers, accumulator, and
  * spefscr, in one go */
 #define PTRACE_GETEVRREGS	20
 #define PTRACE_SETEVRREGS	21
-#endif /* __powerpc64__ */
 
 /*
  * Get or set a debug register. The first 16 are DABR registers and the
@@ -235,7 +233,6 @@
 #define PPC_PTRACE_GETFPREGS	0x97	/* Get FPRs 0 - 31 */
 #define PPC_PTRACE_SETFPREGS	0x96	/* Set FPRs 0 - 31 */
 
-#ifdef __powerpc64__
 /* Calls to trace a 64bit program from a 32bit program */
 #define PPC_PTRACE_PEEKTEXT_3264 0x95
 #define PPC_PTRACE_PEEKDATA_3264 0x94
@@ -243,6 +240,5 @@
 #define PPC_PTRACE_POKEDATA_3264 0x92
 #define PPC_PTRACE_PEEKUSR_3264  0x91
 #define PPC_PTRACE_POKEUSR_3264  0x90
-#endif /* __powerpc64__ */
 
 #endif /* _ASM_POWERPC_PTRACE_H */
diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h
index cf73475..3a9fcc1 100644
--- a/include/asm-powerpc/reg.h
+++ b/include/asm-powerpc/reg.h
@@ -592,6 +592,7 @@
 #define PV_630p	0x0041
 #define PV_970MP	0x0044
 #define PV_BE		0x0070
+#define PV_PA6T		0x0090
 
 /*
  * Number of entries in the SLB. If this ever changes we should handle
diff --git a/include/asm-powerpc/rtas.h b/include/asm-powerpc/rtas.h
index 82a27e9..d34f9e1 100644
--- a/include/asm-powerpc/rtas.h
+++ b/include/asm-powerpc/rtas.h
@@ -230,5 +230,21 @@
 
 #define GLOBAL_INTERRUPT_QUEUE 9005
 
+/**
+ * rtas_config_addr - Format a busno, devfn and reg for RTAS.
+ * @busno: The bus number.
+ * @devfn: The device and function number as encoded by PCI_DEVFN().
+ * @reg: The register number.
+ *
+ * Packs reg[11:8] into bits 31:28, busno[7:0] into bits 23:16, devfn from
+ * bit 8 up and reg[7:0] into bits 7:0, the "config_addr" layout RTAS calls
+ * expect (PAPR requirement 7.3.4-1); done in u32 to avoid signed overflow.
+ */
+static inline u32 rtas_config_addr(int busno, int devfn, int reg)
+{
+	return (((u32)reg & 0xf00) << 20) | (((u32)busno & 0xff) << 16) |
+			((u32)devfn << 8) | ((u32)reg & 0xff);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_RTAS_H */
diff --git a/include/asm-powerpc/smu.h b/include/asm-powerpc/smu.h
index 51e65fc..e49f644 100644
--- a/include/asm-powerpc/smu.h
+++ b/include/asm-powerpc/smu.h
@@ -517,7 +517,7 @@
  * This returns the pointer to an SMU "sdb" partition data or NULL
  * if not found. The data format is described below
  */
-extern struct smu_sdbp_header *smu_get_sdb_partition(int id,
+extern const struct smu_sdbp_header *smu_get_sdb_partition(int id,
 					unsigned int *size);
 
 /* Get "sdb" partition data from an SMU satellite */
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h
index c02d105..b42b53c 100644
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -106,7 +106,7 @@
 struct spu_runqueue;
 
 struct spu {
-	char *name;
+	const char *name;
 	unsigned long local_store_phys;
 	u8 *local_store;
 	unsigned long problem_phys;
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
index 4c9f522..4b41dea 100644
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -177,11 +177,6 @@
 extern u32 booke_wdt_period;
 #endif /* CONFIG_BOOKE_WDT */
 
-/* EBCDIC -> ASCII conversion for [0-9A-Z] on iSeries */
-extern unsigned char e2a(unsigned char);
-extern unsigned char* strne2a(unsigned char *dest,
-		const unsigned char *src, size_t n);
-
 struct device_node;
 extern void note_scsi_host(struct device_node *, void *);
 
diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h
index dc9bd101..4b51d42 100644
--- a/include/asm-powerpc/vio.h
+++ b/include/asm-powerpc/vio.h
@@ -46,8 +46,8 @@
  */
 struct vio_dev {
 	struct iommu_table *iommu_table;     /* vio_map_* uses this */
-	char *name;
-	char *type;
+	const char *name;
+	const char *type;
 	uint32_t unit_address;
 	unsigned int irq;
 	struct device dev;
diff --git a/include/asm-ppc/ibm4xx.h b/include/asm-ppc/ibm4xx.h
index cf62b69..499c146 100644
--- a/include/asm-ppc/ibm4xx.h
+++ b/include/asm-ppc/ibm4xx.h
@@ -86,7 +86,7 @@
 #define PCI_DRAM_OFFSET	0
 #endif
 
-#elif CONFIG_44x
+#elif defined(CONFIG_44x)
 
 #if defined(CONFIG_BAMBOO)
 #include <platforms/4xx/bamboo.h>
diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h
index 680555b..3d9a9e6 100644
--- a/include/asm-ppc/io.h
+++ b/include/asm-ppc/io.h
@@ -327,26 +327,12 @@
 #define inl_p(port)		inl((port))
 #define outl_p(val, port)	outl((val), (port))
 
-extern void _insb(volatile u8 __iomem *port, void *buf, int ns);
-extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns);
-extern void _insw(volatile u16 __iomem *port, void *buf, int ns);
-extern void _outsw(volatile u16 __iomem *port, const void *buf, int ns);
-extern void _insl(volatile u32 __iomem *port, void *buf, int nl);
-extern void _outsl(volatile u32 __iomem *port, const void *buf, int nl);
-extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns);
-extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns);
-extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl);
-extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl);
-
-/*
- * The *_ns versions below don't do byte-swapping.
- * Neither do the standard versions now, these are just here
- * for older code.
- */
-#define insw_ns(port, buf, ns)	_insw_ns((port)+___IO_BASE, (buf), (ns))
-#define outsw_ns(port, buf, ns)	_outsw_ns((port)+___IO_BASE, (buf), (ns))
-#define insl_ns(port, buf, nl)	_insl_ns((port)+___IO_BASE, (buf), (nl))
-#define outsl_ns(port, buf, nl)	_outsl_ns((port)+___IO_BASE, (buf), (nl))
+extern void _insb(volatile u8 __iomem *port, void *buf, long count);
+extern void _outsb(volatile u8 __iomem *port, const void *buf, long count);
+extern void _insw_ns(volatile u16 __iomem *port, void *buf, long count);
+extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count);
+extern void _insl_ns(volatile u32 __iomem *port, void *buf, long count);
+extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count);
 
 
 #define IO_SPACE_LIMIT ~0
diff --git a/include/asm-ppc/mpc8260_pci9.h b/include/asm-ppc/mpc8260_pci9.h
index 26b3f6e..9f71768 100644
--- a/include/asm-ppc/mpc8260_pci9.h
+++ b/include/asm-ppc/mpc8260_pci9.h
@@ -30,8 +30,6 @@
 #undef inb
 #undef inw
 #undef inl
-#undef insw_ns
-#undef insl_ns
 #undef memcpy_fromio
 
 extern int readb(volatile unsigned char *addr);
@@ -43,8 +41,6 @@
 extern int inb(unsigned port);
 extern int inw(unsigned port);
 extern unsigned inl(unsigned port);
-extern void insw_ns(unsigned port, void *buf, int ns);
-extern void insl_ns(unsigned port, void *buf, int nl);
 extern void *memcpy_fromio(void *dest, unsigned long src, size_t count);
 
 #endif /* !__CONFIG_8260_PCI9_DEFS */
diff --git a/include/asm-ppc/reg_booke.h b/include/asm-ppc/reg_booke.h
index 4944c0f..602fbad 100644
--- a/include/asm-ppc/reg_booke.h
+++ b/include/asm-ppc/reg_booke.h
@@ -300,14 +300,14 @@
 #define DBSR_IC		0x80000000	/* Instruction Completion */
 #define DBSR_BT		0x40000000	/* Branch taken */
 #define DBSR_TIE	0x10000000	/* Trap Instruction debug Event */
-#define DBSR_IAC1	0x00800000	/* Instruction Address Compare 1 Event */
-#define DBSR_IAC2	0x00400000	/* Instruction Address Compare 2 Event */
-#define DBSR_IAC3	0x00200000	/* Instruction Address Compare 3 Event */
-#define DBSR_IAC4	0x00100000	/* Instruction Address Compare 4 Event */
-#define DBSR_DAC1R	0x00080000	/* Data Address Compare 1 Read Event */
-#define DBSR_DAC1W	0x00040000	/* Data Address Compare 1 Write Event */
-#define DBSR_DAC2R	0x00020000	/* Data Address Compare 2 Read Event */
-#define DBSR_DAC2W	0x00010000	/* Data Address Compare 2 Write Event */
+#define DBSR_IAC1	0x04000000	/* Instruction Address Compare 1 Event */
+#define DBSR_IAC2	0x02000000	/* Instruction Address Compare 2 Event */
+#define DBSR_IAC3	0x00080000	/* Instruction Address Compare 3 Event */
+#define DBSR_IAC4	0x00040000	/* Instruction Address Compare 4 Event */
+#define DBSR_DAC1R	0x01000000	/* Data Address Compare 1 Read Event */
+#define DBSR_DAC1W	0x00800000	/* Data Address Compare 1 Write Event */
+#define DBSR_DAC2R	0x00400000	/* Data Address Compare 2 Read Event */
+#define DBSR_DAC2W	0x00200000	/* Data Address Compare 2 Write Event */
 #endif
 
 /* Bit definitions related to the ESR. */
diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild
index ed8955f..088969d 100644
--- a/include/asm-s390/Kbuild
+++ b/include/asm-s390/Kbuild
@@ -1,4 +1,12 @@
 include include/asm-generic/Kbuild.asm
 
-unifdef-y += cmb.h debug.h
-header-y += dasd.h qeth.h tape390.h ucontext.h vtoc.h z90crypt.h
+header-y += dasd.h
+header-y += monwriter.h
+header-y += qeth.h
+header-y += tape390.h
+header-y += ucontext.h
+header-y += vtoc.h
+header-y += z90crypt.h
+
+unifdef-y += cmb.h
+unifdef-y += debug.h
diff --git a/include/asm-s390/appldata.h b/include/asm-s390/appldata.h
new file mode 100644
index 0000000..b177070
--- /dev/null
+++ b/include/asm-s390/appldata.h
@@ -0,0 +1,90 @@
+/*
+ * include/asm-s390/appldata.h
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ */
+
+#ifndef _ASM_S390_APPLDATA_H
+#define _ASM_S390_APPLDATA_H
+
+#include <asm/io.h>
+
+#ifndef CONFIG_64BIT
+
+#define APPLDATA_START_INTERVAL_REC	0x00	/* Function codes for */
+#define APPLDATA_STOP_REC		0x01	/* DIAG 0xDC	      */
+#define APPLDATA_GEN_EVENT_REC		0x02
+#define APPLDATA_START_CONFIG_REC	0x03
+
+/*
+ * Parameter list for DIAGNOSE X'DC'
+ */
+struct appldata_parameter_list {
+	u16 diag;		/* The DIAGNOSE code X'00DC'	      */
+	u8  function;		/* The function code for the DIAGNOSE */
+	u8  parlist_length;	/* Length of the parameter list       */
+	u32 product_id_addr;	/* Address of the 16-byte product ID  */
+	u16 reserved;
+	u16 buffer_length;	/* Length of the application data buffer  */
+	u32 buffer_addr;	/* Address of the application data buffer */
+} __attribute__ ((packed));
+
+#else /* CONFIG_64BIT */
+
+#define APPLDATA_START_INTERVAL_REC	0x80
+#define APPLDATA_STOP_REC		0x81
+#define APPLDATA_GEN_EVENT_REC		0x82
+#define APPLDATA_START_CONFIG_REC	0x83
+
+/*
+ * Parameter list for DIAGNOSE X'DC'
+ */
+struct appldata_parameter_list {
+	u16 diag;
+	u8  function;
+	u8  parlist_length;
+	u32 unused01;
+	u16 reserved;
+	u16 buffer_length;
+	u32 unused02;
+	u64 product_id_addr;
+	u64 buffer_addr;
+} __attribute__ ((packed));
+
+#endif /* CONFIG_64BIT */
+
+struct appldata_product_id {
+	char prod_nr[7];	/* product number */
+	u16  prod_fn;		/* product function */
+	u8   record_nr; 	/* record number */
+	u16  version_nr;	/* version */
+	u16  release_nr;	/* release */
+	u16  mod_lvl;		/* modification level */
+} __attribute__ ((packed));
+
+static inline int appldata_asm(struct appldata_product_id *id,
+			       unsigned short fn, void *buffer,
+			       unsigned short length)
+{
+	struct appldata_parameter_list parm_list;
+	int ry;
+
+	if (!MACHINE_IS_VM)
+		return -ENOSYS;
+	parm_list.diag = 0xdc;
+	parm_list.function = fn;
+	parm_list.parlist_length = sizeof(parm_list);
+	parm_list.buffer_length = length;
+	parm_list.product_id_addr = (unsigned long) id;
+	parm_list.buffer_addr = virt_to_phys(buffer);
+	asm volatile(
+		"diag %1,%0,0xdc"
+		: "=d" (ry)
+		: "d" (&parm_list), "m" (parm_list), "m" (*id)
+		: "cc");
+	return ry;
+}
+
+#endif /* _ASM_S390_APPLDATA_H */
diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h
index 28fdd6e..da063cd 100644
--- a/include/asm-s390/cio.h
+++ b/include/asm-s390/cio.h
@@ -270,6 +270,11 @@
 	__u32 vrdccrft : 8;    /* real device feature (output) */
 } __attribute__ ((packed,aligned(4)));
 
+struct ccw_dev_id {
+	u8 ssid;
+	u16 devno;
+};
+
 extern int diag210(struct diag210 *addr);
 
 extern void wait_cons_dev(void);
@@ -280,6 +285,8 @@
 
 extern void css_schedule_reprobe(void);
 
+extern void reipl_ccw_dev(struct ccw_dev_id *id);
+
 #endif
 
 #endif
diff --git a/include/asm-s390/dma.h b/include/asm-s390/dma.h
index 02720c4..7425c6a 100644
--- a/include/asm-s390/dma.h
+++ b/include/asm-s390/dma.h
@@ -11,6 +11,6 @@
 
 #define MAX_DMA_ADDRESS         0x80000000
 
-#define free_dma(x)
+#define free_dma(x)	do { } while (0)
 
 #endif /* _ASM_DMA_H */
diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h
index ffedf14..5e261e1 100644
--- a/include/asm-s390/futex.h
+++ b/include/asm-s390/futex.h
@@ -7,75 +7,21 @@
 #include <asm/errno.h>
 #include <asm/uaccess.h>
 
-#ifndef __s390x__
-#define __futex_atomic_fixup \
-		     ".section __ex_table,\"a\"\n"			\
-		     "   .align 4\n"					\
-		     "   .long  0b,4b,2b,4b,3b,4b\n"			\
-		     ".previous"
-#else /* __s390x__ */
-#define __futex_atomic_fixup \
-		     ".section __ex_table,\"a\"\n"			\
-		     "   .align 8\n"					\
-		     "   .quad  0b,4b,2b,4b,3b,4b\n"			\
-		     ".previous"
-#endif /* __s390x__ */
-
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg)	\
-	asm volatile("   sacf 256\n"					\
-		     "0: l   %1,0(%6)\n"				\
-		     "1: " insn						\
-		     "2: cs  %1,%2,0(%6)\n"				\
-		     "3: jl  1b\n"					\
-		     "   lhi %0,0\n"					\
-		     "4: sacf 0\n"					\
-		     __futex_atomic_fixup				\
-		     : "=d" (ret), "=&d" (oldval), "=&d" (newval),	\
-		       "=m" (*uaddr)					\
-		     : "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
-		       "m" (*uaddr) : "cc" );
-
 static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 {
 	int op = (encoded_op >> 28) & 7;
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, newval, ret;
+	int oldval, ret;
+
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	inc_preempt_count();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-		__futex_atomic_op("lr %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ADD:
-		__futex_atomic_op("lr %2,%1\nar %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	case FUTEX_OP_OR:
-		__futex_atomic_op("lr %2,%1\nor %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	case FUTEX_OP_ANDN:
-		__futex_atomic_op("lr %2,%1\nnr %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	case FUTEX_OP_XOR:
-		__futex_atomic_op("lr %2,%1\nxr %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
-		break;
-	default:
-		ret = -ENOSYS;
-	}
-
-	dec_preempt_count();
+	ret = uaccess.futex_atomic_op(op, uaddr, oparg, &oldval);
 
 	if (!ret) {
 		switch (cmp) {
@@ -91,32 +37,13 @@
 	return ret;
 }
 
-static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr,
+						int oldval, int newval)
 {
-	int ret;
-
 	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
-	asm volatile("   sacf 256\n"
-		     "   cs   %1,%4,0(%5)\n"
-		     "0: lr   %0,%1\n"
-		     "1: sacf 0\n"
-#ifndef __s390x__
-		     ".section __ex_table,\"a\"\n"
-		     "   .align 4\n"
-		     "   .long  0b,1b\n"
-		     ".previous"
-#else /* __s390x__ */
-		     ".section __ex_table,\"a\"\n"
-		     "   .align 8\n"
-		     "   .quad  0b,1b\n"
-		     ".previous"
-#endif /* __s390x__ */
-		     : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
-		     : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
-		     : "cc", "memory" );
-	return oldval;
+
+	return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval);
 }
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-s390/io.h b/include/asm-s390/io.h
index d4614b3..a6cc27e 100644
--- a/include/asm-s390/io.h
+++ b/include/asm-s390/io.h
@@ -116,7 +116,7 @@
 #define outb(x,addr) ((void) writeb(x,addr))
 #define outb_p(x,addr) outb(x,addr)
 
-#define mmiowb()
+#define mmiowb()	do { } while (0)
 
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
diff --git a/include/asm-s390/kdebug.h b/include/asm-s390/kdebug.h
new file mode 100644
index 0000000..40cc680
--- /dev/null
+++ b/include/asm-s390/kdebug.h
@@ -0,0 +1,59 @@
+#ifndef _S390_KDEBUG_H
+#define _S390_KDEBUG_H
+
+/*
+ * Feb 2006 Ported to s390 <grundym@us.ibm.com>
+ */
+#include <linux/notifier.h>
+
+struct pt_regs;
+
+struct die_args {
+	struct pt_regs *regs;
+	const char *str;
+	long err;
+	int trapnr;
+	int signr;
+};
+
+/* Note: you should never unregister, because that can race with NMIs.
+ * If you really want to do it: first unregister, then synchronize_sched,
+ * then free.
+ */
+extern int register_die_notifier(struct notifier_block *);
+extern int unregister_die_notifier(struct notifier_block *);
+extern int register_page_fault_notifier(struct notifier_block *);
+extern int unregister_page_fault_notifier(struct notifier_block *);
+extern struct atomic_notifier_head s390die_chain;
+
+
+enum die_val {
+	DIE_OOPS = 1,
+	DIE_BPT,
+	DIE_SSTEP,
+	DIE_PANIC,
+	DIE_NMI,
+	DIE_DIE,
+	DIE_NMIWATCHDOG,
+	DIE_KERNELDEBUG,
+	DIE_TRAP,
+	DIE_GPF,
+	DIE_CALL,
+	DIE_NMI_IPI,
+	DIE_PAGE_FAULT,
+};
+
+static inline int notify_die(enum die_val val, const char *str,
+			struct pt_regs *regs, long err, int trap, int sig)
+{
+	struct die_args args = {
+		.regs = regs,
+		.str = str,
+		.err = err,
+		.trapnr = trap,
+		.signr = sig
+	};
+	return atomic_notifier_call_chain(&s390die_chain, val, &args);
+}
+
+#endif
diff --git a/include/asm-s390/kprobes.h b/include/asm-s390/kprobes.h
new file mode 100644
index 0000000..b847ff0
--- /dev/null
+++ b/include/asm-s390/kprobes.h
@@ -0,0 +1,114 @@
+#ifndef _ASM_S390_KPROBES_H
+#define _ASM_S390_KPROBES_H
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2006
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes suggestions from
+ *		Rusty Russell).
+ * 2004-Nov	Modified for PPC64 by Ananth N Mavinakayanahalli
+ *		<ananth@in.ibm.com>
+ * 2005-Dec	Used as a template for s390 by Mike Grundy
+ *		<grundym@us.ibm.com>
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define  __ARCH_WANT_KPROBES_INSN_SLOT
+struct pt_regs;
+struct kprobe;
+
+typedef u16 kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION	0x0002
+
+/* Maximum instruction size is 3 (16bit) halfwords: */
+#define MAX_INSN_SIZE		0x0003
+#define MAX_STACK_SIZE		64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+	(((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
+	? (MAX_STACK_SIZE) \
+	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
+
+#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)(pentry)
+
+#define ARCH_SUPPORTS_KRETPROBES
+#define ARCH_INACTIVE_KPROBE_COUNT 0
+
+#define KPROBE_SWAP_INST	0x10
+
+#define FIXUP_PSW_NORMAL	0x08
+#define FIXUP_BRANCH_NOT_TAKEN	0x04
+#define FIXUP_RETURN_REGISTER	0x02
+#define FIXUP_NOT_REQUIRED	0x01
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/* copy of original instruction */
+	kprobe_opcode_t *insn;
+	int fixup;
+	int ilen;
+	int reg;
+};
+
+struct ins_replace_args {
+	kprobe_opcode_t *ptr;
+	kprobe_opcode_t old;
+	kprobe_opcode_t new;
+};
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+	unsigned long saved_psw;
+	unsigned long kprobe_saved_imask;
+	unsigned long kprobe_saved_ctl[3];
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	unsigned long kprobe_saved_imask;
+	unsigned long kprobe_saved_ctl[3];
+	struct pt_regs jprobe_saved_regs;
+	unsigned long jprobe_saved_r14;
+	unsigned long jprobe_saved_r15;
+	struct prev_kprobe prev_kprobe;
+	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+void kretprobe_trampoline(void);
+int  is_prohibited_opcode(kprobe_opcode_t *instruction);
+void get_instruction_type(struct arch_specific_insn *ainsn);
+
+#define flush_insn_slot(p)	do { } while (0)
+
+#ifdef CONFIG_KPROBES
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+					unsigned long val, void *data);
+#else	/* !CONFIG_KPROBES */
+/* Stub; kept inside the include guard so re-inclusion can't redefine it. */
+static inline int kprobe_exceptions_notify(struct notifier_block *self,
+						unsigned long val, void *data)
+{
+	return 0;
+}
+#endif	/* CONFIG_KPROBES */
+
+#endif	/* _ASM_S390_KPROBES_H */
diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h
index 596c8b1..18695d1 100644
--- a/include/asm-s390/lowcore.h
+++ b/include/asm-s390/lowcore.h
@@ -35,6 +35,7 @@
 #define __LC_IO_NEW_PSW                 0x01f0
 #endif /* !__s390x__ */
 
+#define __LC_IPL_PARMBLOCK_PTR		0x014
 #define __LC_EXT_PARAMS                 0x080
 #define __LC_CPU_ADDRESS                0x084
 #define __LC_EXT_INT_CODE               0x086
@@ -47,6 +48,7 @@
 #define __LC_PER_ATMID			0x096
 #define __LC_PER_ADDRESS		0x098
 #define __LC_PER_ACCESS_ID		0x0A1
+#define __LC_AR_MODE_ID			0x0A3
 
 #define __LC_SUBCHANNEL_ID              0x0B8
 #define __LC_SUBCHANNEL_NR              0x0BA
@@ -106,18 +108,28 @@
 #define __LC_INT_CLOCK			0xDE8
 #endif /* __s390x__ */
 
-#define __LC_PANIC_MAGIC                0xE00
 
+#define __LC_PANIC_MAGIC		0xE00
 #ifndef __s390x__
 #define __LC_PFAULT_INTPARM             0x080
 #define __LC_CPU_TIMER_SAVE_AREA        0x0D8
+#define __LC_CLOCK_COMP_SAVE_AREA	0x0E0
+#define __LC_PSW_SAVE_AREA		0x100
+#define __LC_PREFIX_SAVE_AREA		0x108
 #define __LC_AREGS_SAVE_AREA            0x120
+#define __LC_FPREGS_SAVE_AREA		0x160
 #define __LC_GPREGS_SAVE_AREA           0x180
 #define __LC_CREGS_SAVE_AREA            0x1C0
 #else /* __s390x__ */
 #define __LC_PFAULT_INTPARM             0x11B8
+#define __LC_FPREGS_SAVE_AREA		0x1200
 #define __LC_GPREGS_SAVE_AREA           0x1280
+#define __LC_PSW_SAVE_AREA		0x1300
+#define __LC_PREFIX_SAVE_AREA		0x1318
+#define __LC_FP_CREG_SAVE_AREA		0x131C
+#define __LC_TODREG_SAVE_AREA		0x1324
 #define __LC_CPU_TIMER_SAVE_AREA        0x1328
+#define __LC_CLOCK_COMP_SAVE_AREA	0x1331
 #define __LC_AREGS_SAVE_AREA            0x1340
 #define __LC_CREGS_SAVE_AREA            0x1380
 #endif /* __s390x__ */
diff --git a/include/asm-s390/monwriter.h b/include/asm-s390/monwriter.h
new file mode 100644
index 0000000..f0cbf96
--- /dev/null
+++ b/include/asm-s390/monwriter.h
@@ -0,0 +1,33 @@
+/*
+ * include/asm-s390/monwriter.h
+ *
+ * Copyright (C) IBM Corp. 2006
+ * Character device driver for writing z/VM APPLDATA monitor records
+ * Version 1.0
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ *
+ */
+
+#ifndef _ASM_390_MONWRITER_H
+#define _ASM_390_MONWRITER_H
+
+/* mon_function values */
+#define MONWRITE_START_INTERVAL	0x00 /* start interval recording */
+#define MONWRITE_STOP_INTERVAL	0x01 /* stop interval or config recording */
+#define MONWRITE_GEN_EVENT	0x02 /* generate event record */
+#define MONWRITE_START_CONFIG	0x03 /* start configuration recording */
+
+/* the header the app uses in its write() data */
+struct monwrite_hdr {
+	unsigned char mon_function;
+	unsigned short applid;
+	unsigned char record_num;
+	unsigned short version;
+	unsigned short release;
+	unsigned short mod_level;
+	unsigned short datalen;
+	unsigned char hdrlen;
+
+} __attribute__((packed));
+
+#endif /* _ASM_390_MONWRITER_H */
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index a78e853..803bc70 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -22,6 +22,16 @@
 extern void diag10(unsigned long addr);
 
 /*
+ * Page allocation orders.
+ */
+#ifndef __s390x__
+# define PGD_ALLOC_ORDER	1
+#else /* __s390x__ */
+# define PMD_ALLOC_ORDER	2
+# define PGD_ALLOC_ORDER	2
+#endif /* __s390x__ */
+
+/*
  * Allocate and free page tables. The xxx_kernel() versions are
  * used to allocate a kernel page table - this turns on ASN bits
  * if any.
@@ -29,30 +39,23 @@
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	pgd_t *pgd;
+	pgd_t *pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ALLOC_ORDER);
 	int i;
 
+	if (!pgd)
+		return NULL;
+	for (i = 0; i < PTRS_PER_PGD; i++)
 #ifndef __s390x__
-	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,1);
-        if (pgd != NULL)
-		for (i = 0; i < USER_PTRS_PER_PGD; i++)
-			pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE));
-#else /* __s390x__ */
-	pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,2);
-        if (pgd != NULL)
-		for (i = 0; i < PTRS_PER_PGD; i++)
-			pgd_clear(pgd + i);
-#endif /* __s390x__ */
+		pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE));
+#else
+		pgd_clear(pgd + i);
+#endif
 	return pgd;
 }
 
 static inline void pgd_free(pgd_t *pgd)
 {
-#ifndef __s390x__
-        free_pages((unsigned long) pgd, 1);
-#else /* __s390x__ */
-        free_pages((unsigned long) pgd, 2);
-#endif /* __s390x__ */
+	free_pages((unsigned long) pgd, PGD_ALLOC_ORDER);
 }
 
 #ifndef __s390x__
@@ -68,20 +71,19 @@
 #else /* __s390x__ */
 static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pmd_t *pmd;
-        int i;
+	pmd_t *pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ALLOC_ORDER);
+	int i;
 
-	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, 2);
-	if (pmd != NULL) {
-		for (i=0; i < PTRS_PER_PMD; i++)
-			pmd_clear(pmd+i);
-	}
+	if (!pmd)
+		return NULL;
+	for (i=0; i < PTRS_PER_PMD; i++)
+		pmd_clear(pmd + i);
 	return pmd;
 }
 
 static inline void pmd_free (pmd_t *pmd)
 {
-	free_pages((unsigned long) pmd, 2);
+	free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);
 }
 
 #define __pmd_free_tlb(tlb,pmd)			\
@@ -123,15 +125,14 @@
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pte_t *pte;
-        int i;
+	pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	int i;
 
-	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
-	if (pte != NULL) {
-		for (i=0; i < PTRS_PER_PTE; i++) {
-			pte_clear(mm, vmaddr, pte+i);
-			vmaddr += PAGE_SIZE;
-		}
+	if (!pte)
+		return NULL;
+	for (i=0; i < PTRS_PER_PTE; i++) {
+		pte_clear(mm, vmaddr, pte + i);
+		vmaddr += PAGE_SIZE;
 	}
 	return pte;
 }
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 2431238..1a07028 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -89,19 +89,6 @@
 # define PTRS_PER_PGD    2048
 #endif /* __s390x__ */
 
-/*
- * pgd entries used up by user/kernel:
- */
-#ifndef __s390x__
-# define USER_PTRS_PER_PGD  512
-# define USER_PGD_PTRS      512
-# define KERNEL_PGD_PTRS    512
-#else /* __s390x__ */
-# define USER_PTRS_PER_PGD  2048
-# define USER_PGD_PTRS      2048
-# define KERNEL_PGD_PTRS    2048
-#endif /* __s390x__ */
-
 #define FIRST_USER_ADDRESS  0
 
 #define pte_ERROR(e) \
@@ -216,12 +203,14 @@
 #define _PAGE_RO        0x200          /* HW read-only                     */
 #define _PAGE_INVALID   0x400          /* HW invalid                       */
 
-/* Mask and four different kinds of invalid pages. */
-#define _PAGE_INVALID_MASK	0x601
-#define _PAGE_INVALID_EMPTY	0x400
-#define _PAGE_INVALID_NONE	0x401
-#define _PAGE_INVALID_SWAP	0x600
-#define _PAGE_INVALID_FILE	0x601
+/* Mask and six different types of pages. */
+#define _PAGE_TYPE_MASK		0x601
+#define _PAGE_TYPE_EMPTY	0x400
+#define _PAGE_TYPE_NONE		0x401
+#define _PAGE_TYPE_SWAP		0x600
+#define _PAGE_TYPE_FILE		0x601
+#define _PAGE_TYPE_RO		0x200
+#define _PAGE_TYPE_RW		0x000
 
 #ifndef __s390x__
 
@@ -280,15 +269,14 @@
 #endif /* __s390x__ */
 
 /*
- * No mapping available
+ * Page protection definitions.
  */
-#define PAGE_NONE_SHARED  __pgprot(_PAGE_INVALID_NONE)
-#define PAGE_NONE_PRIVATE __pgprot(_PAGE_INVALID_NONE)
-#define PAGE_RO_SHARED	  __pgprot(_PAGE_RO)
-#define PAGE_RO_PRIVATE	  __pgprot(_PAGE_RO)
-#define PAGE_COPY	  __pgprot(_PAGE_RO)
-#define PAGE_SHARED	  __pgprot(0)
-#define PAGE_KERNEL	  __pgprot(0)
+#define PAGE_NONE	__pgprot(_PAGE_TYPE_NONE)
+#define PAGE_RO		__pgprot(_PAGE_TYPE_RO)
+#define PAGE_RW		__pgprot(_PAGE_TYPE_RW)
+
+#define PAGE_KERNEL	PAGE_RW
+#define PAGE_COPY	PAGE_RO
 
 /*
  * The S390 can't do page protection for execute, and considers that the
@@ -296,23 +284,23 @@
  * the closest we can get..
  */
          /*xwr*/
-#define __P000  PAGE_NONE_PRIVATE
-#define __P001  PAGE_RO_PRIVATE
-#define __P010  PAGE_COPY
-#define __P011  PAGE_COPY
-#define __P100  PAGE_RO_PRIVATE
-#define __P101  PAGE_RO_PRIVATE
-#define __P110  PAGE_COPY
-#define __P111  PAGE_COPY
+#define __P000	PAGE_NONE
+#define __P001	PAGE_RO
+#define __P010	PAGE_RO
+#define __P011	PAGE_RO
+#define __P100	PAGE_RO
+#define __P101	PAGE_RO
+#define __P110	PAGE_RO
+#define __P111	PAGE_RO
 
-#define __S000  PAGE_NONE_SHARED
-#define __S001  PAGE_RO_SHARED
-#define __S010  PAGE_SHARED
-#define __S011  PAGE_SHARED
-#define __S100  PAGE_RO_SHARED
-#define __S101  PAGE_RO_SHARED
-#define __S110  PAGE_SHARED
-#define __S111  PAGE_SHARED
+#define __S000	PAGE_NONE
+#define __S001	PAGE_RO
+#define __S010	PAGE_RW
+#define __S011	PAGE_RW
+#define __S100	PAGE_RO
+#define __S101	PAGE_RO
+#define __S110	PAGE_RW
+#define __S111	PAGE_RW
 
 /*
  * Certain architectures need to do special things when PTEs
@@ -377,18 +365,18 @@
 
 static inline int pte_none(pte_t pte)
 {
-	return (pte_val(pte) & _PAGE_INVALID_MASK) == _PAGE_INVALID_EMPTY;
+	return (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_EMPTY;
 }
 
 static inline int pte_present(pte_t pte)
 {
 	return !(pte_val(pte) & _PAGE_INVALID) ||
-		(pte_val(pte) & _PAGE_INVALID_MASK) == _PAGE_INVALID_NONE;
+		(pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_NONE;
 }
 
 static inline int pte_file(pte_t pte)
 {
-	return (pte_val(pte) & _PAGE_INVALID_MASK) == _PAGE_INVALID_FILE;
+	return (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_FILE;
 }
 
 #define pte_same(a,b)	(pte_val(a) == pte_val(b))
@@ -461,7 +449,7 @@
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	pte_val(*ptep) = _PAGE_INVALID_EMPTY;
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
 }
 
 /*
@@ -477,7 +465,7 @@
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	/* Do not clobber _PAGE_INVALID_NONE pages!  */
+	/* Do not clobber _PAGE_TYPE_NONE pages!  */
 	if (!(pte_val(pte) & _PAGE_INVALID))
 		pte_val(pte) |= _PAGE_RO;
 	return pte;
@@ -556,26 +544,30 @@
 	return pte;
 }
 
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
+{
+	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+#ifndef __s390x__
+		/* S390 has 1MB segments; we are emulating 4MB segments */
+		pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
+#else
+		/* ipte in zarch mode can do the math */
+		pte_t *pto = ptep;
+#endif
+		asm volatile ("ipte %2,%3"
+			      : "=m" (*ptep) : "m" (*ptep),
+				"a" (pto), "a" (address) );
+	}
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+}
+
 static inline pte_t
 ptep_clear_flush(struct vm_area_struct *vma,
 		 unsigned long address, pte_t *ptep)
 {
 	pte_t pte = *ptep;
-#ifndef __s390x__
-	if (!(pte_val(pte) & _PAGE_INVALID)) {
-		/* S390 has 1mb segments, we are emulating 4MB segments */
-		pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
-		__asm__ __volatile__ ("ipte %2,%3"
-				      : "=m" (*ptep) : "m" (*ptep),
-				        "a" (pto), "a" (address) );
-	}
-#else /* __s390x__ */
-	if (!(pte_val(pte) & _PAGE_INVALID)) 
-		__asm__ __volatile__ ("ipte %2,%3"
-				      : "=m" (*ptep) : "m" (*ptep),
-				        "a" (ptep), "a" (address) );
-#endif /* __s390x__ */
-	pte_val(*ptep) = _PAGE_INVALID_EMPTY;
+
+	__ptep_ipte(address, ptep);
 	return pte;
 }
 
@@ -755,7 +747,7 @@
 {
 	pte_t pte;
 	offset &= __SWP_OFFSET_MASK;
-	pte_val(pte) = _PAGE_INVALID_SWAP | ((type & 0x1f) << 2) |
+	pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) |
 		((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
 	return pte;
 }
@@ -778,7 +770,7 @@
 
 #define pgoff_to_pte(__off) \
 	((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
-		   | _PAGE_INVALID_FILE })
+		   | _PAGE_TYPE_FILE })
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index 5b71d37..a3a4e5f 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -339,4 +339,21 @@
 
 #endif
 
+/*
+ * Helper macro for exception table entries
+ */
+#ifndef __s390x__
+#define EX_TABLE(_fault,_target)			\
+	".section __ex_table,\"a\"\n"			\
+	"	.align 4\n"				\
+	"	.long  " #_fault "," #_target "\n"	\
+	".previous\n"
+#else
+#define EX_TABLE(_fault,_target)			\
+	".section __ex_table,\"a\"\n"			\
+	"	.align 8\n"				\
+	"	.quad  " #_fault "," #_target "\n"	\
+	".previous\n"
+#endif
+
 #endif                                 /* __ASM_S390_PROCESSOR_H           */
diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h
index 19e3197..f195973 100644
--- a/include/asm-s390/setup.h
+++ b/include/asm-s390/setup.h
@@ -14,8 +14,6 @@
 
 #define PARMAREA		0x10400
 #define COMMAND_LINE_SIZE 	896
-#define RAMDISK_ORIGIN		0x800000
-#define RAMDISK_SIZE		0x800000
 #define MEMORY_CHUNKS		16	/* max 0x7fff */
 #define IPL_PARMBLOCK_ORIGIN	0x2000
 
@@ -46,10 +44,12 @@
 #define MACHINE_HAS_IEEE	(machine_flags & 2)
 #define MACHINE_HAS_CSP		(machine_flags & 8)
 #define MACHINE_HAS_DIAG44	(1)
+#define MACHINE_HAS_MVCOS	(0)
 #else /* __s390x__ */
 #define MACHINE_HAS_IEEE	(1)
 #define MACHINE_HAS_CSP		(1)
 #define MACHINE_HAS_DIAG44	(machine_flags & 32)
+#define MACHINE_HAS_MVCOS	(machine_flags & 512)
 #endif /* __s390x__ */
 
 
@@ -70,52 +70,76 @@
 #define SET_CONSOLE_3215	do { console_mode = 2; } while (0)
 #define SET_CONSOLE_3270	do { console_mode = 3; } while (0)
 
-struct ipl_list_header {
-	u32 length;
-	u8  reserved[3];
+
+struct ipl_list_hdr {
+	u32 len;
+	u8  reserved1[3];
 	u8  version;
+	u32 blk0_len;
+	u8  pbt;
+	u8  flags;
+	u16 reserved2;
 } __attribute__((packed));
 
 struct ipl_block_fcp {
-	u32 length;
-	u8  pbt;
-	u8  reserved1[322-1];
+	u8  reserved1[313-1];
+	u8  opt;
+	u8  reserved2[3];
+	u16 reserved3;
 	u16 devno;
-	u8  reserved2[4];
+	u8  reserved4[4];
 	u64 wwpn;
 	u64 lun;
 	u32 bootprog;
-	u8  reserved3[12];
+	u8  reserved5[12];
 	u64 br_lba;
 	u32 scp_data_len;
-	u8  reserved4[260];
+	u8  reserved6[260];
 	u8  scp_data[];
 } __attribute__((packed));
 
-struct ipl_parameter_block {
-	union {
-		u32 length;
-		struct ipl_list_header header;
-	} hdr;
-	struct ipl_block_fcp fcp;
+struct ipl_block_ccw {
+	u8  load_param[8];
+	u8  reserved1[84];
+	u8  reserved2[2];
+	u16 devno;
+	u8  vm_flags;
+	u8  reserved3[3];
+	u32 vm_parm_len;
 } __attribute__((packed));
 
-#define IPL_MAX_SUPPORTED_VERSION (0)
+struct ipl_parameter_block {
+	struct ipl_list_hdr hdr;
+	union {
+		struct ipl_block_fcp fcp;
+		struct ipl_block_ccw ccw;
+	} ipl_info;
+} __attribute__((packed));
 
-#define IPL_TYPE_FCP (0)
+#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
+			      sizeof(struct ipl_block_fcp))
+
+#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \
+			      sizeof(struct ipl_block_ccw))
+
+#define IPL_MAX_SUPPORTED_VERSION (0)
 
 /*
  * IPL validity flags and parameters as detected in head.S
  */
-extern u32 ipl_parameter_flags;
+extern u32 ipl_flags;
 extern u16 ipl_devno;
 
-#define IPL_DEVNO_VALID		(ipl_parameter_flags & 1)
-#define IPL_PARMBLOCK_VALID	(ipl_parameter_flags & 2)
+void do_reipl(void);
+
+enum {
+	IPL_DEVNO_VALID	= 1,
+	IPL_PARMBLOCK_VALID = 2,
+};
 
 #define IPL_PARMBLOCK_START	((struct ipl_parameter_block *) \
 				 IPL_PARMBLOCK_ORIGIN)
-#define IPL_PARMBLOCK_SIZE	(IPL_PARMBLOCK_START->hdr.length)
+#define IPL_PARMBLOCK_SIZE	(IPL_PARMBLOCK_START->hdr.len)
 
 #else /* __ASSEMBLY__ */
 
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index 6576460..9fb02e9 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -104,7 +104,7 @@
 #define smp_cpu_not_running(cpu)	1
 #define smp_get_cpu(cpu) ({ 0; })
 #define smp_put_cpu(cpu) ({ 0; })
-#define smp_setup_cpu_possible_map()
+#define smp_setup_cpu_possible_map()	do { } while (0)
 #endif
 
 #endif
diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h
index 0b7c0ca..e2047b0 100644
--- a/include/asm-s390/uaccess.h
+++ b/include/asm-s390/uaccess.h
@@ -47,7 +47,7 @@
 		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
 	asm volatile ("lctlg 7,7,%0" : : "m" (__pto) );			\
 })
-#else
+#else /* __s390x__ */
 #define set_fs(x) \
 ({									\
 	unsigned long __pto;						\
@@ -56,7 +56,7 @@
 		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
 	asm volatile ("lctl  7,7,%0" : : "m" (__pto) );			\
 })
-#endif
+#endif /* __s390x__ */
 
 #define segment_eq(a,b) ((a).ar4 == (b).ar4)
 
@@ -85,76 +85,51 @@
         unsigned long insn, fixup;
 };
 
-#ifndef __s390x__
-#define __uaccess_fixup \
-	".section .fixup,\"ax\"\n"	\
-	"2: lhi    %0,%4\n"		\
-	"   bras   1,3f\n"		\
-	"   .long  1b\n"		\
-	"3: l      1,0(1)\n"		\
-	"   br     1\n"			\
-	".previous\n"			\
-	".section __ex_table,\"a\"\n"	\
-	"   .align 4\n"			\
-	"   .long  0b,2b\n"		\
-	".previous"
-#define __uaccess_clobber "cc", "1"
-#else /* __s390x__ */
-#define __uaccess_fixup \
-	".section .fixup,\"ax\"\n"	\
-	"2: lghi   %0,%4\n"		\
-	"   jg     1b\n"		\
-	".previous\n"			\
-	".section __ex_table,\"a\"\n"	\
-	"   .align 8\n"			\
-	"   .quad  0b,2b\n"		\
-	".previous"
-#define __uaccess_clobber "cc"
-#endif /* __s390x__ */
+struct uaccess_ops {
+	size_t (*copy_from_user)(size_t, const void __user *, void *);
+	size_t (*copy_from_user_small)(size_t, const void __user *, void *);
+	size_t (*copy_to_user)(size_t, void __user *, const void *);
+	size_t (*copy_to_user_small)(size_t, void __user *, const void *);
+	size_t (*copy_in_user)(size_t, void __user *, const void __user *);
+	size_t (*clear_user)(size_t, void __user *);
+	size_t (*strnlen_user)(size_t, const char __user *);
+	size_t (*strncpy_from_user)(size_t, const char __user *, char *);
+	int (*futex_atomic_op)(int op, int __user *, int oparg, int *old);
+	int (*futex_atomic_cmpxchg)(int __user *, int old, int new);
+};
+
+extern struct uaccess_ops uaccess;
+extern struct uaccess_ops uaccess_std;
+extern struct uaccess_ops uaccess_mvcos;
+
+static inline int __put_user_fn(size_t size, void __user *ptr, void *x)
+{
+	size = uaccess.copy_to_user_small(size, ptr, x);
+	return size ? -EFAULT : size;
+}
+
+static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
+{
+	size = uaccess.copy_from_user_small(size, ptr, x);
+	return size ? -EFAULT : size;
+}
 
 /*
  * These are the main single-value transfer routines.  They automatically
  * use the right size if we just have the right pointer type.
  */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
-#define __put_user_asm(x, ptr, err) \
-({								\
-	err = 0;						\
-	asm volatile(						\
-		"0: mvcs  0(%1,%2),%3,%0\n"			\
-		"1:\n"						\
-		__uaccess_fixup					\
-		: "+&d" (err)					\
-		: "d" (sizeof(*(ptr))), "a" (ptr), "Q" (x),	\
-		  "K" (-EFAULT)					\
-		: __uaccess_clobber );				\
-})
-#else
-#define __put_user_asm(x, ptr, err) \
-({								\
-	err = 0;						\
-	asm volatile(						\
-		"0: mvcs  0(%1,%2),0(%3),%0\n"			\
-		"1:\n"						\
-		__uaccess_fixup					\
-		: "+&d" (err)					\
-		: "d" (sizeof(*(ptr))), "a" (ptr), "a" (&(x)),	\
-		  "K" (-EFAULT), "m" (x)			\
-		: __uaccess_clobber );				\
-})
-#endif
-
 #define __put_user(x, ptr) \
 ({								\
 	__typeof__(*(ptr)) __x = (x);				\
-	int __pu_err;						\
+	int __pu_err = -EFAULT;					\
         __chk_user_ptr(ptr);                                    \
 	switch (sizeof (*(ptr))) {				\
 	case 1:							\
 	case 2:							\
 	case 4:							\
 	case 8:							\
-		__put_user_asm(__x, ptr, __pu_err);		\
+		__pu_err = __put_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		break;						\
 	default:						\
 		__put_user_bad();				\
@@ -172,60 +147,36 @@
 
 extern int __put_user_bad(void) __attribute__((noreturn));
 
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
-#define __get_user_asm(x, ptr, err) \
-({								\
-	err = 0;						\
-	asm volatile (						\
-		"0: mvcp  %O1(%2,%R1),0(%3),%0\n"		\
-		"1:\n"						\
-		__uaccess_fixup					\
-		: "+&d" (err), "=Q" (x)				\
-		: "d" (sizeof(*(ptr))), "a" (ptr),		\
-		  "K" (-EFAULT)					\
-		: __uaccess_clobber );				\
-})
-#else
-#define __get_user_asm(x, ptr, err) \
-({								\
-	err = 0;						\
-	asm volatile (						\
-		"0: mvcp  0(%2,%5),0(%3),%0\n"			\
-		"1:\n"						\
-		__uaccess_fixup					\
-		: "+&d" (err), "=m" (x)				\
-		: "d" (sizeof(*(ptr))), "a" (ptr),		\
-		  "K" (-EFAULT), "a" (&(x))			\
-		: __uaccess_clobber );				\
-})
-#endif
-
 #define __get_user(x, ptr)					\
 ({								\
-	int __gu_err;						\
-        __chk_user_ptr(ptr);                                    \
+	int __gu_err = -EFAULT;					\
+	__chk_user_ptr(ptr);					\
 	switch (sizeof(*(ptr))) {				\
 	case 1: {						\
 		unsigned char __x;				\
-		__get_user_asm(__x, ptr, __gu_err);		\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
 	case 2: {						\
 		unsigned short __x;				\
-		__get_user_asm(__x, ptr, __gu_err);		\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
 	case 4: {						\
 		unsigned int __x;				\
-		__get_user_asm(__x, ptr, __gu_err);		\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
 	case 8: {						\
 		unsigned long long __x;				\
-		__get_user_asm(__x, ptr, __gu_err);		\
+		__gu_err = __get_user_fn(sizeof (*(ptr)),	\
+					 ptr, &__x);		\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
@@ -247,8 +198,6 @@
 #define __put_user_unaligned __put_user
 #define __get_user_unaligned __get_user
 
-extern long __copy_to_user_asm(const void *from, long n, void __user *to);
-
 /**
  * __copy_to_user: - Copy a block of data into user space, with less checking.
  * @to:   Destination address, in user space.
@@ -266,7 +215,10 @@
 static inline unsigned long
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	return __copy_to_user_asm(from, n, to);
+	if (__builtin_constant_p(n) && (n <= 256))
+		return uaccess.copy_to_user_small(n, to, from);
+	else
+		return uaccess.copy_to_user(n, to, from);
 }
 
 #define __copy_to_user_inatomic __copy_to_user
@@ -294,8 +246,6 @@
 	return n;
 }
 
-extern long __copy_from_user_asm(void *to, long n, const void __user *from);
-
 /**
  * __copy_from_user: - Copy a block of data from user space, with less checking.
  * @to:   Destination address, in kernel space.
@@ -316,7 +266,10 @@
 static inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	return __copy_from_user_asm(to, n, from);
+	if (__builtin_constant_p(n) && (n <= 256))
+		return uaccess.copy_from_user_small(n, from, to);
+	else
+		return uaccess.copy_from_user(n, from, to);
 }
 
 /**
@@ -346,13 +299,10 @@
 	return n;
 }
 
-extern unsigned long __copy_in_user_asm(const void __user *from, long n,
-							void __user *to);
-
 static inline unsigned long
 __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
-	return __copy_in_user_asm(from, n, to);
+	return uaccess.copy_in_user(n, to, from);
 }
 
 static inline unsigned long
@@ -360,34 +310,28 @@
 {
 	might_sleep();
 	if (__access_ok(from,n) && __access_ok(to,n))
-		n = __copy_in_user_asm(from, n, to);
+		n = __copy_in_user(to, from, n);
 	return n;
 }
 
 /*
  * Copy a null terminated string from userspace.
  */
-extern long __strncpy_from_user_asm(long count, char *dst,
-					const char __user *src);
-
 static inline long
 strncpy_from_user(char *dst, const char __user *src, long count)
 {
         long res = -EFAULT;
         might_sleep();
         if (access_ok(VERIFY_READ, src, 1))
-                res = __strncpy_from_user_asm(count, dst, src);
+		res = uaccess.strncpy_from_user(count, src, dst);
         return res;
 }
 
-
-extern long __strnlen_user_asm(long count, const char __user *src);
-
 static inline unsigned long
 strnlen_user(const char __user * src, unsigned long n)
 {
 	might_sleep();
-	return __strnlen_user_asm(n, src);
+	return uaccess.strnlen_user(n, src);
 }
 
 /**
@@ -410,12 +354,10 @@
  * Zero Userspace
  */
 
-extern long __clear_user_asm(void __user *to, long n);
-
 static inline unsigned long
 __clear_user(void __user *to, unsigned long n)
 {
-	return __clear_user_asm(to, n);
+	return uaccess.clear_user(n, to);
 }
 
 static inline unsigned long
@@ -423,7 +365,7 @@
 {
 	might_sleep();
 	if (access_ok(VERIFY_WRITE, to, n))
-		n = __clear_user_asm(to, n);
+		n = uaccess.clear_user(n, to);
 	return n;
 }
 
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index aa7a243..02b942d 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -25,17 +25,12 @@
 #define __NR_unlink              10
 #define __NR_execve              11
 #define __NR_chdir               12
-#define __NR_time                13
 #define __NR_mknod               14
 #define __NR_chmod               15
-#define __NR_lchown              16
 #define __NR_lseek               19
 #define __NR_getpid              20
 #define __NR_mount               21
 #define __NR_umount              22
-#define __NR_setuid              23
-#define __NR_getuid              24
-#define __NR_stime               25
 #define __NR_ptrace              26
 #define __NR_alarm               27
 #define __NR_pause               29
@@ -51,11 +46,7 @@
 #define __NR_pipe                42
 #define __NR_times               43
 #define __NR_brk                 45
-#define __NR_setgid              46
-#define __NR_getgid              47
 #define __NR_signal              48
-#define __NR_geteuid             49
-#define __NR_getegid             50
 #define __NR_acct                51
 #define __NR_umount2             52
 #define __NR_ioctl               54
@@ -69,18 +60,13 @@
 #define __NR_getpgrp             65
 #define __NR_setsid              66
 #define __NR_sigaction           67
-#define __NR_setreuid            70
-#define __NR_setregid            71
 #define __NR_sigsuspend          72
 #define __NR_sigpending          73
 #define __NR_sethostname         74
 #define __NR_setrlimit           75
-#define __NR_getrlimit           76
 #define __NR_getrusage           77
 #define __NR_gettimeofday        78
 #define __NR_settimeofday        79
-#define __NR_getgroups           80
-#define __NR_setgroups           81
 #define __NR_symlink             83
 #define __NR_readlink            85
 #define __NR_uselib              86
@@ -92,12 +78,10 @@
 #define __NR_truncate            92
 #define __NR_ftruncate           93
 #define __NR_fchmod              94
-#define __NR_fchown              95
 #define __NR_getpriority         96
 #define __NR_setpriority         97
 #define __NR_statfs              99
 #define __NR_fstatfs            100
-#define __NR_ioperm             101
 #define __NR_socketcall         102
 #define __NR_syslog             103
 #define __NR_setitimer          104
@@ -131,11 +115,7 @@
 #define __NR_sysfs              135
 #define __NR_personality        136
 #define __NR_afs_syscall        137 /* Syscall for Andrew File System */
-#define __NR_setfsuid           138
-#define __NR_setfsgid           139
-#define __NR__llseek            140
 #define __NR_getdents           141
-#define __NR__newselect         142
 #define __NR_flock              143
 #define __NR_msync              144
 #define __NR_readv              145
@@ -157,13 +137,9 @@
 #define __NR_sched_rr_get_interval      161
 #define __NR_nanosleep          162
 #define __NR_mremap             163
-#define __NR_setresuid          164
-#define __NR_getresuid          165
 #define __NR_query_module       167
 #define __NR_poll               168
 #define __NR_nfsservctl         169
-#define __NR_setresgid          170
-#define __NR_getresgid          171
 #define __NR_prctl              172
 #define __NR_rt_sigreturn       173
 #define __NR_rt_sigaction       174
@@ -174,7 +150,6 @@
 #define __NR_rt_sigsuspend      179
 #define __NR_pread64            180
 #define __NR_pwrite64           181
-#define __NR_chown              182
 #define __NR_getcwd             183
 #define __NR_capget             184
 #define __NR_capset             185
@@ -183,39 +158,11 @@
 #define __NR_getpmsg		188
 #define __NR_putpmsg		189
 #define __NR_vfork		190
-#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#define __NR_lchown32		198
-#define __NR_getuid32		199
-#define __NR_getgid32		200
-#define __NR_geteuid32		201
-#define __NR_getegid32		202
-#define __NR_setreuid32		203
-#define __NR_setregid32		204
-#define __NR_getgroups32	205
-#define __NR_setgroups32	206
-#define __NR_fchown32		207
-#define __NR_setresuid32	208
-#define __NR_getresuid32	209
-#define __NR_setresgid32	210
-#define __NR_getresgid32	211
-#define __NR_chown32		212
-#define __NR_setuid32		213
-#define __NR_setgid32		214
-#define __NR_setfsuid32		215
-#define __NR_setfsgid32		216
 #define __NR_pivot_root         217
 #define __NR_mincore            218
 #define __NR_madvise            219
 #define __NR_getdents64		220
-#define __NR_fcntl64		221
 #define __NR_readahead		222
-#define __NR_sendfile64		223
 #define __NR_setxattr		224
 #define __NR_lsetxattr		225
 #define __NR_fsetxattr		226
@@ -256,7 +203,6 @@
 #define __NR_clock_getres	(__NR_timer_create+7)
 #define __NR_clock_nanosleep	(__NR_timer_create+8)
 /* Number 263 is reserved for vserver */
-#define __NR_fadvise64_64	264
 #define __NR_statfs64		265
 #define __NR_fstatfs64		266
 #define __NR_remap_file_pages	267
@@ -285,7 +231,6 @@
 #define __NR_mknodat		290
 #define __NR_fchownat		291
 #define __NR_futimesat		292
-#define __NR_fstatat64		293
 #define __NR_unlinkat		294
 #define __NR_renameat		295
 #define __NR_linkat		296
@@ -310,62 +255,65 @@
  * have a different name although they do the same (e.g. __NR_chown32
  * is __NR_chown on 64 bit).
  */
-#ifdef __s390x__
-#undef  __NR_time
-#undef  __NR_lchown
-#undef  __NR_setuid
-#undef  __NR_getuid
-#undef  __NR_stime
-#undef  __NR_setgid
-#undef  __NR_getgid
-#undef  __NR_geteuid
-#undef  __NR_getegid
-#undef  __NR_setreuid
-#undef  __NR_setregid
-#undef  __NR_getrlimit
-#undef  __NR_getgroups
-#undef  __NR_setgroups
-#undef  __NR_fchown
-#undef  __NR_ioperm
-#undef  __NR_setfsuid
-#undef  __NR_setfsgid
-#undef  __NR__llseek
-#undef  __NR__newselect
-#undef  __NR_setresuid
-#undef  __NR_getresuid
-#undef  __NR_setresgid
-#undef  __NR_getresgid
-#undef  __NR_chown
-#undef  __NR_ugetrlimit
-#undef  __NR_mmap2
-#undef  __NR_truncate64
-#undef  __NR_ftruncate64
-#undef  __NR_stat64
-#undef  __NR_lstat64
-#undef  __NR_fstat64
-#undef  __NR_lchown32
-#undef  __NR_getuid32
-#undef  __NR_getgid32
-#undef  __NR_geteuid32
-#undef  __NR_getegid32
-#undef  __NR_setreuid32
-#undef  __NR_setregid32
-#undef  __NR_getgroups32
-#undef  __NR_setgroups32
-#undef  __NR_fchown32
-#undef  __NR_setresuid32
-#undef  __NR_getresuid32
-#undef  __NR_setresgid32
-#undef  __NR_getresgid32
-#undef  __NR_chown32
-#undef  __NR_setuid32
-#undef  __NR_setgid32
-#undef  __NR_setfsuid32
-#undef  __NR_setfsgid32
-#undef  __NR_fcntl64
-#undef  __NR_sendfile64
-#undef  __NR_fadvise64_64
-#undef  __NR_fstatat64
+#ifndef __s390x__
+
+#define __NR_time		 13
+#define __NR_lchown		 16
+#define __NR_setuid		 23
+#define __NR_getuid		 24
+#define __NR_stime		 25
+#define __NR_setgid		 46
+#define __NR_getgid		 47
+#define __NR_geteuid		 49
+#define __NR_getegid		 50
+#define __NR_setreuid		 70
+#define __NR_setregid		 71
+#define __NR_getrlimit		 76
+#define __NR_getgroups		 80
+#define __NR_setgroups		 81
+#define __NR_fchown		 95
+#define __NR_ioperm		101
+#define __NR_setfsuid		138
+#define __NR_setfsgid		139
+#define __NR__llseek		140
+#define __NR__newselect 	142
+#define __NR_setresuid		164
+#define __NR_getresuid		165
+#define __NR_setresgid		170
+#define __NR_getresgid		171
+#define __NR_chown		182
+#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
+#define __NR_mmap2		192
+#define __NR_truncate64		193
+#define __NR_ftruncate64	194
+#define __NR_stat64		195
+#define __NR_lstat64		196
+#define __NR_fstat64		197
+#define __NR_lchown32		198
+#define __NR_getuid32		199
+#define __NR_getgid32		200
+#define __NR_geteuid32		201
+#define __NR_getegid32		202
+#define __NR_setreuid32		203
+#define __NR_setregid32		204
+#define __NR_getgroups32	205
+#define __NR_setgroups32	206
+#define __NR_fchown32		207
+#define __NR_setresuid32	208
+#define __NR_getresuid32	209
+#define __NR_setresgid32	210
+#define __NR_getresgid32	211
+#define __NR_chown32		212
+#define __NR_setuid32		213
+#define __NR_setgid32		214
+#define __NR_setfsuid32		215
+#define __NR_setfsgid32		216
+#define __NR_fcntl64		221
+#define __NR_sendfile64		223
+#define __NR_fadvise64_64	264
+#define __NR_fstatat64		293
+
+#else
 
 #define __NR_select		142
 #define __NR_getrlimit		191	/* SuS compliant getrlimit */
diff --git a/include/asm-s390/z90crypt.h b/include/asm-s390/z90crypt.h
deleted file mode 100644
index 31a2439..0000000
--- a/include/asm-s390/z90crypt.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- *  include/asm-s390/z90crypt.h
- *
- *  z90crypt 1.3.3 (user-visible header)
- *
- *  Copyright (C)  2001, 2005 IBM Corporation
- *  Author(s): Robert Burroughs
- *             Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __ASM_S390_Z90CRYPT_H
-#define __ASM_S390_Z90CRYPT_H
-#include <linux/ioctl.h>
-
-#define z90crypt_VERSION 1
-#define z90crypt_RELEASE 3	// 2 = PCIXCC, 3 = rewrite for coding standards
-#define z90crypt_VARIANT 3	// 3 = CEX2A support
-
-/**
- * struct ica_rsa_modexpo
- *
- * Requirements:
- * - outputdatalength is at least as large as inputdatalength.
- * - All key parts are right justified in their fields, padded on
- *   the left with zeroes.
- * - length(b_key) = inputdatalength
- * - length(n_modulus) = inputdatalength
- */
-struct ica_rsa_modexpo {
-	char __user *	inputdata;
-	unsigned int	inputdatalength;
-	char __user *	outputdata;
-	unsigned int	outputdatalength;
-	char __user *	b_key;
-	char __user *	n_modulus;
-};
-
-/**
- * struct ica_rsa_modexpo_crt
- *
- * Requirements:
- * - inputdatalength is even.
- * - outputdatalength is at least as large as inputdatalength.
- * - All key parts are right justified in their fields, padded on
- *   the left with zeroes.
- * - length(bp_key)	= inputdatalength/2 + 8
- * - length(bq_key)	= inputdatalength/2
- * - length(np_key)	= inputdatalength/2 + 8
- * - length(nq_key)	= inputdatalength/2
- * - length(u_mult_inv) = inputdatalength/2 + 8
- */
-struct ica_rsa_modexpo_crt {
-	char __user *	inputdata;
-	unsigned int	inputdatalength;
-	char __user *	outputdata;
-	unsigned int	outputdatalength;
-	char __user *	bp_key;
-	char __user *	bq_key;
-	char __user *	np_prime;
-	char __user *	nq_prime;
-	char __user *	u_mult_inv;
-};
-
-#define Z90_IOCTL_MAGIC 'z'  // NOTE:  Need to allocate from linux folks
-
-/**
- * Interface notes:
- *
- * The ioctl()s which are implemented (along with relevant details)
- * are:
- *
- *   ICARSAMODEXPO
- *     Perform an RSA operation using a Modulus-Exponent pair
- *     This takes an ica_rsa_modexpo struct as its arg.
- *
- *     NOTE: please refer to the comments preceding this structure
- *           for the implementation details for the contents of the
- *           block
- *
- *   ICARSACRT
- *     Perform an RSA operation using a Chinese-Remainder Theorem key
- *     This takes an ica_rsa_modexpo_crt struct as its arg.
- *
- *     NOTE: please refer to the comments preceding this structure
- *           for the implementation details for the contents of the
- *           block
- *
- *   Z90STAT_TOTALCOUNT
- *     Return an integer count of all device types together.
- *
- *   Z90STAT_PCICACOUNT
- *     Return an integer count of all PCICAs.
- *
- *   Z90STAT_PCICCCOUNT
- *     Return an integer count of all PCICCs.
- *
- *   Z90STAT_PCIXCCMCL2COUNT
- *     Return an integer count of all MCL2 PCIXCCs.
- *
- *   Z90STAT_PCIXCCMCL3COUNT
- *     Return an integer count of all MCL3 PCIXCCs.
- *
- *   Z90STAT_CEX2CCOUNT
- *     Return an integer count of all CEX2Cs.
- *
- *   Z90STAT_CEX2ACOUNT
- *     Return an integer count of all CEX2As.
- *
- *   Z90STAT_REQUESTQ_COUNT
- *     Return an integer count of the number of entries waiting to be
- *     sent to a device.
- *
- *   Z90STAT_PENDINGQ_COUNT
- *     Return an integer count of the number of entries sent to a
- *     device awaiting the reply.
- *
- *   Z90STAT_TOTALOPEN_COUNT
- *     Return an integer count of the number of open file handles.
- *
- *   Z90STAT_DOMAIN_INDEX
- *     Return the integer value of the Cryptographic Domain.
- *
- *   Z90STAT_STATUS_MASK
- *     Return an 64 element array of unsigned chars for the status of
- *     all devices.
- *       0x01: PCICA
- *       0x02: PCICC
- *       0x03: PCIXCC_MCL2
- *       0x04: PCIXCC_MCL3
- *       0x05: CEX2C
- *       0x06: CEX2A
- *       0x0d: device is disabled via the proc filesystem
- *
- *   Z90STAT_QDEPTH_MASK
- *     Return an 64 element array of unsigned chars for the queue
- *     depth of all devices.
- *
- *   Z90STAT_PERDEV_REQCNT
- *     Return an 64 element array of unsigned integers for the number
- *     of successfully completed requests per device since the device
- *     was detected and made available.
- *
- *   ICAZ90STATUS (deprecated)
- *     Return some device driver status in a ica_z90_status struct
- *     This takes an ica_z90_status struct as its arg.
- *
- *     NOTE: this ioctl() is deprecated, and has been replaced with
- *           single ioctl()s for each type of status being requested
- *
- *   Z90STAT_PCIXCCCOUNT (deprecated)
- *     Return an integer count of all PCIXCCs (MCL2 + MCL3).
- *     This is DEPRECATED now that MCL3 PCIXCCs are treated differently from
- *     MCL2 PCIXCCs.
- *
- *   Z90QUIESCE (not recommended)
- *     Quiesce the driver.  This is intended to stop all new
- *     requests from being processed.  Its use is NOT recommended,
- *     except in circumstances where there is no other way to stop
- *     callers from accessing the driver.  Its original use was to
- *     allow the driver to be "drained" of work in preparation for
- *     a system shutdown.
- *
- *     NOTE: once issued, this ban on new work cannot be undone
- *           except by unloading and reloading the driver.
- */
-
-/**
- * Supported ioctl calls
- */
-#define ICARSAMODEXPO	_IOC(_IOC_READ|_IOC_WRITE, Z90_IOCTL_MAGIC, 0x05, 0)
-#define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, Z90_IOCTL_MAGIC, 0x06, 0)
-
-/* DEPRECATED status calls (bound for removal at some point) */
-#define ICAZ90STATUS	_IOR(Z90_IOCTL_MAGIC, 0x10, struct ica_z90_status)
-#define Z90STAT_PCIXCCCOUNT	_IOR(Z90_IOCTL_MAGIC, 0x43, int)
-
-/* unrelated to ICA callers */
-#define Z90QUIESCE	_IO(Z90_IOCTL_MAGIC, 0x11)
-
-/* New status calls */
-#define Z90STAT_TOTALCOUNT	_IOR(Z90_IOCTL_MAGIC, 0x40, int)
-#define Z90STAT_PCICACOUNT	_IOR(Z90_IOCTL_MAGIC, 0x41, int)
-#define Z90STAT_PCICCCOUNT	_IOR(Z90_IOCTL_MAGIC, 0x42, int)
-#define Z90STAT_PCIXCCMCL2COUNT	_IOR(Z90_IOCTL_MAGIC, 0x4b, int)
-#define Z90STAT_PCIXCCMCL3COUNT	_IOR(Z90_IOCTL_MAGIC, 0x4c, int)
-#define Z90STAT_CEX2CCOUNT	_IOR(Z90_IOCTL_MAGIC, 0x4d, int)
-#define Z90STAT_CEX2ACOUNT	_IOR(Z90_IOCTL_MAGIC, 0x4e, int)
-#define Z90STAT_REQUESTQ_COUNT	_IOR(Z90_IOCTL_MAGIC, 0x44, int)
-#define Z90STAT_PENDINGQ_COUNT	_IOR(Z90_IOCTL_MAGIC, 0x45, int)
-#define Z90STAT_TOTALOPEN_COUNT _IOR(Z90_IOCTL_MAGIC, 0x46, int)
-#define Z90STAT_DOMAIN_INDEX	_IOR(Z90_IOCTL_MAGIC, 0x47, int)
-#define Z90STAT_STATUS_MASK	_IOR(Z90_IOCTL_MAGIC, 0x48, char[64])
-#define Z90STAT_QDEPTH_MASK	_IOR(Z90_IOCTL_MAGIC, 0x49, char[64])
-#define Z90STAT_PERDEV_REQCNT	_IOR(Z90_IOCTL_MAGIC, 0x4a, int[64])
-
-#endif /* __ASM_S390_Z90CRYPT_H */
diff --git a/include/asm-s390/zcrypt.h b/include/asm-s390/zcrypt.h
new file mode 100644
index 0000000..7244c68
--- /dev/null
+++ b/include/asm-s390/zcrypt.h
@@ -0,0 +1,285 @@
+/*
+ *  include/asm-s390/zcrypt.h
+ *
+ *  zcrypt 2.1.0 (user-visible header)
+ *
+ *  Copyright (C)  2001, 2006 IBM Corporation
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ASM_S390_ZCRYPT_H
+#define __ASM_S390_ZCRYPT_H
+
+#define ZCRYPT_VERSION 2
+#define ZCRYPT_RELEASE 1
+#define ZCRYPT_VARIANT 0
+
+#include <linux/ioctl.h>
+#include <linux/compiler.h>
+
+/**
+ * struct ica_rsa_modexpo
+ *
+ * Requirements:
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ *   the left with zeroes.
+ * - length(b_key) = inputdatalength
+ * - length(n_modulus) = inputdatalength
+ */
+struct ica_rsa_modexpo {
+	char __user *	inputdata;
+	unsigned int	inputdatalength;
+	char __user *	outputdata;
+	unsigned int	outputdatalength;
+	char __user *	b_key;
+	char __user *	n_modulus;
+};
+
+/**
+ * struct ica_rsa_modexpo_crt
+ *
+ * Requirements:
+ * - inputdatalength is even.
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ *   the left with zeroes.
+ * - length(bp_key)	= inputdatalength/2 + 8
+ * - length(bq_key)	= inputdatalength/2
+ * - length(np_key)	= inputdatalength/2 + 8
+ * - length(nq_key)	= inputdatalength/2
+ * - length(u_mult_inv) = inputdatalength/2 + 8
+ */
+struct ica_rsa_modexpo_crt {
+	char __user *	inputdata;
+	unsigned int	inputdatalength;
+	char __user *	outputdata;
+	unsigned int	outputdatalength;
+	char __user *	bp_key;
+	char __user *	bq_key;
+	char __user *	np_prime;
+	char __user *	nq_prime;
+	char __user *	u_mult_inv;
+};
+
+/**
+ * CPRBX
+ *	  Note that all shorts and ints are big-endian.
+ *	  All pointer fields are 16 bytes long, and mean nothing.
+ *
+ *	  A request CPRB is followed by a request_parameter_block.
+ *
+ *	  The request (or reply) parameter block is organized thus:
+ *	    function code
+ *	    VUD block
+ *	    key block
+ */
+struct ica_CPRBX {
+	unsigned short	cprb_len;	/* CPRB length	      220	 */
+	unsigned char	cprb_ver_id;	/* CPRB version id.   0x02	 */
+	unsigned char	pad_000[3];	/* Alignment pad bytes		 */
+	unsigned char	func_id[2];	/* function id	      0x5432	 */
+	unsigned char	cprb_flags[4];	/* Flags			 */
+	unsigned int	req_parml;	/* request parameter buffer len	 */
+	unsigned int	req_datal;	/* request data buffer		 */
+	unsigned int	rpl_msgbl;	/* reply  message block length	 */
+	unsigned int	rpld_parml;	/* replied parameter block len	 */
+	unsigned int	rpl_datal;	/* reply data block len		 */
+	unsigned int	rpld_datal;	/* replied data block len	 */
+	unsigned int	req_extbl;	/* request extension block len	 */
+	unsigned char	pad_001[4];	/* reserved			 */
+	unsigned int	rpld_extbl;	/* replied extension block len	 */
+	unsigned char	padx000[16 - sizeof (char *)];
+	unsigned char *	req_parmb;	/* request parm block 'address'	 */
+	unsigned char	padx001[16 - sizeof (char *)];
+	unsigned char *	req_datab;	/* request data block 'address'	 */
+	unsigned char	padx002[16 - sizeof (char *)];
+	unsigned char *	rpl_parmb;	/* reply parm block 'address'	 */
+	unsigned char	padx003[16 - sizeof (char *)];
+	unsigned char *	rpl_datab;	/* reply data block 'address'	 */
+	unsigned char	padx004[16 - sizeof (char *)];
+	unsigned char *	req_extb;	/* request extension block 'addr'*/
+	unsigned char	padx005[16 - sizeof (char *)];
+	unsigned char *	rpl_extb;	/* reply extension block 'addres'*/
+	unsigned short	ccp_rtcode;	/* server return code		 */
+	unsigned short	ccp_rscode;	/* server reason code		 */
+	unsigned int	mac_data_len;	/* Mac Data Length		 */
+	unsigned char	logon_id[8];	/* Logon Identifier		 */
+	unsigned char	mac_value[8];	/* Mac Value			 */
+	unsigned char	mac_content_flgs;/* Mac content flag byte	 */
+	unsigned char	pad_002;	/* Alignment			 */
+	unsigned short	domain;		/* Domain			 */
+	unsigned char	usage_domain[4];/* Usage domain			 */
+	unsigned char	cntrl_domain[4];/* Control domain		 */
+	unsigned char	S390enf_mask[4];/* S/390 enforcement mask	 */
+	unsigned char	pad_004[36];	/* reserved			 */
+};
+
+/**
+ * xcRB
+ */
+struct ica_xcRB {
+	unsigned short	agent_ID;
+	unsigned int	user_defined;
+	unsigned short	request_ID;
+	unsigned int	request_control_blk_length;
+	unsigned char	padding1[16 - sizeof (char *)];
+	char __user *	request_control_blk_addr;
+	unsigned int	request_data_length;
+	char		padding2[16 - sizeof (char *)];
+	char __user *	request_data_address;
+	unsigned int	reply_control_blk_length;
+	char		padding3[16 - sizeof (char *)];
+	char __user *	reply_control_blk_addr;
+	unsigned int	reply_data_length;
+	char		padding4[16 - sizeof (char *)];
+	char __user *	reply_data_addr;
+	unsigned short	priority_window;
+	unsigned int	status;
+} __attribute__((packed));
+#define AUTOSELECT ((unsigned int)0xFFFFFFFF)
+
+#define ZCRYPT_IOCTL_MAGIC 'z'
+
+/**
+ * Interface notes:
+ *
+ * The ioctl()s which are implemented (along with relevant details)
+ * are:
+ *
+ *   ICARSAMODEXPO
+ *     Perform an RSA operation using a Modulus-Exponent pair
+ *     This takes an ica_rsa_modexpo struct as its arg.
+ *
+ *     NOTE: please refer to the comments preceding this structure
+ *	     for the implementation details for the contents of the
+ *	     block
+ *
+ *   ICARSACRT
+ *     Perform an RSA operation using a Chinese-Remainder Theorem key
+ *     This takes an ica_rsa_modexpo_crt struct as its arg.
+ *
+ *     NOTE: please refer to the comments preceding this structure
+ *	     for the implementation details for the contents of the
+ *	     block
+ *
+ *   Z90STAT_TOTALCOUNT
+ *     Return an integer count of all device types together.
+ *
+ *   Z90STAT_PCICACOUNT
+ *     Return an integer count of all PCICAs.
+ *
+ *   Z90STAT_PCICCCOUNT
+ *     Return an integer count of all PCICCs.
+ *
+ *   Z90STAT_PCIXCCMCL2COUNT
+ *     Return an integer count of all MCL2 PCIXCCs.
+ *
+ *   Z90STAT_PCIXCCMCL3COUNT
+ *     Return an integer count of all MCL3 PCIXCCs.
+ *
+ *   Z90STAT_CEX2CCOUNT
+ *     Return an integer count of all CEX2Cs.
+ *
+ *   Z90STAT_CEX2ACOUNT
+ *     Return an integer count of all CEX2As.
+ *
+ *   Z90STAT_REQUESTQ_COUNT
+ *     Return an integer count of the number of entries waiting to be
+ *     sent to a device.
+ *
+ *   Z90STAT_PENDINGQ_COUNT
+ *     Return an integer count of the number of entries sent to a
+ *     device awaiting the reply.
+ *
+ *   Z90STAT_TOTALOPEN_COUNT
+ *     Return an integer count of the number of open file handles.
+ *
+ *   Z90STAT_DOMAIN_INDEX
+ *     Return the integer value of the Cryptographic Domain.
+ *
+ *   Z90STAT_STATUS_MASK
+ *     Return a 64-element array of unsigned chars for the status of
+ *     all devices.
+ *	 0x01: PCICA
+ *	 0x02: PCICC
+ *	 0x03: PCIXCC_MCL2
+ *	 0x04: PCIXCC_MCL3
+ *	 0x05: CEX2C
+ *	 0x06: CEX2A
+ *	 0x0d: device is disabled via the proc filesystem
+ *
+ *   Z90STAT_QDEPTH_MASK
+ *     Return a 64-element array of unsigned chars for the queue
+ *     depth of all devices.
+ *
+ *   Z90STAT_PERDEV_REQCNT
+ *     Return a 64-element array of unsigned integers for the number
+ *     of successfully completed requests per device since the device
+ *     was detected and made available.
+ *
+ *   ICAZ90STATUS (deprecated)
+ *     Return some device driver status in an ica_z90_status struct
+ *     This takes an ica_z90_status struct as its arg.
+ *
+ *     NOTE: this ioctl() is deprecated, and has been replaced with
+ *	     single ioctl()s for each type of status being requested
+ *
+ *   Z90STAT_PCIXCCCOUNT (deprecated)
+ *     Return an integer count of all PCIXCCs (MCL2 + MCL3).
+ *     This is DEPRECATED now that MCL3 PCIXCCs are treated differently from
+ *     MCL2 PCIXCCs.
+ *
+ *   Z90QUIESCE (not recommended)
+ *     Quiesce the driver.  This is intended to stop all new
+ *     requests from being processed.  Its use is NOT recommended,
+ *     except in circumstances where there is no other way to stop
+ *     callers from accessing the driver.  Its original use was to
+ *     allow the driver to be "drained" of work in preparation for
+ *     a system shutdown.
+ *
+ *     NOTE: once issued, this ban on new work cannot be undone
+ *	     except by unloading and reloading the driver.
+ */
+
+/**
+ * Supported ioctl calls
+ */
+#define ICARSAMODEXPO	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
+#define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+#define ZSECSENDCPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+
+/* New status calls */
+#define Z90STAT_TOTALCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
+#define Z90STAT_PCICACOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x41, int)
+#define Z90STAT_PCICCCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x42, int)
+#define Z90STAT_PCIXCCMCL2COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4b, int)
+#define Z90STAT_PCIXCCMCL3COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4c, int)
+#define Z90STAT_CEX2CCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4d, int)
+#define Z90STAT_CEX2ACOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4e, int)
+#define Z90STAT_REQUESTQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int)
+#define Z90STAT_PENDINGQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int)
+#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int)
+#define Z90STAT_DOMAIN_INDEX	_IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int)
+#define Z90STAT_STATUS_MASK	_IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
+#define Z90STAT_QDEPTH_MASK	_IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
+#define Z90STAT_PERDEV_REQCNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+#endif /* __ASM_S390_ZCRYPT_H */
diff --git a/include/asm-sparc/Kbuild b/include/asm-sparc/Kbuild
index e2a57fd..b22b67a 100644
--- a/include/asm-sparc/Kbuild
+++ b/include/asm-sparc/Kbuild
@@ -1,6 +1,22 @@
 include include/asm-generic/Kbuild.asm
 
-unifdef-y += fbio.h perfctr.h psr.h
-header-y += apc.h asi.h auxio.h bpp.h head.h ipc.h jsflash.h	\
-	openpromio.h pbm.h pconf.h pgtsun4.h reg.h traps.h	\
-	turbosparc.h vfc_ioctls.h winmacro.h
+header-y += apc.h
+header-y += asi.h
+header-y += auxio.h
+header-y += bpp.h
+header-y += head.h
+header-y += ipc.h
+header-y += jsflash.h
+header-y += openpromio.h
+header-y += pbm.h
+header-y += pconf.h
+header-y += pgtsun4.h
+header-y += reg.h
+header-y += traps.h
+header-y += turbosparc.h
+header-y += vfc_ioctls.h
+header-y += winmacro.h
+
+unifdef-y += fbio.h
+unifdef-y += perfctr.h
+unifdef-y += psr.h
diff --git a/include/asm-sparc64/Kbuild b/include/asm-sparc64/Kbuild
index 9284c3c..4b59ce4 100644
--- a/include/asm-sparc64/Kbuild
+++ b/include/asm-sparc64/Kbuild
@@ -4,7 +4,26 @@
 ARCHDEF := defined __sparc__ && defined __arch64__
 ALTARCHDEF := defined __sparc__ && !defined __arch64__
 
-unifdef-y += fbio.h perfctr.h
-header-y += apb.h asi.h bbc.h bpp.h display7seg.h envctrl.h floppy.h	\
-	ipc.h kdebug.h mostek.h openprom.h openpromio.h parport.h	\
-	pconf.h psrcompat.h pstate.h reg.h uctx.h utrap.h watchdog.h
+header-y += apb.h
+header-y += asi.h
+header-y += bbc.h
+header-y += bpp.h
+header-y += display7seg.h
+header-y += envctrl.h
+header-y += floppy.h
+header-y += ipc.h
+header-y += kdebug.h
+header-y += mostek.h
+header-y += openprom.h
+header-y += openpromio.h
+header-y += parport.h
+header-y += pconf.h
+header-y += psrcompat.h
+header-y += pstate.h
+header-y += reg.h
+header-y += uctx.h
+header-y += utrap.h
+header-y += watchdog.h
+
+unifdef-y += fbio.h
+unifdef-y += perfctr.h
diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild
index dc4d101..40f2f13 100644
--- a/include/asm-x86_64/Kbuild
+++ b/include/asm-x86_64/Kbuild
@@ -4,8 +4,18 @@
 ARCHDEF := defined __x86_64__
 ALTARCHDEF := defined __i386__
 
-header-y += boot.h bootsetup.h cpufeature.h debugreg.h ldt.h \
-	 msr.h prctl.h setup.h sigcontext32.h ucontext.h \
-	 vsyscall32.h
+header-y += boot.h
+header-y += bootsetup.h
+header-y += cpufeature.h
+header-y += debugreg.h
+header-y += ldt.h
+header-y += msr.h
+header-y += prctl.h
+header-y += setup.h
+header-y += sigcontext32.h
+header-y += ucontext.h
+header-y += vsyscall32.h
 
-unifdef-y += mce.h mtrr.h vsyscall.h
+unifdef-y += mce.h
+unifdef-y += mtrr.h
+unifdef-y += vsyscall.h
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
new file mode 100644
index 0000000..5748aec
--- /dev/null
+++ b/include/crypto/algapi.h
@@ -0,0 +1,156 @@
+/*
+ * Cryptographic API for algorithms (i.e., low-level API).
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ALGAPI_H
+#define _CRYPTO_ALGAPI_H
+
+#include <linux/crypto.h>
+
+struct module;
+struct seq_file;
+
+struct crypto_type {
+	unsigned int (*ctxsize)(struct crypto_alg *alg);
+	int (*init)(struct crypto_tfm *tfm);
+	void (*exit)(struct crypto_tfm *tfm);
+	void (*show)(struct seq_file *m, struct crypto_alg *alg);
+};
+
+struct crypto_instance {
+	struct crypto_alg alg;
+
+	struct crypto_template *tmpl;
+	struct hlist_node list;
+
+	void *__ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+struct crypto_template {
+	struct list_head list;
+	struct hlist_head instances;
+	struct module *module;
+
+	struct crypto_instance *(*alloc)(void *param, unsigned int len);
+	void (*free)(struct crypto_instance *inst);
+
+	char name[CRYPTO_MAX_ALG_NAME];
+};
+
+struct crypto_spawn {
+	struct list_head list;
+	struct crypto_alg *alg;
+	struct crypto_instance *inst;
+};
+
+struct scatter_walk {
+	struct scatterlist *sg;
+	unsigned int offset;
+};
+
+struct blkcipher_walk {
+	union {
+		struct {
+			struct page *page;
+			unsigned long offset;
+		} phys;
+
+		struct {
+			u8 *page;
+			u8 *addr;
+		} virt;
+	} src, dst;
+
+	struct scatter_walk in;
+	unsigned int nbytes;
+
+	struct scatter_walk out;
+	unsigned int total;
+
+	void *page;
+	u8 *buffer;
+	u8 *iv;
+
+	int flags;
+};
+
+extern const struct crypto_type crypto_blkcipher_type;
+extern const struct crypto_type crypto_hash_type;
+
+void crypto_mod_put(struct crypto_alg *alg);
+
+int crypto_register_template(struct crypto_template *tmpl);
+void crypto_unregister_template(struct crypto_template *tmpl);
+struct crypto_template *crypto_lookup_template(const char *name);
+
+int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg,
+		      struct crypto_instance *inst);
+void crypto_drop_spawn(struct crypto_spawn *spawn);
+struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn);
+
+struct crypto_alg *crypto_get_attr_alg(void *param, unsigned int len,
+				       u32 type, u32 mask);
+struct crypto_instance *crypto_alloc_instance(const char *name,
+					      struct crypto_alg *alg);
+
+int blkcipher_walk_done(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk, int err);
+int blkcipher_walk_virt(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk);
+int blkcipher_walk_phys(struct blkcipher_desc *desc,
+			struct blkcipher_walk *walk);
+
+/* Return the tfm context, aligned up when cra_alignmask requires it. */
+static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm)
+{
+	unsigned long mask = crypto_tfm_alg_alignmask(tfm);
+
+	if (mask <= crypto_tfm_ctx_alignment())
+		mask = 0;	/* leave the address as is, like "align = 1" did */
+	return (void *)ALIGN((unsigned long)crypto_tfm_ctx(tfm), mask + 1);
+}
+
+static inline void *crypto_instance_ctx(struct crypto_instance *inst)
+{
+	return inst->__ctx;
+}
+
+static inline void *crypto_blkcipher_ctx(struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_ctx(&tfm->base);
+}
+
+static inline void *crypto_blkcipher_ctx_aligned(struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_ctx_aligned(&tfm->base);
+}
+
+static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm)
+{
+	return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher;
+}
+
+static inline void *crypto_hash_ctx_aligned(struct crypto_hash *tfm)
+{
+	return crypto_tfm_ctx_aligned(&tfm->base);
+}
+
+static inline void blkcipher_walk_init(struct blkcipher_walk *walk,
+				       struct scatterlist *dst,
+				       struct scatterlist *src,
+				       unsigned int nbytes)
+{
+	walk->in.sg = src;
+	walk->out.sg = dst;
+	walk->total = nbytes;
+}
+
+#endif	/* _CRYPTO_ALGAPI_H */
+
diff --git a/include/crypto/twofish.h b/include/crypto/twofish.h
new file mode 100644
index 0000000..c408522
--- /dev/null
+++ b/include/crypto/twofish.h
@@ -0,0 +1,22 @@
+#ifndef _CRYPTO_TWOFISH_H
+#define _CRYPTO_TWOFISH_H
+
+#include <linux/types.h>
+
+#define TF_MIN_KEY_SIZE 16
+#define TF_MAX_KEY_SIZE 32
+#define TF_BLOCK_SIZE 16
+
+struct crypto_tfm;
+
+/* Structure for an expanded Twofish key.  s contains the key-dependent
+ * S-boxes composed with the MDS matrix; w contains the eight "whitening"
+ * subkeys, K[0] through K[7].  k holds the remaining, "round" subkeys.  Note
+ * that k[i] corresponds to what the Twofish paper calls K[i+8]. */
+struct twofish_ctx {
+	u32 s[4][256], w[8], k[32];
+};
+
+int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len);
+
+#endif
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 2b8a7d6..7d076d9 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -1,63 +1,343 @@
-header-y := byteorder/ dvb/ hdlc/ isdn/ nfsd/ raid/ sunrpc/ tc_act/	\
-	netfilter/ netfilter_arp/ netfilter_bridge/ netfilter_ipv4/	\
-	netfilter_ipv6/
+header-y += byteorder/
+header-y += dvb/
+header-y += hdlc/
+header-y += isdn/
+header-y += nfsd/
+header-y += raid/
+header-y += sunrpc/
+header-y += tc_act/
+header-y += netfilter/
+header-y += netfilter_arp/
+header-y += netfilter_bridge/
+header-y += netfilter_ipv4/
+header-y += netfilter_ipv6/
 
-header-y += affs_fs.h affs_hardblocks.h aio_abi.h a.out.h arcfb.h	\
-	atmapi.h atmbr2684.h atmclip.h atm_eni.h atm_he.h		\
-	atm_idt77105.h atmioc.h atmlec.h atmmpc.h atm_nicstar.h		\
-	atmppp.h atmsap.h atmsvc.h atm_zatm.h auto_fs4.h auxvec.h	\
-	awe_voice.h ax25.h b1lli.h baycom.h bfs_fs.h blkpg.h		\
-	bpqether.h cdk.h chio.h coda_psdev.h coff.h comstats.h		\
-	consolemap.h cycx_cfm.h dm-ioctl.h dn.h dqblk_v1.h		\
-	dqblk_v2.h dqblk_xfs.h efs_fs_sb.h elf-fdpic.h elf.h elf-em.h	\
-	fadvise.h fd.h fdreg.h ftape-header-segment.h ftape-vendors.h	\
-	fuse.h futex.h genetlink.h gen_stats.h gigaset_dev.h hdsmart.h	\
-	hpfs_fs.h hysdn_if.h i2c-dev.h i8k.h icmp.h			\
-	if_arcnet.h if_arp.h if_bonding.h if_cablemodem.h if_fc.h	\
-	if_fddi.h if.h if_hippi.h if_infiniband.h if_packet.h		\
-	if_plip.h if_ppp.h if_slip.h if_strip.h if_tunnel.h in6.h	\
-	in_route.h ioctl.h ip.h ipmi_msgdefs.h ip_mp_alg.h ipsec.h	\
-	ipx.h irda.h isdn_divertif.h iso_fs.h ite_gpio.h ixjuser.h	\
-	jffs2.h keyctl.h limits.h major.h matroxfb.h meye.h minix_fs.h	\
-	mmtimer.h mqueue.h mtio.h ncp_no.h netfilter_arp.h netrom.h	\
-	nfs2.h nfs4_mount.h nfs_mount.h openprom_fs.h param.h		\
-	pci_ids.h pci_regs.h personality.h pfkeyv2.h pg.h pkt_cls.h	\
-	pkt_sched.h posix_types.h ppdev.h prctl.h ps2esdi.h qic117.h	\
-	qnxtypes.h quotaio_v1.h quotaio_v2.h radeonfb.h raw.h		\
-	resource.h rose.h sctp.h smbno.h snmp.h sockios.h som.h		\
-	sound.h stddef.h synclink.h telephony.h termios.h ticable.h	\
-	times.h tiocl.h tipc.h toshiba.h ultrasound.h un.h utime.h	\
-	utsname.h video_decoder.h video_encoder.h videotext.h vt.h	\
-	wavefront.h wireless.h xattr.h x25.h zorro_ids.h
+header-y += affs_fs.h
+header-y += affs_hardblocks.h
+header-y += aio_abi.h
+header-y += a.out.h
+header-y += arcfb.h
+header-y += atmapi.h
+header-y += atmbr2684.h
+header-y += atmclip.h
+header-y += atm_eni.h
+header-y += atm_he.h
+header-y += atm_idt77105.h
+header-y += atmioc.h
+header-y += atmlec.h
+header-y += atmmpc.h
+header-y += atm_nicstar.h
+header-y += atmppp.h
+header-y += atmsap.h
+header-y += atmsvc.h
+header-y += atm_zatm.h
+header-y += auto_fs4.h
+header-y += auxvec.h
+header-y += awe_voice.h
+header-y += ax25.h
+header-y += b1lli.h
+header-y += baycom.h
+header-y += bfs_fs.h
+header-y += blkpg.h
+header-y += bpqether.h
+header-y += cdk.h
+header-y += chio.h
+header-y += coda_psdev.h
+header-y += coff.h
+header-y += comstats.h
+header-y += consolemap.h
+header-y += cycx_cfm.h
+header-y += dm-ioctl.h
+header-y += dn.h
+header-y += dqblk_v1.h
+header-y += dqblk_v2.h
+header-y += dqblk_xfs.h
+header-y += efs_fs_sb.h
+header-y += elf-fdpic.h
+header-y += elf.h
+header-y += elf-em.h
+header-y += fadvise.h
+header-y += fd.h
+header-y += fdreg.h
+header-y += ftape-header-segment.h
+header-y += ftape-vendors.h
+header-y += fuse.h
+header-y += futex.h
+header-y += genetlink.h
+header-y += gen_stats.h
+header-y += gigaset_dev.h
+header-y += hdsmart.h
+header-y += hpfs_fs.h
+header-y += hysdn_if.h
+header-y += i2c-dev.h
+header-y += i8k.h
+header-y += icmp.h
+header-y += if_arcnet.h
+header-y += if_arp.h
+header-y += if_bonding.h
+header-y += if_cablemodem.h
+header-y += if_fc.h
+header-y += if_fddi.h
+header-y += if.h
+header-y += if_hippi.h
+header-y += if_infiniband.h
+header-y += if_packet.h
+header-y += if_plip.h
+header-y += if_ppp.h
+header-y += if_slip.h
+header-y += if_strip.h
+header-y += if_tunnel.h
+header-y += in6.h
+header-y += in_route.h
+header-y += ioctl.h
+header-y += ip.h
+header-y += ipmi_msgdefs.h
+header-y += ip_mp_alg.h
+header-y += ipsec.h
+header-y += ipx.h
+header-y += irda.h
+header-y += isdn_divertif.h
+header-y += iso_fs.h
+header-y += ite_gpio.h
+header-y += ixjuser.h
+header-y += jffs2.h
+header-y += keyctl.h
+header-y += limits.h
+header-y += major.h
+header-y += matroxfb.h
+header-y += meye.h
+header-y += minix_fs.h
+header-y += mmtimer.h
+header-y += mqueue.h
+header-y += mtio.h
+header-y += ncp_no.h
+header-y += netfilter_arp.h
+header-y += netrom.h
+header-y += nfs2.h
+header-y += nfs4_mount.h
+header-y += nfs_mount.h
+header-y += openprom_fs.h
+header-y += param.h
+header-y += pci_ids.h
+header-y += pci_regs.h
+header-y += personality.h
+header-y += pfkeyv2.h
+header-y += pg.h
+header-y += pkt_cls.h
+header-y += pkt_sched.h
+header-y += posix_types.h
+header-y += ppdev.h
+header-y += prctl.h
+header-y += ps2esdi.h
+header-y += qic117.h
+header-y += qnxtypes.h
+header-y += quotaio_v1.h
+header-y += quotaio_v2.h
+header-y += radeonfb.h
+header-y += raw.h
+header-y += resource.h
+header-y += rose.h
+header-y += sctp.h
+header-y += smbno.h
+header-y += snmp.h
+header-y += sockios.h
+header-y += som.h
+header-y += sound.h
+header-y += stddef.h
+header-y += synclink.h
+header-y += telephony.h
+header-y += termios.h
+header-y += ticable.h
+header-y += times.h
+header-y += tiocl.h
+header-y += tipc.h
+header-y += toshiba.h
+header-y += ultrasound.h
+header-y += un.h
+header-y += utime.h
+header-y += utsname.h
+header-y += video_decoder.h
+header-y += video_encoder.h
+header-y += videotext.h
+header-y += vt.h
+header-y += wavefront.h
+header-y += wireless.h
+header-y += xattr.h
+header-y += x25.h
+header-y += zorro_ids.h
 
-unifdef-y += acct.h adb.h adfs_fs.h agpgart.h apm_bios.h atalk.h	\
-	atmarp.h atmdev.h atm.h atm_tcp.h audit.h auto_fs.h binfmts.h	\
-	capability.h capi.h cciss_ioctl.h cdrom.h cm4000_cs.h		\
-	cn_proc.h coda.h connector.h cramfs_fs.h cuda.h cyclades.h	\
-	dccp.h dirent.h divert.h elfcore.h errno.h errqueue.h		\
-	ethtool.h eventpoll.h ext2_fs.h ext3_fs.h fb.h fcntl.h		\
-	filter.h flat.h fs.h ftape.h gameport.h generic_serial.h	\
-	genhd.h hayesesp.h hdlcdrv.h hdlc.h hdreg.h hiddev.h hpet.h	\
-	i2c.h i2o-dev.h icmpv6.h if_bridge.h if_ec.h			\
-	if_eql.h if_ether.h if_frad.h if_ltalk.h if_pppox.h		\
-	if_shaper.h if_tr.h if_tun.h if_vlan.h if_wanpipe.h igmp.h	\
-	inet_diag.h in.h inotify.h input.h ipc.h ipmi.h ipv6.h		\
-	ipv6_route.h isdn.h isdnif.h isdn_ppp.h isicom.h jbd.h		\
-	joystick.h kdev_t.h kd.h kernelcapi.h kernel.h keyboard.h	\
-	llc.h loop.h lp.h mempolicy.h mii.h mman.h mroute.h msdos_fs.h	\
-	msg.h nbd.h ncp_fs.h ncp.h ncp_mount.h netdevice.h		\
-	netfilter_bridge.h netfilter_decnet.h netfilter.h		\
-	netfilter_ipv4.h netfilter_ipv6.h netfilter_logging.h net.h	\
-	netlink.h nfs3.h nfs4.h nfsacl.h nfs_fs.h nfs.h nfs_idmap.h	\
-	n_r3964.h nubus.h nvram.h parport.h patchkey.h pci.h pktcdvd.h	\
-	pmu.h poll.h ppp_defs.h ppp-comp.h ptrace.h qnx4_fs.h quota.h	\
-	random.h reboot.h reiserfs_fs.h reiserfs_xattr.h romfs_fs.h	\
-	route.h rtc.h rtnetlink.h scc.h sched.h sdla.h			\
-	selinux_netlink.h sem.h serial_core.h serial.h serio.h shm.h	\
-	signal.h smb_fs.h smb.h smb_mount.h socket.h sonet.h sonypi.h	\
-	soundcard.h stat.h sysctl.h tcp.h time.h timex.h tty.h types.h	\
-	udf_fs_i.h udp.h uinput.h uio.h unistd.h usb_ch9.h		\
-	usbdevice_fs.h user.h videodev2.h videodev.h wait.h		\
-	wanrouter.h watchdog.h xfrm.h zftape.h
+unifdef-y += acct.h
+unifdef-y += adb.h
+unifdef-y += adfs_fs.h
+unifdef-y += agpgart.h
+unifdef-y += apm_bios.h
+unifdef-y += atalk.h
+unifdef-y += atmarp.h
+unifdef-y += atmdev.h
+unifdef-y += atm.h
+unifdef-y += atm_tcp.h
+unifdef-y += audit.h
+unifdef-y += auto_fs.h
+unifdef-y += binfmts.h
+unifdef-y += capability.h
+unifdef-y += capi.h
+unifdef-y += cciss_ioctl.h
+unifdef-y += cdrom.h
+unifdef-y += cm4000_cs.h
+unifdef-y += cn_proc.h
+unifdef-y += coda.h
+unifdef-y += connector.h
+unifdef-y += cramfs_fs.h
+unifdef-y += cuda.h
+unifdef-y += cyclades.h
+unifdef-y += dccp.h
+unifdef-y += dirent.h
+unifdef-y += divert.h
+unifdef-y += elfcore.h
+unifdef-y += errno.h
+unifdef-y += errqueue.h
+unifdef-y += ethtool.h
+unifdef-y += eventpoll.h
+unifdef-y += ext2_fs.h
+unifdef-y += ext3_fs.h
+unifdef-y += fb.h
+unifdef-y += fcntl.h
+unifdef-y += filter.h
+unifdef-y += flat.h
+unifdef-y += fs.h
+unifdef-y += ftape.h
+unifdef-y += gameport.h
+unifdef-y += generic_serial.h
+unifdef-y += genhd.h
+unifdef-y += hayesesp.h
+unifdef-y += hdlcdrv.h
+unifdef-y += hdlc.h
+unifdef-y += hdreg.h
+unifdef-y += hiddev.h
+unifdef-y += hpet.h
+unifdef-y += i2c.h
+unifdef-y += i2o-dev.h
+unifdef-y += icmpv6.h
+unifdef-y += if_bridge.h
+unifdef-y += if_ec.h
+unifdef-y += if_eql.h
+unifdef-y += if_ether.h
+unifdef-y += if_frad.h
+unifdef-y += if_ltalk.h
+unifdef-y += if_pppox.h
+unifdef-y += if_shaper.h
+unifdef-y += if_tr.h
+unifdef-y += if_tun.h
+unifdef-y += if_vlan.h
+unifdef-y += if_wanpipe.h
+unifdef-y += igmp.h
+unifdef-y += inet_diag.h
+unifdef-y += in.h
+unifdef-y += inotify.h
+unifdef-y += input.h
+unifdef-y += ipc.h
+unifdef-y += ipmi.h
+unifdef-y += ipv6.h
+unifdef-y += ipv6_route.h
+unifdef-y += isdn.h
+unifdef-y += isdnif.h
+unifdef-y += isdn_ppp.h
+unifdef-y += isicom.h
+unifdef-y += jbd.h
+unifdef-y += joystick.h
+unifdef-y += kdev_t.h
+unifdef-y += kd.h
+unifdef-y += kernelcapi.h
+unifdef-y += kernel.h
+unifdef-y += keyboard.h
+unifdef-y += llc.h
+unifdef-y += loop.h
+unifdef-y += lp.h
+unifdef-y += mempolicy.h
+unifdef-y += mii.h
+unifdef-y += mman.h
+unifdef-y += mroute.h
+unifdef-y += msdos_fs.h
+unifdef-y += msg.h
+unifdef-y += nbd.h
+unifdef-y += ncp_fs.h
+unifdef-y += ncp.h
+unifdef-y += ncp_mount.h
+unifdef-y += netdevice.h
+unifdef-y += netfilter_bridge.h
+unifdef-y += netfilter_decnet.h
+unifdef-y += netfilter.h
+unifdef-y += netfilter_ipv4.h
+unifdef-y += netfilter_ipv6.h
+unifdef-y += netfilter_logging.h
+unifdef-y += net.h
+unifdef-y += netlink.h
+unifdef-y += nfs3.h
+unifdef-y += nfs4.h
+unifdef-y += nfsacl.h
+unifdef-y += nfs_fs.h
+unifdef-y += nfs.h
+unifdef-y += nfs_idmap.h
+unifdef-y += n_r3964.h
+unifdef-y += nubus.h
+unifdef-y += nvram.h
+unifdef-y += parport.h
+unifdef-y += patchkey.h
+unifdef-y += pci.h
+unifdef-y += pktcdvd.h
+unifdef-y += pmu.h
+unifdef-y += poll.h
+unifdef-y += ppp_defs.h
+unifdef-y += ppp-comp.h
+unifdef-y += ptrace.h
+unifdef-y += qnx4_fs.h
+unifdef-y += quota.h
+unifdef-y += random.h
+unifdef-y += reboot.h
+unifdef-y += reiserfs_fs.h
+unifdef-y += reiserfs_xattr.h
+unifdef-y += romfs_fs.h
+unifdef-y += route.h
+unifdef-y += rtc.h
+unifdef-y += rtnetlink.h
+unifdef-y += scc.h
+unifdef-y += sched.h
+unifdef-y += sdla.h
+unifdef-y += selinux_netlink.h
+unifdef-y += sem.h
+unifdef-y += serial_core.h
+unifdef-y += serial.h
+unifdef-y += serio.h
+unifdef-y += shm.h
+unifdef-y += signal.h
+unifdef-y += smb_fs.h
+unifdef-y += smb.h
+unifdef-y += smb_mount.h
+unifdef-y += socket.h
+unifdef-y += sonet.h
+unifdef-y += sonypi.h
+unifdef-y += soundcard.h
+unifdef-y += stat.h
+unifdef-y += sysctl.h
+unifdef-y += tcp.h
+unifdef-y += time.h
+unifdef-y += timex.h
+unifdef-y += tty.h
+unifdef-y += types.h
+unifdef-y += udf_fs_i.h
+unifdef-y += udp.h
+unifdef-y += uinput.h
+unifdef-y += uio.h
+unifdef-y += unistd.h
+unifdef-y += usb_ch9.h
+unifdef-y += usbdevice_fs.h
+unifdef-y += user.h
+unifdef-y += videodev2.h
+unifdef-y += videodev.h
+unifdef-y += wait.h
+unifdef-y += wanrouter.h
+unifdef-y += watchdog.h
+unifdef-y += xfrm.h
+unifdef-y += zftape.h
 
-objhdr-y := version.h
+objhdr-y += version.h
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aafe827..c773ee5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -746,6 +746,9 @@
 extern int blk_queue_resize_tags(request_queue_t *, int);
 extern void blk_queue_invalidate_tags(request_queue_t *);
 extern long blk_congestion_wait(int rw, long timeout);
+extern struct blk_queue_tag *blk_init_tags(int);
+extern void blk_free_tags(struct blk_queue_tag *);
+extern void blk_congestion_end(int rw);
 
 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 1021f50..e319c64 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -114,7 +114,7 @@
 #else
 #define HASHDIST_DEFAULT 0
 #endif
-extern int __initdata hashdist;		/* Distribute hashes across NUMA nodes? */
+extern int hashdist;		/* Distribute hashes across NUMA nodes? */
 
 
 #endif /* _LINUX_BOOTMEM_H */
diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild
index 84a57d4..56499ab 100644
--- a/include/linux/byteorder/Kbuild
+++ b/include/linux/byteorder/Kbuild
@@ -1,2 +1,7 @@
-unifdef-y += generic.h swabb.h swab.h
-header-y += big_endian.h little_endian.h pdp_endian.h
+header-y += big_endian.h
+header-y += little_endian.h
+header-y += pdp_endian.h
+
+unifdef-y += generic.h
+unifdef-y += swabb.h
+unifdef-y += swab.h
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 7f94624..8f2ffa4 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -17,20 +17,36 @@
 #ifndef _LINUX_CRYPTO_H
 #define _LINUX_CRYPTO_H
 
+#include <asm/atomic.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/types.h>
 #include <linux/list.h>
+#include <linux/slab.h>
 #include <linux/string.h>
-#include <asm/page.h>
+#include <linux/uaccess.h>
 
 /*
  * Algorithm masks and types.
  */
-#define CRYPTO_ALG_TYPE_MASK		0x000000ff
+#define CRYPTO_ALG_TYPE_MASK		0x0000000f
 #define CRYPTO_ALG_TYPE_CIPHER		0x00000001
 #define CRYPTO_ALG_TYPE_DIGEST		0x00000002
-#define CRYPTO_ALG_TYPE_COMPRESS	0x00000004
+#define CRYPTO_ALG_TYPE_HASH		0x00000003
+#define CRYPTO_ALG_TYPE_BLKCIPHER	0x00000004
+#define CRYPTO_ALG_TYPE_COMPRESS	0x00000005
+
+#define CRYPTO_ALG_TYPE_HASH_MASK	0x0000000e
+
+#define CRYPTO_ALG_LARVAL		0x00000010
+#define CRYPTO_ALG_DEAD			0x00000020
+#define CRYPTO_ALG_DYING		0x00000040
+#define CRYPTO_ALG_ASYNC		0x00000080
+
+/*
+ * Set this bit if and only if the algorithm requires another algorithm of
+ * the same type to handle corner cases.
+ */
+#define CRYPTO_ALG_NEED_FALLBACK	0x00000100
 
 /*
  * Transform masks and values (for crt_flags).
@@ -61,8 +77,37 @@
 #define CRYPTO_DIR_ENCRYPT		1
 #define CRYPTO_DIR_DECRYPT		0
 
+/*
+ * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual
+ * declaration) is used to ensure that the crypto_tfm context structure is
+ * aligned correctly for the given architecture so that there are no alignment
+ * faults for C data types.  In particular, this is required on platforms such
+ * as arm where pointers are 32-bit aligned but there are data types such as
+ * u64 which require 64-bit alignment.
+ */
+#if defined(ARCH_KMALLOC_MINALIGN)
+#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN
+#elif defined(ARCH_SLAB_MINALIGN)
+#define CRYPTO_MINALIGN ARCH_SLAB_MINALIGN
+#endif
+
+#ifdef CRYPTO_MINALIGN
+#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN)))
+#else
+#define CRYPTO_MINALIGN_ATTR
+#endif
+
 struct scatterlist;
+struct crypto_blkcipher;
+struct crypto_hash;
 struct crypto_tfm;
+struct crypto_type;
+
+struct blkcipher_desc {
+	struct crypto_blkcipher *tfm;
+	void *info;
+	u32 flags;
+};
 
 struct cipher_desc {
 	struct crypto_tfm *tfm;
@@ -72,30 +117,50 @@
 	void *info;
 };
 
+struct hash_desc {
+	struct crypto_hash *tfm;
+	u32 flags;
+};
+
 /*
  * Algorithms: modular crypto algorithm implementations, managed
  * via crypto_register_alg() and crypto_unregister_alg().
  */
+struct blkcipher_alg {
+	int (*setkey)(struct crypto_tfm *tfm, const u8 *key,
+	              unsigned int keylen);
+	int (*encrypt)(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes);
+	int (*decrypt)(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes);
+
+	unsigned int min_keysize;
+	unsigned int max_keysize;
+	unsigned int ivsize;
+};
+
 struct cipher_alg {
 	unsigned int cia_min_keysize;
 	unsigned int cia_max_keysize;
 	int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key,
-	                  unsigned int keylen, u32 *flags);
+	                  unsigned int keylen);
 	void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 	void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 
 	unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc,
 					u8 *dst, const u8 *src,
-					unsigned int nbytes);
+					unsigned int nbytes) __deprecated;
 	unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc,
 					u8 *dst, const u8 *src,
-					unsigned int nbytes);
+					unsigned int nbytes) __deprecated;
 	unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc,
 					u8 *dst, const u8 *src,
-					unsigned int nbytes);
+					unsigned int nbytes) __deprecated;
 	unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc,
 					u8 *dst, const u8 *src,
-					unsigned int nbytes);
+					unsigned int nbytes) __deprecated;
 };
 
 struct digest_alg {
@@ -105,7 +170,20 @@
 			   unsigned int len);
 	void (*dia_final)(struct crypto_tfm *tfm, u8 *out);
 	int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key,
-	                  unsigned int keylen, u32 *flags);
+	                  unsigned int keylen);
+};
+
+struct hash_alg {
+	int (*init)(struct hash_desc *desc);
+	int (*update)(struct hash_desc *desc, struct scatterlist *sg,
+		      unsigned int nbytes);
+	int (*final)(struct hash_desc *desc, u8 *out);
+	int (*digest)(struct hash_desc *desc, struct scatterlist *sg,
+		      unsigned int nbytes, u8 *out);
+	int (*setkey)(struct crypto_hash *tfm, const u8 *key,
+		      unsigned int keylen);
+
+	unsigned int digestsize;
 };
 
 struct compress_alg {
@@ -115,30 +193,40 @@
 			      unsigned int slen, u8 *dst, unsigned int *dlen);
 };
 
+#define cra_blkcipher	cra_u.blkcipher
 #define cra_cipher	cra_u.cipher
 #define cra_digest	cra_u.digest
+#define cra_hash	cra_u.hash
 #define cra_compress	cra_u.compress
 
 struct crypto_alg {
 	struct list_head cra_list;
+	struct list_head cra_users;
+
 	u32 cra_flags;
 	unsigned int cra_blocksize;
 	unsigned int cra_ctxsize;
 	unsigned int cra_alignmask;
 
 	int cra_priority;
+	atomic_t cra_refcnt;
 
 	char cra_name[CRYPTO_MAX_ALG_NAME];
 	char cra_driver_name[CRYPTO_MAX_ALG_NAME];
 
+	const struct crypto_type *cra_type;
+
 	union {
+		struct blkcipher_alg blkcipher;
 		struct cipher_alg cipher;
 		struct digest_alg digest;
+		struct hash_alg hash;
 		struct compress_alg compress;
 	} cra_u;
 
 	int (*cra_init)(struct crypto_tfm *tfm);
 	void (*cra_exit)(struct crypto_tfm *tfm);
+	void (*cra_destroy)(struct crypto_alg *alg);
 	
 	struct module *cra_module;
 };
@@ -153,20 +241,39 @@
  * Algorithm query interface.
  */
 #ifdef CONFIG_CRYPTO
-int crypto_alg_available(const char *name, u32 flags);
+int crypto_alg_available(const char *name, u32 flags)
+	__deprecated_for_modules;
+int crypto_has_alg(const char *name, u32 type, u32 mask);
 #else
+static int crypto_alg_available(const char *name, u32 flags)
+	__deprecated_for_modules;
 static inline int crypto_alg_available(const char *name, u32 flags)
 {
 	return 0;
 }
+
+static inline int crypto_has_alg(const char *name, u32 type, u32 mask)
+{
+	return 0;
+}
 #endif
 
 /*
  * Transforms: user-instantiated objects which encapsulate algorithms
- * and core processing logic.  Managed via crypto_alloc_tfm() and
- * crypto_free_tfm(), as well as the various helpers below.
+ * and core processing logic.  Managed via crypto_alloc_*() and
+ * crypto_free_*(), as well as the various helpers below.
  */
 
+struct blkcipher_tfm {
+	void *iv;
+	int (*setkey)(struct crypto_tfm *tfm, const u8 *key,
+		      unsigned int keylen);
+	int (*encrypt)(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes);
+	int (*decrypt)(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes);
+};
+
 struct cipher_tfm {
 	void *cit_iv;
 	unsigned int cit_ivsize;
@@ -190,20 +297,20 @@
 			   struct scatterlist *src,
 			   unsigned int nbytes, u8 *iv);
 	void (*cit_xor_block)(u8 *dst, const u8 *src);
+	void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+	void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 };
 
-struct digest_tfm {
-	void (*dit_init)(struct crypto_tfm *tfm);
-	void (*dit_update)(struct crypto_tfm *tfm,
-	                   struct scatterlist *sg, unsigned int nsg);
-	void (*dit_final)(struct crypto_tfm *tfm, u8 *out);
-	void (*dit_digest)(struct crypto_tfm *tfm, struct scatterlist *sg,
-	                   unsigned int nsg, u8 *out);
-	int (*dit_setkey)(struct crypto_tfm *tfm,
-	                  const u8 *key, unsigned int keylen);
-#ifdef CONFIG_CRYPTO_HMAC
-	void *dit_hmac_block;
-#endif
+struct hash_tfm {
+	int (*init)(struct hash_desc *desc);
+	int (*update)(struct hash_desc *desc,
+		      struct scatterlist *sg, unsigned int nsg);
+	int (*final)(struct hash_desc *desc, u8 *out);
+	int (*digest)(struct hash_desc *desc, struct scatterlist *sg,
+		      unsigned int nsg, u8 *out);
+	int (*setkey)(struct crypto_hash *tfm, const u8 *key,
+		      unsigned int keylen);
+	unsigned int digestsize;
 };
 
 struct compress_tfm {
@@ -215,8 +322,9 @@
 	                      u8 *dst, unsigned int *dlen);
 };
 
+#define crt_blkcipher	crt_u.blkcipher
 #define crt_cipher	crt_u.cipher
-#define crt_digest	crt_u.digest
+#define crt_hash	crt_u.hash
 #define crt_compress	crt_u.compress
 
 struct crypto_tfm {
@@ -224,30 +332,43 @@
 	u32 crt_flags;
 	
 	union {
+		struct blkcipher_tfm blkcipher;
 		struct cipher_tfm cipher;
-		struct digest_tfm digest;
+		struct hash_tfm hash;
 		struct compress_tfm compress;
 	} crt_u;
 	
 	struct crypto_alg *__crt_alg;
 
-	char __crt_ctx[] __attribute__ ((__aligned__));
+	void *__crt_ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+#define crypto_cipher crypto_tfm
+#define crypto_comp crypto_tfm
+
+struct crypto_blkcipher {
+	struct crypto_tfm base;
+};
+
+struct crypto_hash {
+	struct crypto_tfm base;
+};
+
+enum {
+	CRYPTOA_UNSPEC,
+	CRYPTOA_ALG,
+};
+
+struct crypto_attr_alg {
+	char name[CRYPTO_MAX_ALG_NAME];
 };
 
 /* 
  * Transform user interface.
  */
  
-/*
- * crypto_alloc_tfm() will first attempt to locate an already loaded algorithm.
- * If that fails and the kernel supports dynamically loadable modules, it
- * will then attempt to load a module of the same name or alias.  A refcount
- * is grabbed on the algorithm which is then associated with the new transform.
- *
- * crypto_free_tfm() frees up the transform and any associated resources,
- * then drops the refcount on the associated algorithm.
- */
 struct crypto_tfm *crypto_alloc_tfm(const char *alg_name, u32 tfm_flags);
+struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask);
 void crypto_free_tfm(struct crypto_tfm *tfm);
 
 /*
@@ -258,6 +379,16 @@
 	return tfm->__crt_alg->cra_name;
 }
 
+static inline const char *crypto_tfm_alg_driver_name(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_driver_name;
+}
+
+static inline int crypto_tfm_alg_priority(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_priority;
+}
+
 static inline const char *crypto_tfm_alg_modname(struct crypto_tfm *tfm)
 {
 	return module_name(tfm->__crt_alg->cra_module);
@@ -268,18 +399,23 @@
 	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK;
 }
 
+static unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm)
+	__deprecated;
 static inline unsigned int crypto_tfm_alg_min_keysize(struct crypto_tfm *tfm)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
 	return tfm->__crt_alg->cra_cipher.cia_min_keysize;
 }
 
+static unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm)
+	__deprecated;
 static inline unsigned int crypto_tfm_alg_max_keysize(struct crypto_tfm *tfm)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
 	return tfm->__crt_alg->cra_cipher.cia_max_keysize;
 }
 
+static unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm) __deprecated;
 static inline unsigned int crypto_tfm_alg_ivsize(struct crypto_tfm *tfm)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
@@ -302,6 +438,21 @@
 	return tfm->__crt_alg->cra_alignmask;
 }
 
+static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm)
+{
+	return tfm->crt_flags;
+}
+
+static inline void crypto_tfm_set_flags(struct crypto_tfm *tfm, u32 flags)
+{
+	tfm->crt_flags |= flags;
+}
+
+static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags)
+{
+	tfm->crt_flags &= ~flags;
+}
+
 static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
 {
 	return tfm->__crt_ctx;
@@ -316,50 +467,374 @@
 /*
  * API wrappers.
  */
-static inline void crypto_digest_init(struct crypto_tfm *tfm)
+static inline struct crypto_blkcipher *__crypto_blkcipher_cast(
+	struct crypto_tfm *tfm)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	tfm->crt_digest.dit_init(tfm);
+	return (struct crypto_blkcipher *)tfm;
 }
 
-static inline void crypto_digest_update(struct crypto_tfm *tfm,
-                                        struct scatterlist *sg,
-                                        unsigned int nsg)
+static inline struct crypto_blkcipher *crypto_blkcipher_cast(
+	struct crypto_tfm *tfm)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	tfm->crt_digest.dit_update(tfm, sg, nsg);
+	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_BLKCIPHER);
+	return __crypto_blkcipher_cast(tfm);
 }
 
-static inline void crypto_digest_final(struct crypto_tfm *tfm, u8 *out)
+static inline struct crypto_blkcipher *crypto_alloc_blkcipher(
+	const char *alg_name, u32 type, u32 mask)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	tfm->crt_digest.dit_final(tfm, out);
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_BLKCIPHER;
+	mask |= CRYPTO_ALG_TYPE_MASK;
+
+	return __crypto_blkcipher_cast(crypto_alloc_base(alg_name, type, mask));
 }
 
-static inline void crypto_digest_digest(struct crypto_tfm *tfm,
-                                        struct scatterlist *sg,
-                                        unsigned int nsg, u8 *out)
+static inline struct crypto_tfm *crypto_blkcipher_tfm(
+	struct crypto_blkcipher *tfm)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	tfm->crt_digest.dit_digest(tfm, sg, nsg, out);
+	return &tfm->base;
 }
 
+static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm)
+{
+	crypto_free_tfm(crypto_blkcipher_tfm(tfm));
+}
+
+/* Call crypto_has_alg() with the type bits of 'type' set to BLKCIPHER. */
+static inline int crypto_has_blkcipher(const char *alg_name, u32 type, u32 mask)
+{
+	/* Overwrite the caller's type bits; add the type mask to 'mask'. */
+	type = (type & ~CRYPTO_ALG_TYPE_MASK) | CRYPTO_ALG_TYPE_BLKCIPHER;
+
+	return crypto_has_alg(alg_name, type, mask | CRYPTO_ALG_TYPE_MASK);
+}
+
+static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm));
+}
+
+static inline struct blkcipher_tfm *crypto_blkcipher_crt(
+	struct crypto_blkcipher *tfm)
+{
+	return &crypto_blkcipher_tfm(tfm)->crt_blkcipher;
+}
+
+static inline struct blkcipher_alg *crypto_blkcipher_alg(
+	struct crypto_blkcipher *tfm)
+{
+	return &crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher;
+}
+
+static inline unsigned int crypto_blkcipher_ivsize(struct crypto_blkcipher *tfm)
+{
+	return crypto_blkcipher_alg(tfm)->ivsize;
+}
+
+static inline unsigned int crypto_blkcipher_blocksize(
+	struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_alg_blocksize(crypto_blkcipher_tfm(tfm));
+}
+
+static inline unsigned int crypto_blkcipher_alignmask(
+	struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_alg_alignmask(crypto_blkcipher_tfm(tfm));
+}
+
+static inline u32 crypto_blkcipher_get_flags(struct crypto_blkcipher *tfm)
+{
+	return crypto_tfm_get_flags(crypto_blkcipher_tfm(tfm));
+}
+
+static inline void crypto_blkcipher_set_flags(struct crypto_blkcipher *tfm,
+					      u32 flags)
+{
+	crypto_tfm_set_flags(crypto_blkcipher_tfm(tfm), flags);
+}
+
+static inline void crypto_blkcipher_clear_flags(struct crypto_blkcipher *tfm,
+						u32 flags)
+{
+	crypto_tfm_clear_flags(crypto_blkcipher_tfm(tfm), flags);
+}
+
+static inline int crypto_blkcipher_setkey(struct crypto_blkcipher *tfm,
+					  const u8 *key, unsigned int keylen)
+{
+	return crypto_blkcipher_crt(tfm)->setkey(crypto_blkcipher_tfm(tfm),
+						 key, keylen);
+}
+
+/* Point desc->info at the tfm's iv field, then call its encrypt hook. */
+static inline int crypto_blkcipher_encrypt(
+	struct blkcipher_desc *desc, struct scatterlist *dst,
+	struct scatterlist *src, unsigned int nbytes)
+{
+	desc->info = crypto_blkcipher_crt(desc->tfm)->iv;
+	return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes);
+}
+
+static inline int crypto_blkcipher_encrypt_iv(struct blkcipher_desc *desc,
+					      struct scatterlist *dst,
+					      struct scatterlist *src,
+					      unsigned int nbytes)
+{
+	return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes);
+}
+
+/* Point desc->info at the tfm's iv field, then call its decrypt hook. */
+static inline int crypto_blkcipher_decrypt(
+	struct blkcipher_desc *desc, struct scatterlist *dst,
+	struct scatterlist *src, unsigned int nbytes)
+{
+	desc->info = crypto_blkcipher_crt(desc->tfm)->iv;
+	return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes);
+}
+
+static inline int crypto_blkcipher_decrypt_iv(struct blkcipher_desc *desc,
+					      struct scatterlist *dst,
+					      struct scatterlist *src,
+					      unsigned int nbytes)
+{
+	return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes);
+}
+
+static inline void crypto_blkcipher_set_iv(struct crypto_blkcipher *tfm,
+					   const u8 *src, unsigned int len)
+{
+	memcpy(crypto_blkcipher_crt(tfm)->iv, src, len);
+}
+
+static inline void crypto_blkcipher_get_iv(struct crypto_blkcipher *tfm,
+					   u8 *dst, unsigned int len)
+{
+	memcpy(dst, crypto_blkcipher_crt(tfm)->iv, len);
+}
+
+static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm)
+{
+	return (struct crypto_cipher *)tfm;
+}
+
+static inline struct crypto_cipher *crypto_cipher_cast(struct crypto_tfm *tfm)
+{
+	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
+	return __crypto_cipher_cast(tfm);
+}
+
+/* Allocate through crypto_alloc_base() with the type bits forced to CIPHER. */
+static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name,
+							u32 type, u32 mask)
+{
+	type = (type & ~CRYPTO_ALG_TYPE_MASK) | CRYPTO_ALG_TYPE_CIPHER;
+
+	return __crypto_cipher_cast(
+		crypto_alloc_base(alg_name, type, mask | CRYPTO_ALG_TYPE_MASK));
+}
+
+static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm)
+{
+	return tfm;
+}
+
+static inline void crypto_free_cipher(struct crypto_cipher *tfm)
+{
+	crypto_free_tfm(crypto_cipher_tfm(tfm));
+}
+
+/* Call crypto_has_alg() with the type bits of 'type' set to CIPHER. */
+static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask)
+{
+	/* Overwrite the caller's type bits; add the type mask to 'mask'. */
+	type = (type & ~CRYPTO_ALG_TYPE_MASK) | CRYPTO_ALG_TYPE_CIPHER;
+
+	return crypto_has_alg(alg_name, type, mask | CRYPTO_ALG_TYPE_MASK);
+}
+
+static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm)
+{
+	return &crypto_cipher_tfm(tfm)->crt_cipher;
+}
+
+static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm)
+{
+	return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm));
+}
+
+static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm)
+{
+	return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm));
+}
+
+static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm)
+{
+	return crypto_tfm_get_flags(crypto_cipher_tfm(tfm));
+}
+
+static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm,
+					   u32 flags)
+{
+	crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags);
+}
+
+static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm,
+					     u32 flags)
+{
+	crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags);
+}
+
+static inline int crypto_cipher_setkey(struct crypto_cipher *tfm,
+                                       const u8 *key, unsigned int keylen)
+{
+	return crypto_cipher_crt(tfm)->cit_setkey(crypto_cipher_tfm(tfm),
+						  key, keylen);
+}
+
+static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm,
+					     u8 *dst, const u8 *src)
+{
+	crypto_cipher_crt(tfm)->cit_encrypt_one(crypto_cipher_tfm(tfm),
+						dst, src);
+}
+
+static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm,
+					     u8 *dst, const u8 *src)
+{
+	crypto_cipher_crt(tfm)->cit_decrypt_one(crypto_cipher_tfm(tfm),
+						dst, src);
+}
+
+void crypto_digest_init(struct crypto_tfm *tfm) __deprecated_for_modules;
+void crypto_digest_update(struct crypto_tfm *tfm,
+			  struct scatterlist *sg, unsigned int nsg)
+	__deprecated_for_modules;
+void crypto_digest_final(struct crypto_tfm *tfm, u8 *out)
+	__deprecated_for_modules;
+void crypto_digest_digest(struct crypto_tfm *tfm,
+			  struct scatterlist *sg, unsigned int nsg, u8 *out)
+	__deprecated_for_modules;
+
+static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm)
+{
+	return (struct crypto_hash *)tfm;
+}
+
+static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm)
+{
+	u32 diff = crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_HASH;
+	BUG_ON(diff & CRYPTO_ALG_TYPE_HASH_MASK);	/* type bits differ */
+	return __crypto_hash_cast(tfm);
+}
+
+static int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key,
+				unsigned int keylen) __deprecated;
 static inline int crypto_digest_setkey(struct crypto_tfm *tfm,
                                        const u8 *key, unsigned int keylen)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_DIGEST);
-	if (tfm->crt_digest.dit_setkey == NULL)
-		return -ENOSYS;
-	return tfm->crt_digest.dit_setkey(tfm, key, keylen);
+	return tfm->crt_hash.setkey(crypto_hash_cast(tfm), key, keylen);
 }
 
-static inline int crypto_cipher_setkey(struct crypto_tfm *tfm,
-                                       const u8 *key, unsigned int keylen)
+static inline struct crypto_hash *crypto_alloc_hash(const char *alg_name,
+						    u32 type, u32 mask)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	return tfm->crt_cipher.cit_setkey(tfm, key, keylen);
+	type &= ~CRYPTO_ALG_TYPE_MASK;
+	type |= CRYPTO_ALG_TYPE_HASH;
+	mask |= CRYPTO_ALG_TYPE_HASH_MASK;
+
+	return __crypto_hash_cast(crypto_alloc_base(alg_name, type, mask));
 }
 
+static inline struct crypto_tfm *crypto_hash_tfm(struct crypto_hash *tfm)
+{
+	return &tfm->base;
+}
+
+static inline void crypto_free_hash(struct crypto_hash *tfm)
+{
+	crypto_free_tfm(crypto_hash_tfm(tfm));
+}
+
+/* Call crypto_has_alg() with the type bits of 'type' set to HASH. */
+static inline int crypto_has_hash(const char *alg_name, u32 type, u32 mask)
+{
+	/* Overwrite the caller's type bits; add the hash type mask to 'mask'. */
+	type = (type & ~CRYPTO_ALG_TYPE_MASK) | CRYPTO_ALG_TYPE_HASH;
+
+	return crypto_has_alg(alg_name, type, mask | CRYPTO_ALG_TYPE_HASH_MASK);
+}
+
+static inline struct hash_tfm *crypto_hash_crt(struct crypto_hash *tfm)
+{
+	return &crypto_hash_tfm(tfm)->crt_hash;
+}
+
+static inline unsigned int crypto_hash_blocksize(struct crypto_hash *tfm)
+{
+	return crypto_tfm_alg_blocksize(crypto_hash_tfm(tfm));
+}
+
+static inline unsigned int crypto_hash_alignmask(struct crypto_hash *tfm)
+{
+	return crypto_tfm_alg_alignmask(crypto_hash_tfm(tfm));
+}
+
+static inline unsigned int crypto_hash_digestsize(struct crypto_hash *tfm)
+{
+	return crypto_hash_crt(tfm)->digestsize;
+}
+
+static inline u32 crypto_hash_get_flags(struct crypto_hash *tfm)
+{
+	return crypto_tfm_get_flags(crypto_hash_tfm(tfm));
+}
+
+static inline void crypto_hash_set_flags(struct crypto_hash *tfm, u32 flags)
+{
+	crypto_tfm_set_flags(crypto_hash_tfm(tfm), flags);
+}
+
+static inline void crypto_hash_clear_flags(struct crypto_hash *tfm, u32 flags)
+{
+	crypto_tfm_clear_flags(crypto_hash_tfm(tfm), flags);
+}
+
+static inline int crypto_hash_init(struct hash_desc *desc)
+{
+	return crypto_hash_crt(desc->tfm)->init(desc);
+}
+
+static inline int crypto_hash_update(struct hash_desc *desc,
+				     struct scatterlist *sg,
+				     unsigned int nbytes)
+{
+	return crypto_hash_crt(desc->tfm)->update(desc, sg, nbytes);
+}
+
+static inline int crypto_hash_final(struct hash_desc *desc, u8 *out)
+{
+	return crypto_hash_crt(desc->tfm)->final(desc, out);
+}
+
+static inline int crypto_hash_digest(struct hash_desc *desc,
+				     struct scatterlist *sg,
+				     unsigned int nbytes, u8 *out)
+{
+	return crypto_hash_crt(desc->tfm)->digest(desc, sg, nbytes, out);
+}
+
+static inline int crypto_hash_setkey(struct crypto_hash *hash,
+				     const u8 *key, unsigned int keylen)
+{
+	return crypto_hash_crt(hash)->setkey(hash, key, keylen);
+}
+
+static int crypto_cipher_encrypt(struct crypto_tfm *tfm,
+				 struct scatterlist *dst,
+				 struct scatterlist *src,
+				 unsigned int nbytes) __deprecated;
 static inline int crypto_cipher_encrypt(struct crypto_tfm *tfm,
                                         struct scatterlist *dst,
                                         struct scatterlist *src,
@@ -369,16 +844,23 @@
 	return tfm->crt_cipher.cit_encrypt(tfm, dst, src, nbytes);
 }                                        
 
+static int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm,
+				    struct scatterlist *dst,
+				    struct scatterlist *src,
+				    unsigned int nbytes, u8 *iv) __deprecated;
 static inline int crypto_cipher_encrypt_iv(struct crypto_tfm *tfm,
                                            struct scatterlist *dst,
                                            struct scatterlist *src,
                                            unsigned int nbytes, u8 *iv)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	BUG_ON(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB);
 	return tfm->crt_cipher.cit_encrypt_iv(tfm, dst, src, nbytes, iv);
 }                                        
 
+static int crypto_cipher_decrypt(struct crypto_tfm *tfm,
+				 struct scatterlist *dst,
+				 struct scatterlist *src,
+				 unsigned int nbytes) __deprecated;
 static inline int crypto_cipher_decrypt(struct crypto_tfm *tfm,
                                         struct scatterlist *dst,
                                         struct scatterlist *src,
@@ -388,16 +870,21 @@
 	return tfm->crt_cipher.cit_decrypt(tfm, dst, src, nbytes);
 }
 
+static int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm,
+				    struct scatterlist *dst,
+				    struct scatterlist *src,
+				    unsigned int nbytes, u8 *iv) __deprecated;
 static inline int crypto_cipher_decrypt_iv(struct crypto_tfm *tfm,
                                            struct scatterlist *dst,
                                            struct scatterlist *src,
                                            unsigned int nbytes, u8 *iv)
 {
 	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
-	BUG_ON(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_ECB);
 	return tfm->crt_cipher.cit_decrypt_iv(tfm, dst, src, nbytes, iv);
 }
 
+static void crypto_cipher_set_iv(struct crypto_tfm *tfm,
+				 const u8 *src, unsigned int len) __deprecated;
 static inline void crypto_cipher_set_iv(struct crypto_tfm *tfm,
                                         const u8 *src, unsigned int len)
 {
@@ -405,6 +892,8 @@
 	memcpy(tfm->crt_cipher.cit_iv, src, len);
 }
 
+static void crypto_cipher_get_iv(struct crypto_tfm *tfm,
+				 u8 *dst, unsigned int len) __deprecated;
 static inline void crypto_cipher_get_iv(struct crypto_tfm *tfm,
                                         u8 *dst, unsigned int len)
 {
@@ -412,34 +901,70 @@
 	memcpy(dst, tfm->crt_cipher.cit_iv, len);
 }
 
-static inline int crypto_comp_compress(struct crypto_tfm *tfm,
+static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm)
+{
+	return (struct crypto_comp *)tfm;
+}
+
+static inline struct crypto_comp *crypto_comp_cast(struct crypto_tfm *tfm)
+{
+	u32 diff = crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_COMPRESS;
+	BUG_ON(diff & CRYPTO_ALG_TYPE_MASK);	/* type bits differ */
+	return __crypto_comp_cast(tfm);
+}
+
+/* Allocate via crypto_alloc_base() with the type bits forced to COMPRESS. */
+static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name,
+						    u32 type, u32 mask)
+{
+	type = (type & ~CRYPTO_ALG_TYPE_MASK) | CRYPTO_ALG_TYPE_COMPRESS;
+
+	return __crypto_comp_cast(
+		crypto_alloc_base(alg_name, type, mask | CRYPTO_ALG_TYPE_MASK));
+}
+
+static inline struct crypto_tfm *crypto_comp_tfm(struct crypto_comp *tfm)
+{
+	return tfm;
+}
+
+static inline void crypto_free_comp(struct crypto_comp *tfm)
+{
+	crypto_free_tfm(crypto_comp_tfm(tfm));
+}
+
+/* Call crypto_has_alg() with the type bits of 'type' set to COMPRESS. */
+static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask)
+{
+	/* Overwrite the caller's type bits; add the type mask to 'mask'. */
+	type = (type & ~CRYPTO_ALG_TYPE_MASK) | CRYPTO_ALG_TYPE_COMPRESS;
+
+	return crypto_has_alg(alg_name, type, mask | CRYPTO_ALG_TYPE_MASK);
+}
+
+static inline const char *crypto_comp_name(struct crypto_comp *tfm)
+{
+	return crypto_tfm_alg_name(crypto_comp_tfm(tfm));
+}
+
+static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm)
+{
+	return &crypto_comp_tfm(tfm)->crt_compress;
+}
+
+static inline int crypto_comp_compress(struct crypto_comp *tfm,
                                        const u8 *src, unsigned int slen,
                                        u8 *dst, unsigned int *dlen)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS);
-	return tfm->crt_compress.cot_compress(tfm, src, slen, dst, dlen);
+	return crypto_comp_crt(tfm)->cot_compress(tfm, src, slen, dst, dlen);
 }
 
-static inline int crypto_comp_decompress(struct crypto_tfm *tfm,
+static inline int crypto_comp_decompress(struct crypto_comp *tfm,
                                          const u8 *src, unsigned int slen,
                                          u8 *dst, unsigned int *dlen)
 {
-	BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_COMPRESS);
-	return tfm->crt_compress.cot_decompress(tfm, src, slen, dst, dlen);
+	return crypto_comp_crt(tfm)->cot_decompress(tfm, src, slen, dst, dlen);
 }
 
-/*
- * HMAC support.
- */
-#ifdef CONFIG_CRYPTO_HMAC
-void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen);
-void crypto_hmac_update(struct crypto_tfm *tfm,
-                        struct scatterlist *sg, unsigned int nsg);
-void crypto_hmac_final(struct crypto_tfm *tfm, u8 *key,
-                       unsigned int *keylen, u8 *out);
-void crypto_hmac(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen,
-                 struct scatterlist *sg, unsigned int nsg, u8 *out);
-#endif	/* CONFIG_CRYPTO_HMAC */
-
 #endif	/* _LINUX_CRYPTO_H */
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 471781f..44605be 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -221,6 +221,7 @@
  */
 extern void d_instantiate(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
+extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
 extern void d_delete(struct dentry *);
 
 /* allocate/de-allocate */
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 676333b..2d7671c 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -438,6 +438,7 @@
  * @dccps_role - Role of this sock, one of %dccp_role
  * @dccps_ndp_count - number of Non Data Packets since last data packet
  * @dccps_hc_rx_ackvec - rx half connection ack vector
+ * @dccps_xmit_timer - timer for when CCID is not ready to send
  */
 struct dccp_sock {
 	/* inet_connection_sock has to be the first member of dccp_sock */
@@ -470,6 +471,7 @@
 	enum dccp_role			dccps_role:2;
 	__u8				dccps_hc_rx_insert_options:1;
 	__u8				dccps_hc_tx_insert_options:1;
+	struct timer_list		dccps_xmit_timer;
 };
  
 static inline struct dccp_sock *dccp_sk(const struct sock *sk)
diff --git a/include/linux/dvb/Kbuild b/include/linux/dvb/Kbuild
index 63973af..d97b3a5 100644
--- a/include/linux/dvb/Kbuild
+++ b/include/linux/dvb/Kbuild
@@ -1,2 +1,9 @@
-header-y += ca.h frontend.h net.h osd.h version.h
-unifdef-y := audio.h dmx.h video.h
+header-y += ca.h
+header-y += frontend.h
+header-y += net.h
+header-y += osd.h
+header-y += version.h
+
+unifdef-y += audio.h
+unifdef-y += dmx.h
+unifdef-y += video.h
diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
new file mode 100644
index 0000000..4418c8d
--- /dev/null
+++ b/include/linux/fib_rules.h
@@ -0,0 +1,65 @@
+#ifndef __LINUX_FIB_RULES_H
+#define __LINUX_FIB_RULES_H
+
+#include <linux/types.h>
+#include <linux/rtnetlink.h>
+
+/* rule is permanent, and cannot be deleted */
+#define FIB_RULE_PERMANENT	1
+
+struct fib_rule_hdr
+{
+	__u8		family;
+	__u8		dst_len;
+	__u8		src_len;
+	__u8		tos;
+
+	__u8		table;
+	__u8		res1;	/* reserved */
+	__u8		res2;	/* reserved */
+	__u8		action;
+
+	__u32		flags;
+};
+
+enum
+{
+	FRA_UNSPEC,
+	FRA_DST,	/* destination address */
+	FRA_SRC,	/* source address */
+	FRA_IFNAME,	/* interface name */
+	FRA_UNUSED1,
+	FRA_UNUSED2,
+	FRA_PRIORITY,	/* priority/preference */
+	FRA_UNUSED3,
+	FRA_UNUSED4,
+	FRA_UNUSED5,
+	FRA_FWMARK,	/* netfilter mark */
+	FRA_FLOW,	/* flow/class id */
+	FRA_UNUSED6,
+	FRA_UNUSED7,
+	FRA_UNUSED8,
+	FRA_TABLE,	/* Extended table id */
+	FRA_FWMASK,	/* mask for netfilter mark */
+	__FRA_MAX
+};
+
+#define FRA_MAX (__FRA_MAX - 1)
+
+enum
+{
+	FR_ACT_UNSPEC,
+	FR_ACT_TO_TBL,		/* Pass to fixed table */
+	FR_ACT_RES1,
+	FR_ACT_RES2,
+	FR_ACT_RES3,
+	FR_ACT_RES4,
+	FR_ACT_BLACKHOLE,	/* Drop without notification */
+	FR_ACT_UNREACHABLE,	/* Drop with ENETUNREACH */
+	FR_ACT_PROHIBIT,	/* Drop with EACCES */
+	__FR_ACT_MAX,
+};
+
+#define FR_ACT_MAX (__FR_ACT_MAX - 1)
+
+#endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c6cb8f0..91b2e3b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -25,10 +25,10 @@
  
 struct sock_filter	/* Filter block */
 {
-        __u16	code;   /* Actual filter code */
-        __u8	jt;	/* Jump true */
-        __u8	jf;	/* Jump false */
-        __u32	k;      /* Generic multiuse field */
+	__u16	code;   /* Actual filter code */
+	__u8	jt;	/* Jump true */
+	__u8	jf;	/* Jump false */
+	__u32	k;      /* Generic multiuse field */
 };
 
 struct sock_fprog	/* Required for SO_ATTACH_FILTER. */
@@ -41,8 +41,9 @@
 struct sk_filter
 {
 	atomic_t		refcnt;
-        unsigned int         	len;	/* Number of filter blocks */
-        struct sock_filter     	insns[0];
+	unsigned int         	len;	/* Number of filter blocks */
+	struct rcu_head		rcu;
+	struct sock_filter     	insns[0];
 };
 
 static inline unsigned int sk_filter_len(struct sk_filter *fp)
diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index 84f12a4..9049dc6 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -16,6 +16,8 @@
 
 #define GENL_HDRLEN	NLMSG_ALIGN(sizeof(struct genlmsghdr))
 
+#define GENL_ADMIN_PERM		0x01
+
 /*
  * List of reserved static generic netlink identifiers:
  */
@@ -43,9 +45,25 @@
 	CTRL_ATTR_UNSPEC,
 	CTRL_ATTR_FAMILY_ID,
 	CTRL_ATTR_FAMILY_NAME,
+	CTRL_ATTR_VERSION,
+	CTRL_ATTR_HDRSIZE,
+	CTRL_ATTR_MAXATTR,
+	CTRL_ATTR_OPS,
 	__CTRL_ATTR_MAX,
 };
 
 #define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1)
 
+enum {
+	CTRL_ATTR_OP_UNSPEC,
+	CTRL_ATTR_OP_ID,
+	CTRL_ATTR_OP_FLAGS,
+	CTRL_ATTR_OP_POLICY,
+	CTRL_ATTR_OP_DOIT,
+	CTRL_ATTR_OP_DUMPIT,
+	__CTRL_ATTR_OP_MAX,
+};
+
+#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1)
+
 #endif	/* __LINUX_GENERIC_NETLINK_H */
diff --git a/include/linux/if.h b/include/linux/if.h
index 374e20a..cd080d7 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -212,5 +212,134 @@
 #define	ifc_buf	ifc_ifcu.ifcu_buf		/* buffer address	*/
 #define	ifc_req	ifc_ifcu.ifcu_req		/* array of structures	*/
 
+/* The struct should be in sync with struct net_device_stats */
+struct rtnl_link_stats
+{
+	__u32	rx_packets;		/* total packets received	*/
+	__u32	tx_packets;		/* total packets transmitted	*/
+	__u32	rx_bytes;		/* total bytes received 	*/
+	__u32	tx_bytes;		/* total bytes transmitted	*/
+	__u32	rx_errors;		/* bad packets received		*/
+	__u32	tx_errors;		/* packet transmit problems	*/
+	__u32	rx_dropped;		/* no space in linux buffers	*/
+	__u32	tx_dropped;		/* no space available in linux	*/
+	__u32	multicast;		/* multicast packets received	*/
+	__u32	collisions;
+
+	/* detailed rx_errors: */
+	__u32	rx_length_errors;
+	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
+	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
+	__u32	rx_frame_errors;	/* recv'd frame alignment error */
+	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
+	__u32	rx_missed_errors;	/* receiver missed packet	*/
+
+	/* detailed tx_errors */
+	__u32	tx_aborted_errors;
+	__u32	tx_carrier_errors;
+	__u32	tx_fifo_errors;
+	__u32	tx_heartbeat_errors;
+	__u32	tx_window_errors;
+
+	/* for cslip etc */
+	__u32	rx_compressed;
+	__u32	tx_compressed;
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap
+{
+	__u64	mem_start;
+	__u64	mem_end;
+	__u64	base_addr;
+	__u16	irq;
+	__u8	dma;
+	__u8	port;
+};
+
+enum
+{
+	IFLA_UNSPEC,
+	IFLA_ADDRESS,
+	IFLA_BROADCAST,
+	IFLA_IFNAME,
+	IFLA_MTU,
+	IFLA_LINK,
+	IFLA_QDISC,
+	IFLA_STATS,
+	IFLA_COST,
+#define IFLA_COST IFLA_COST
+	IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+	IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+	IFLA_PROTINFO,		/* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+	IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+	IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+	IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+	IFLA_OPERSTATE,
+	IFLA_LINKMODE,
+	__IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+/* ifi_flags.
+
+   IFF_* flags.
+
+   The only change is:
+   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+   no longer changeable by the user. They describe link media
+   characteristics and are set by the device driver.
+
+   Comments:
+   - The combination IFF_BROADCAST|IFF_POINTOPOINT is invalid.
+   - If none of these three flags is set,
+     the interface is NBMA.
+
+   - IFF_MULTICAST does not mean anything special:
+   multicasts can be used on all non-NBMA links.
+   IFF_MULTICAST means that this media uses special encapsulation
+   for multicast frames. Apparently, all IFF_POINTOPOINT and
+   IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+   For usual devices it is equal to ifi_index.
+   If it is a "virtual interface" (e.g. a tunnel), ifi_link
+   can point to the real physical interface (e.g. for bandwidth calculations),
+   or may be 0, which means that the real media is unknown (usual
+   for IPIP tunnels, when the route to the endpoint is allowed to change).
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum
+{
+	IFLA_INET6_UNSPEC,
+	IFLA_INET6_FLAGS,	/* link flags			*/
+	IFLA_INET6_CONF,	/* sysctl parameters		*/
+	IFLA_INET6_STATS,	/* statistics			*/
+	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
+	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
+	__IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
+
+struct ifla_cacheinfo
+{
+	__u32	max_reasm_len;
+	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
+	__u32	reachable_time;
+	__u32	retrans_time;
+};
 
 #endif /* _LINUX_IF_H */
diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
new file mode 100644
index 0000000..dbe8f61
--- /dev/null
+++ b/include/linux/if_addr.h
@@ -0,0 +1,55 @@
+#ifndef __LINUX_IF_ADDR_H
+#define __LINUX_IF_ADDR_H
+
+#include <linux/netlink.h>
+
+struct ifaddrmsg
+{
+	__u8		ifa_family;
+	__u8		ifa_prefixlen;	/* The prefix length		*/
+	__u8		ifa_flags;	/* Flags			*/
+	__u8		ifa_scope;	/* Address scope		*/
+	__u32		ifa_index;	/* Link index			*/
+};
+
+/*
+ * Important comment:
+ * IFA_ADDRESS is prefix address, rather than local interface address.
+ * It makes no difference for normally configured broadcast interfaces,
+ * but for point-to-point IFA_ADDRESS is DESTINATION address,
+ * local address is supplied in IFA_LOCAL attribute.
+ */
+enum
+{
+	IFA_UNSPEC,
+	IFA_ADDRESS,
+	IFA_LOCAL,
+	IFA_LABEL,
+	IFA_BROADCAST,
+	IFA_ANYCAST,
+	IFA_CACHEINFO,
+	IFA_MULTICAST,
+	__IFA_MAX,
+};
+
+#define IFA_MAX (__IFA_MAX - 1)
+
+/* ifa_flags */
+#define IFA_F_SECONDARY		0x01
+#define IFA_F_TEMPORARY		IFA_F_SECONDARY
+
+#define	IFA_F_NODAD		0x02
+#define	IFA_F_HOMEADDRESS	0x10
+#define IFA_F_DEPRECATED	0x20
+#define IFA_F_TENTATIVE		0x40
+#define IFA_F_PERMANENT		0x80
+
+struct ifa_cacheinfo
+{
+	__u32	ifa_prefered;
+	__u32	ifa_valid;
+	__u32	cstamp; /* created timestamp, hundredths of seconds */
+	__u32	tstamp; /* updated timestamp, hundredths of seconds */
+};
+
+#endif
diff --git a/include/linux/in.h b/include/linux/in.h
index 94f557f..bcaca83 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -52,7 +52,7 @@
 
 /* Internet address. */
 struct in_addr {
-	__u32	s_addr;
+	__be32	s_addr;
 };
 
 #define IP_TOS		1
@@ -177,7 +177,7 @@
 #define __SOCK_SIZE__	16		/* sizeof(struct sockaddr)	*/
 struct sockaddr_in {
   sa_family_t		sin_family;	/* Address family		*/
-  unsigned short int	sin_port;	/* Port number			*/
+  __be16		sin_port;	/* Port number			*/
   struct in_addr	sin_addr;	/* Internet address		*/
 
   /* Pad to size of `struct sockaddr'. */
diff --git a/include/linux/in6.h b/include/linux/in6.h
index 304aaed..d776829 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -134,6 +134,7 @@
 #define IPPROTO_ICMPV6		58	/* ICMPv6			*/
 #define IPPROTO_NONE		59	/* IPv6 no next header		*/
 #define IPPROTO_DSTOPTS		60	/* IPv6 destination options	*/
+#define IPPROTO_MH		135	/* IPv6 mobility header		*/
 
 /*
  *	IPv6 TLV options.
@@ -142,6 +143,7 @@
 #define IPV6_TLV_PADN		1
 #define IPV6_TLV_ROUTERALERT	5
 #define IPV6_TLV_JUMBO		194
+#define IPV6_TLV_HAO		201	/* home address option */
 
 /*
  *	IPV6 socket options
diff --git a/include/linux/inet.h b/include/linux/inet.h
index 6c5587a..b7c6da7 100644
--- a/include/linux/inet.h
+++ b/include/linux/inet.h
@@ -46,5 +46,7 @@
 #include <linux/types.h>
 
 extern __be32 in_aton(const char *str);
+extern int in4_pton(const char *src, int srclen, u8 *dst, char delim, const char **end);
+extern int in6_pton(const char *src, int srclen, u8 *dst, char delim, const char **end);
 #endif
 #endif	/* _LINUX_INET_H */
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 4b55cf1..2f46001 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -57,6 +57,7 @@
 #define IPOPT_SEC	(2 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_LSRR	(3 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_TIMESTAMP	(4 |IPOPT_MEASUREMENT)
+#define IPOPT_CIPSO	(6 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_RR	(7 |IPOPT_CONTROL)
 #define IPOPT_SID	(8 |IPOPT_CONTROL|IPOPT_COPY)
 #define IPOPT_SSRR	(9 |IPOPT_CONTROL|IPOPT_COPY)
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 297853c..caca57d 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -29,6 +29,7 @@
 
 #define IPV6_SRCRT_STRICT	0x01	/* this hop must be a neighbor	*/
 #define IPV6_SRCRT_TYPE_0	0	/* IPv6 type 0 Routing Header	*/
+#define IPV6_SRCRT_TYPE_2	2	/* IPv6 type 2 Routing Header	*/
 
 /*
  *	routing header
@@ -73,6 +74,28 @@
 #define rt0_type		rt_hdr.type
 };
 
+/*
+ *	routing header type 2
+ */
+
+struct rt2_hdr {
+	struct ipv6_rt_hdr	rt_hdr;
+	__u32			reserved;
+	struct in6_addr		addr;
+
+#define rt2_type		rt_hdr.type
+};
+
+/*
+ *	home address option in destination options header
+ */
+
+struct ipv6_destopt_hao {
+	__u8			type;
+	__u8			length;
+	struct in6_addr		addr;
+} __attribute__ ((__packed__));
+
 struct ipv6_auth_hdr {
 	__u8  nexthdr;
 	__u8  hdrlen;           /* This one is measured in 32 bit units! */
@@ -153,6 +176,7 @@
 	__s32		accept_ra_rt_info_max_plen;
 #endif
 #endif
+	__s32		proxy_ndp;
 	void		*sysctl;
 };
 
@@ -180,6 +204,7 @@
 	DEVCONF_ACCEPT_RA_RTR_PREF,
 	DEVCONF_RTR_PROBE_INTERVAL,
 	DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN,
+	DEVCONF_PROXY_NDP,
 	DEVCONF_MAX
 };
 
@@ -206,6 +231,9 @@
 	__u16			lastopt;
 	__u32			nhoff;
 	__u16			flags;
+#ifdef CONFIG_IPV6_MIP6
+	__u16			dsthao;
+#endif
 
 #define IP6SKB_XFRM_TRANSFORMED	1
 };
@@ -242,6 +270,9 @@
 	struct in6_addr 	rcv_saddr;
 	struct in6_addr		daddr;
 	struct in6_addr		*daddr_cache;
+#ifdef CONFIG_IPV6_SUBTREES
+	struct in6_addr		*saddr_cache;
+#endif
 
 	__u32			flow_label;
 	__u32			frag_size;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 851aa1b..2b2ae4f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -31,7 +31,7 @@
 #define STACK_MAGIC	0xdeadbeef
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
+#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0b135c..224178a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1013,6 +1013,7 @@
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
+pgprot_t vm_get_page_prot(unsigned long vm_flags);
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 struct page *vmalloc_to_page(void *addr);
 unsigned long vmalloc_to_pfn(void *addr);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index f697770..f7ca0b0 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -148,6 +148,17 @@
 #define CCW_DEVICE_ID_MATCH_DEVICE_TYPE		0x04
 #define CCW_DEVICE_ID_MATCH_DEVICE_MODEL	0x08
 
+/* s390 AP bus devices */
+struct ap_device_id {
+	__u16 match_flags;	/* which fields to match against */
+	__u8 dev_type;		/* device type */
+	__u8 pad1;
+	__u32 pad2;
+	kernel_ulong_t driver_info;
+};
+
+#define AP_DEVICE_ID_MATCH_DEVICE_TYPE		0x01
+
 
 #define PNP_ID_LEN	8
 #define PNP_MAX_DEVICES	8
diff --git a/include/linux/module.h b/include/linux/module.h
index 0dfb794..d4486cc 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -156,6 +156,11 @@
 */
 #define MODULE_VERSION(_version) MODULE_INFO(version, _version)
 
+/* Optional firmware file (or files) needed by the module
+ * format is simply firmware file name.  Multiple firmware
+ * files require multiple MODULE_FIRMWARE() specifiers */
+#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
+
 /* Given an address, look for it in the exception tables */
 const struct exception_table_entry *search_exception_tables(unsigned long add);
 
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 09bfae6..123948b 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -199,6 +199,18 @@
 	uint8_t  TopBottom;
 } __attribute__((packed));
 
+/* Vendor-Specific PRI for Atmel chips (command set 0x0002) */
+
+struct cfi_pri_atmel {
+	uint8_t pri[3];
+	uint8_t MajorVersion;
+	uint8_t MinorVersion;
+	uint8_t Features;
+	uint8_t BottomBoot;
+	uint8_t BurstMode;
+	uint8_t PageMode;
+} __attribute__((packed));
+
 struct cfi_pri_query {
 	uint8_t  NumFields;
 	uint32_t ProtField[1]; /* Not host ordered */
@@ -464,6 +476,7 @@
 #define CFI_ID_ANY  0xffff
 
 #define CFI_MFR_AMD 0x0001
+#define CFI_MFR_ATMEL 0x001F
 #define CFI_MFR_ST  0x0020 	/* STMicroelectronics */
 
 void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups);
diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
new file mode 100644
index 0000000..bd3bbf6
--- /dev/null
+++ b/include/linux/neighbour.h
@@ -0,0 +1,159 @@
+#ifndef __LINUX_NEIGHBOUR_H
+#define __LINUX_NEIGHBOUR_H
+
+#include <linux/netlink.h>
+
+struct ndmsg
+{
+	__u8		ndm_family;
+	__u8		ndm_pad1;
+	__u16		ndm_pad2;
+	__s32		ndm_ifindex;
+	__u16		ndm_state;
+	__u8		ndm_flags;
+	__u8		ndm_type;
+};
+
+enum
+{
+	NDA_UNSPEC,
+	NDA_DST,
+	NDA_LLADDR,
+	NDA_CACHEINFO,
+	NDA_PROBES,
+	__NDA_MAX
+};
+
+#define NDA_MAX (__NDA_MAX - 1)
+
+/*
+ *	Neighbor Cache Entry Flags
+ */
+
+#define NTF_PROXY	0x08	/* == ATF_PUBL */
+#define NTF_ROUTER	0x80
+
+/*
+ *	Neighbor Cache Entry States.
+ */
+
+#define NUD_INCOMPLETE	0x01
+#define NUD_REACHABLE	0x02
+#define NUD_STALE	0x04
+#define NUD_DELAY	0x08
+#define NUD_PROBE	0x10
+#define NUD_FAILED	0x20
+
+/* Dummy states */
+#define NUD_NOARP	0x40
+#define NUD_PERMANENT	0x80
+#define NUD_NONE	0x00
+
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
+   and make no address resolution or NUD.
+   NUD_PERMANENT also cannot be deleted by garbage collectors.
+ */
+
+struct nda_cacheinfo
+{
+	__u32		ndm_confirmed;
+	__u32		ndm_used;
+	__u32		ndm_updated;
+	__u32		ndm_refcnt;
+};
+
+/*****************************************************************
+ *		Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all non-device-specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attributes include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats
+{
+	__u64		ndts_allocs;
+	__u64		ndts_destroys;
+	__u64		ndts_hash_grows;
+	__u64		ndts_res_failed;
+	__u64		ndts_lookups;
+	__u64		ndts_hits;
+	__u64		ndts_rcv_probes_mcast;
+	__u64		ndts_rcv_probes_ucast;
+	__u64		ndts_periodic_gc_runs;
+	__u64		ndts_forced_gc_runs;
+};
+
+enum {
+	NDTPA_UNSPEC,
+	NDTPA_IFINDEX,			/* u32, unchangeable */
+	NDTPA_REFCNT,			/* u32, read-only */
+	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
+	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
+	NDTPA_RETRANS_TIME,		/* u64, msecs */
+	NDTPA_GC_STALETIME,		/* u64, msecs */
+	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
+	NDTPA_QUEUE_LEN,		/* u32 */
+	NDTPA_APP_PROBES,		/* u32 */
+	NDTPA_UCAST_PROBES,		/* u32 */
+	NDTPA_MCAST_PROBES,		/* u32 */
+	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_QLEN,		/* u32 */
+	NDTPA_LOCKTIME,			/* u64, msecs */
+	__NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg
+{
+	__u8		ndtm_family;
+	__u8		ndtm_pad1;
+	__u16		ndtm_pad2;
+};
+
+struct ndt_config
+{
+	__u16		ndtc_key_len;
+	__u16		ndtc_entry_size;
+	__u32		ndtc_entries;
+	__u32		ndtc_last_flush;	/* delta to now in msecs */
+	__u32		ndtc_last_rand;		/* delta to now in msecs */
+	__u32		ndtc_hash_rnd;
+	__u32		ndtc_hash_mask;
+	__u32		ndtc_hash_chain_gc;
+	__u32		ndtc_proxy_qlen;
+};
+
+enum {
+	NDTA_UNSPEC,
+	NDTA_NAME,			/* char *, unchangeable */
+	NDTA_THRESH1,			/* u32 */
+	NDTA_THRESH2,			/* u32 */
+	NDTA_THRESH3,			/* u32 */
+	NDTA_CONFIG,			/* struct ndt_config, read-only */
+	NDTA_PARMS,			/* nested TLV NDTPA_* */
+	NDTA_STATS,			/* struct ndt_stats, read-only */
+	NDTA_GC_INTERVAL,		/* u64, msecs */
+	__NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+#endif
diff --git a/include/linux/net.h b/include/linux/net.h
index b20c53c..c257f71 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -169,11 +169,6 @@
 struct net_proto_family {
 	int		family;
 	int		(*create)(struct socket *sock, int protocol);
-	/* These are counters for the number of different methods of
-	   each we support */
-	short		authentication;
-	short		encryption;
-	short		encrypt_net;
 	struct module	*owner;
 };
 
@@ -181,8 +176,8 @@
 struct kvec;
 
 extern int	     sock_wake_async(struct socket *sk, int how, int band);
-extern int	     sock_register(struct net_proto_family *fam);
-extern int	     sock_unregister(int family);
+extern int	     sock_register(const struct net_proto_family *fam);
+extern void	     sock_unregister(int family);
 extern int	     sock_create(int family, int type, int proto,
 				 struct socket **res);
 extern int	     sock_create_kern(int family, int type, int proto,
@@ -208,6 +203,25 @@
 				    struct kvec *vec, size_t num,
 				    size_t len, int flags);
 
+extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
+		       int addrlen);
+extern int kernel_listen(struct socket *sock, int backlog);
+extern int kernel_accept(struct socket *sock, struct socket **newsock,
+			 int flags);
+extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
+			  int addrlen, int flags);
+extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+			      int *addrlen);
+extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+			      int *addrlen);
+extern int kernel_getsockopt(struct socket *sock, int level, int optname,
+			     char *optval, int *optlen);
+extern int kernel_setsockopt(struct socket *sock, int level, int optname,
+			     char *optval, int optlen);
+extern int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+			   size_t size, int flags);
+extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
+
 #ifndef CONFIG_SMP
 #define SOCKOPS_WRAPPED(name) name
 #define SOCKOPS_WRAP(name, fam)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 50a4719..4f2c2b6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -976,7 +976,7 @@
 extern int		netdev_max_backlog;
 extern int		weight_p;
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
-extern int skb_checksum_help(struct sk_buff *skb, int inward);
+extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features);
 #ifdef CONFIG_BUG
 extern void netdev_rx_csum_fault(struct net_device *dev);
@@ -1012,7 +1012,7 @@
 {
 	return skb_is_gso(skb) &&
 	       (!skb_gso_ok(skb, dev->features) ||
-		unlikely(skb->ip_summed != CHECKSUM_HW));
+		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
 }
 
 /* On bonding slaves other than the currently active slave, suppress
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 10168e2..b7e67d1 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -282,6 +282,12 @@
    Returns true or false. */
 extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len);
 
+extern u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval,
+				u_int32_t csum);
+extern u_int16_t nf_proto_csum_update(struct sk_buff *skb,
+				      u_int32_t oldval, u_int32_t newval,
+				      u_int16_t csum, int pseudohdr);
+
 struct nf_afinfo {
 	unsigned short	family;
 	unsigned int	(*checksum)(struct sk_buff *skb, unsigned int hook,
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 1d3a14e..9a285ce 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -1,11 +1,38 @@
-header-y := nf_conntrack_sctp.h nf_conntrack_tuple_common.h		\
-	    nfnetlink_conntrack.h nfnetlink_log.h nfnetlink_queue.h	\
-	    xt_CLASSIFY.h xt_comment.h xt_connbytes.h xt_connmark.h	\
-	    xt_CONNMARK.h xt_conntrack.h xt_dccp.h xt_esp.h		\
-	    xt_helper.h xt_length.h xt_limit.h xt_mac.h xt_mark.h	\
-	    xt_MARK.h xt_multiport.h xt_NFQUEUE.h xt_pkttype.h		\
-	    xt_policy.h xt_realm.h xt_sctp.h xt_state.h xt_string.h	\
-	    xt_tcpmss.h xt_tcpudp.h xt_SECMARK.h xt_CONNSECMARK.h
+header-y += nf_conntrack_sctp.h
+header-y += nf_conntrack_tuple_common.h
+header-y += nfnetlink_conntrack.h
+header-y += nfnetlink_log.h
+header-y += nfnetlink_queue.h
+header-y += xt_CLASSIFY.h
+header-y += xt_comment.h
+header-y += xt_connbytes.h
+header-y += xt_connmark.h
+header-y += xt_CONNMARK.h
+header-y += xt_conntrack.h
+header-y += xt_dccp.h
+header-y += xt_esp.h
+header-y += xt_helper.h
+header-y += xt_length.h
+header-y += xt_limit.h
+header-y += xt_mac.h
+header-y += xt_mark.h
+header-y += xt_MARK.h
+header-y += xt_multiport.h
+header-y += xt_NFQUEUE.h
+header-y += xt_pkttype.h
+header-y += xt_policy.h
+header-y += xt_realm.h
+header-y += xt_sctp.h
+header-y += xt_state.h
+header-y += xt_string.h
+header-y += xt_tcpmss.h
+header-y += xt_tcpudp.h
+header-y += xt_SECMARK.h
+header-y += xt_CONNSECMARK.h
 
-unifdef-y := nf_conntrack_common.h nf_conntrack_ftp.h		\
-	nf_conntrack_tcp.h nfnetlink.h x_tables.h xt_physdev.h
+unifdef-y += nf_conntrack_common.h
+unifdef-y += nf_conntrack_ftp.h
+unifdef-y += nf_conntrack_tcp.h
+unifdef-y += nfnetlink.h
+unifdef-y += x_tables.h
+unifdef-y += xt_physdev.h
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index d2e4bd7..9e0dae0 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -125,6 +125,10 @@
 	/* Counter highest bit has been set */
 	IPCT_COUNTER_FILLING_BIT = 11,
 	IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
+
+	/* Mark is set */
+	IPCT_MARK_BIT = 12,
+	IPCT_MARK = (1 << IPCT_MARK_BIT),
 };
 
 enum ip_conntrack_expect_events {
diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index b2feeff..6b01ba2 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -49,6 +49,7 @@
 	u_int32_t	last_seq;	/* Last sequence number seen in dir */
 	u_int32_t	last_ack;	/* Last sequence number seen in opposite dir */
 	u_int32_t	last_end;	/* Last seq + len */
+	u_int16_t	last_win;	/* Last window advertisement seen in dir */
 };
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 9f5b12c..6d8e3e5 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -43,7 +43,7 @@
 	u_int16_t nfa_len;
 	u_int16_t nfa_type;	/* we use 15 bits for the type, and the highest
 				 * bit to indicate whether the payload is nested */
-} __attribute__ ((packed));
+};
 
 /* FIXME: Apart from NFNL_NFA_NESTED shamelessly copy and pasted from
  * rtnetlink.h, it's time to put this in a generic file */
@@ -79,7 +79,7 @@
 	u_int8_t  nfgen_family;		/* AF_xxx */
 	u_int8_t  version;		/* nfnetlink version */
 	u_int16_t res_id;		/* resource id */
-} __attribute__ ((packed));
+};
 
 #define NFNETLINK_V0	0
 
diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h
index a7497c7..87b92f8 100644
--- a/include/linux/netfilter/nfnetlink_log.h
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -19,18 +19,18 @@
 	u_int16_t	hw_protocol;	/* hw protocol (network order) */
 	u_int8_t	hook;		/* netfilter hook */
 	u_int8_t	_pad;
-} __attribute__ ((packed));
+};
 
 struct nfulnl_msg_packet_hw {
 	u_int16_t	hw_addrlen;
 	u_int16_t	_pad;
 	u_int8_t	hw_addr[8];
-} __attribute__ ((packed));
+};
 
 struct nfulnl_msg_packet_timestamp {
 	aligned_u64	sec;
 	aligned_u64	usec;
-} __attribute__ ((packed));
+};
 
 #define NFULNL_PREFIXLEN	30	/* just like old log target */
 
diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h
index 9e77437..36af036 100644
--- a/include/linux/netfilter/nfnetlink_queue.h
+++ b/include/linux/netfilter/nfnetlink_queue.h
@@ -22,12 +22,12 @@
 	u_int16_t	hw_addrlen;
 	u_int16_t	_pad;
 	u_int8_t	hw_addr[8];
-} __attribute__ ((packed));
+};
 
 struct nfqnl_msg_packet_timestamp {
 	aligned_u64	sec;
 	aligned_u64	usec;
-} __attribute__ ((packed));
+};
 
 enum nfqnl_attr_type {
 	NFQA_UNSPEC,
@@ -49,7 +49,7 @@
 struct nfqnl_msg_verdict_hdr {
 	u_int32_t verdict;
 	u_int32_t id;
-} __attribute__ ((packed));
+};
 
 
 enum nfqnl_msg_config_cmds {
@@ -64,7 +64,7 @@
 	u_int8_t	command;	/* nfqnl_msg_config_cmds */
 	u_int8_t	_pad;
 	u_int16_t	pf;		/* AF_xxx for PF_[UN]BIND */
-} __attribute__ ((packed));
+};
 
 enum nfqnl_config_mode {
 	NFQNL_COPY_NONE,
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 48cc32d8..739a98e 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -138,16 +138,6 @@
 
 #include <linux/netdevice.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-#include <linux/netfilter_ipv4/listhelp.h>
-
-#ifdef CONFIG_COMPAT
-#define COMPAT_TO_USER		1
-#define COMPAT_FROM_USER	-1
-#define COMPAT_CALC_SIZE	0
-#endif
-
 struct xt_match
 {
 	struct list_head list;
@@ -174,21 +164,24 @@
 			  const void *ip,
 			  const struct xt_match *match,
 			  void *matchinfo,
-			  unsigned int matchinfosize,
 			  unsigned int hook_mask);
 
 	/* Called when entry of this type deleted. */
-	void (*destroy)(const struct xt_match *match, void *matchinfo,
-			unsigned int matchinfosize);
+	void (*destroy)(const struct xt_match *match, void *matchinfo);
 
 	/* Called when userspace align differs from kernel space one */
-	int (*compat)(void *match, void **dstptr, int *size, int convert);
+	void (*compat_from_user)(void *dst, void *src);
+	int (*compat_to_user)(void __user *dst, void *src);
 
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
+	/* Free to use by each match */
+	unsigned long data;
+
 	char *table;
 	unsigned int matchsize;
+	unsigned int compatsize;
 	unsigned int hooks;
 	unsigned short proto;
 
@@ -211,8 +204,7 @@
 			       const struct net_device *out,
 			       unsigned int hooknum,
 			       const struct xt_target *target,
-			       const void *targinfo,
-			       void *userdata);
+			       const void *targinfo);
 
 	/* Called when user tries to insert an entry of this type:
            hook_mask is a bitmask of hooks from which it can be
@@ -222,21 +214,21 @@
 			  const void *entry,
 			  const struct xt_target *target,
 			  void *targinfo,
-			  unsigned int targinfosize,
 			  unsigned int hook_mask);
 
 	/* Called when entry of this type deleted. */
-	void (*destroy)(const struct xt_target *target, void *targinfo,
-			unsigned int targinfosize);
+	void (*destroy)(const struct xt_target *target, void *targinfo);
 
 	/* Called when userspace align differs from kernel space one */
-	int (*compat)(void *target, void **dstptr, int *size, int convert);
+	void (*compat_from_user)(void *dst, void *src);
+	int (*compat_to_user)(void __user *dst, void *src);
 
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
 	char *table;
 	unsigned int targetsize;
+	unsigned int compatsize;
 	unsigned int hooks;
 	unsigned short proto;
 
@@ -290,8 +282,13 @@
 
 extern int xt_register_target(struct xt_target *target);
 extern void xt_unregister_target(struct xt_target *target);
+extern int xt_register_targets(struct xt_target *target, unsigned int n);
+extern void xt_unregister_targets(struct xt_target *target, unsigned int n);
+
 extern int xt_register_match(struct xt_match *target);
 extern void xt_unregister_match(struct xt_match *target);
+extern int xt_register_matches(struct xt_match *match, unsigned int n);
+extern void xt_unregister_matches(struct xt_match *match, unsigned int n);
 
 extern int xt_check_match(const struct xt_match *match, unsigned short family,
 			  unsigned int size, const char *table, unsigned int hook,
@@ -388,9 +385,18 @@
 
 extern void xt_compat_lock(int af);
 extern void xt_compat_unlock(int af);
-extern int xt_compat_match(void *match, void **dstptr, int *size, int convert);
-extern int xt_compat_target(void *target, void **dstptr, int *size,
-		int convert);
+
+extern int xt_compat_match_offset(struct xt_match *match);
+extern void xt_compat_match_from_user(struct xt_entry_match *m,
+				      void **dstptr, int *size);
+extern int xt_compat_match_to_user(struct xt_entry_match *m,
+				   void * __user *dstptr, int *size);
+
+extern int xt_compat_target_offset(struct xt_target *target);
+extern void xt_compat_target_from_user(struct xt_entry_target *t,
+				       void **dstptr, int *size);
+extern int xt_compat_target_to_user(struct xt_entry_target *t,
+				    void * __user *dstptr, int *size);
 
 #endif /* CONFIG_COMPAT */
 #endif /* __KERNEL__ */
diff --git a/include/linux/netfilter/xt_DSCP.h b/include/linux/netfilter/xt_DSCP.h
new file mode 100644
index 0000000..3c7c963
--- /dev/null
+++ b/include/linux/netfilter/xt_DSCP.h
@@ -0,0 +1,20 @@
+/* x_tables module for setting the IPv4/IPv6 DSCP field
+ *
+ * (C) 2002 Harald Welte <laforge@gnumonks.org>
+ * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
+ * This software is distributed under GNU GPL v2, 1991
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+ *
+ * xt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp
+*/
+#ifndef _XT_DSCP_TARGET_H
+#define _XT_DSCP_TARGET_H
+#include <linux/netfilter/xt_dscp.h>
+
+/* target info */
+struct xt_DSCP_info {
+	u_int8_t dscp;
+};
+
+#endif /* _XT_DSCP_TARGET_H */
diff --git a/include/linux/netfilter/xt_dscp.h b/include/linux/netfilter/xt_dscp.h
new file mode 100644
index 0000000..1da61e6
--- /dev/null
+++ b/include/linux/netfilter/xt_dscp.h
@@ -0,0 +1,23 @@
+/* x_tables module for matching the IPv4/IPv6 DSCP field
+ *
+ * (C) 2002 Harald Welte <laforge@gnumonks.org>
+ * This software is distributed under GNU GPL v2, 1991
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+ *
+ * xt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp
+*/
+#ifndef _XT_DSCP_H
+#define _XT_DSCP_H
+
+#define XT_DSCP_MASK	0xfc	/* 11111100 */
+#define XT_DSCP_SHIFT	2
+#define XT_DSCP_MAX	0x3f	/* 00111111 */
+
+/* match info */
+struct xt_dscp_info {
+	u_int8_t dscp;
+	u_int8_t invert;
+};
+
+#endif /* _XT_DSCP_H */
diff --git a/include/linux/netfilter_arp/Kbuild b/include/linux/netfilter_arp/Kbuild
index 198ec5e..4f13dfc 100644
--- a/include/linux/netfilter_arp/Kbuild
+++ b/include/linux/netfilter_arp/Kbuild
@@ -1,2 +1,3 @@
-header-y := arpt_mangle.h
-unifdef-y := arp_tables.h
+header-y += arpt_mangle.h
+
+unifdef-y += arp_tables.h
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 62cc27d..149e87c 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -248,8 +248,7 @@
 				  unsigned int hook,
 				  const struct net_device *in,
 				  const struct net_device *out,
-				  struct arpt_table *table,
-				  void *userdata);
+				  struct arpt_table *table);
 
 #define ARPT_ALIGN(s) (((s) + (__alignof__(struct arpt_entry)-1)) & ~(__alignof__(struct arpt_entry)-1))
 #endif /*__KERNEL__*/
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 427c67f..9a4dd11 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -5,9 +5,8 @@
  */
 
 #include <linux/netfilter.h>
-#if defined(__KERNEL__) && defined(CONFIG_BRIDGE_NETFILTER)
 #include <linux/if_ether.h>
-#endif
+#include <linux/if_vlan.h>
 
 /* Bridge Hooks */
 /* After promisc drops, checksum checks. */
@@ -47,40 +46,20 @@
 
 
 /* Only used in br_forward.c */
-static inline
-int nf_bridge_maybe_copy_header(struct sk_buff *skb)
+extern int nf_bridge_copy_header(struct sk_buff *skb);
+static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
 {
-	int err;
-
-	if (skb->nf_bridge) {
-		if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
-			err = skb_cow(skb, 18);
-			if (err)
-				return err;
-			memcpy(skb->data - 18, skb->nf_bridge->data, 18);
-			skb_push(skb, 4);
-		} else {
-			err = skb_cow(skb, 16);
-			if (err)
-				return err;
-			memcpy(skb->data - 16, skb->nf_bridge->data, 16);
-		}
-	}
-	return 0;
+	if (skb->nf_bridge)
+		return nf_bridge_copy_header(skb);
+  	return 0;
 }
 
 /* This is called by the IP fragmenting code and it ensures there is
  * enough room for the encapsulating header (if there is one). */
-static inline
-int nf_bridge_pad(struct sk_buff *skb)
+static inline int nf_bridge_pad(const struct sk_buff *skb)
 {
-	if (skb->protocol == __constant_htons(ETH_P_IP))
-		return 0;
-	if (skb->nf_bridge) {
-		if (skb->protocol == __constant_htons(ETH_P_8021Q))
-			return 4;
-	}
-	return 0;
+ 	return (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q))
+		? VLAN_HLEN : 0;
 }
 
 struct bridge_skb_cb {
@@ -90,6 +69,9 @@
 };
 
 extern int brnf_deferred_hooks;
+#else
+#define nf_bridge_maybe_copy_header(skb)	(0)
+#define nf_bridge_pad(skb)			(0)
 #endif /* CONFIG_BRIDGE_NETFILTER */
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/netfilter_bridge/Kbuild b/include/linux/netfilter_bridge/Kbuild
index 5b1aba6..76ff4c4 100644
--- a/include/linux/netfilter_bridge/Kbuild
+++ b/include/linux/netfilter_bridge/Kbuild
@@ -1,4 +1,17 @@
-header-y += ebt_among.h ebt_arp.h ebt_arpreply.h ebt_ip.h ebt_limit.h	\
-	ebt_log.h ebt_mark_m.h ebt_mark_t.h ebt_nat.h ebt_pkttype.h	\
-	ebt_redirect.h ebt_stp.h ebt_ulog.h ebt_vlan.h
-unifdef-y := ebtables.h ebt_802_3.h
+header-y += ebt_among.h
+header-y += ebt_arp.h
+header-y += ebt_arpreply.h
+header-y += ebt_ip.h
+header-y += ebt_limit.h
+header-y += ebt_log.h
+header-y += ebt_mark_m.h
+header-y += ebt_mark_t.h
+header-y += ebt_nat.h
+header-y += ebt_pkttype.h
+header-y += ebt_redirect.h
+header-y += ebt_stp.h
+header-y += ebt_ulog.h
+header-y += ebt_vlan.h
+
+unifdef-y += ebtables.h
+unifdef-y += ebt_802_3.h
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild
index 04e4d27..591c1a8 100644
--- a/include/linux/netfilter_ipv4/Kbuild
+++ b/include/linux/netfilter_ipv4/Kbuild
@@ -1,21 +1,63 @@
+header-y += ip_conntrack_helper.h
+header-y += ip_conntrack_helper_h323_asn1.h
+header-y += ip_conntrack_helper_h323_types.h
+header-y += ip_conntrack_protocol.h
+header-y += ip_conntrack_sctp.h
+header-y += ip_conntrack_tcp.h
+header-y += ip_conntrack_tftp.h
+header-y += ip_nat_pptp.h
+header-y += ipt_addrtype.h
+header-y += ipt_ah.h
+header-y += ipt_CLASSIFY.h
+header-y += ipt_CLUSTERIP.h
+header-y += ipt_comment.h
+header-y += ipt_connbytes.h
+header-y += ipt_connmark.h
+header-y += ipt_CONNMARK.h
+header-y += ipt_conntrack.h
+header-y += ipt_dccp.h
+header-y += ipt_dscp.h
+header-y += ipt_DSCP.h
+header-y += ipt_ecn.h
+header-y += ipt_ECN.h
+header-y += ipt_esp.h
+header-y += ipt_hashlimit.h
+header-y += ipt_helper.h
+header-y += ipt_iprange.h
+header-y += ipt_length.h
+header-y += ipt_limit.h
+header-y += ipt_LOG.h
+header-y += ipt_mac.h
+header-y += ipt_mark.h
+header-y += ipt_MARK.h
+header-y += ipt_multiport.h
+header-y += ipt_NFQUEUE.h
+header-y += ipt_owner.h
+header-y += ipt_physdev.h
+header-y += ipt_pkttype.h
+header-y += ipt_policy.h
+header-y += ipt_realm.h
+header-y += ipt_recent.h
+header-y += ipt_REJECT.h
+header-y += ipt_SAME.h
+header-y += ipt_sctp.h
+header-y += ipt_state.h
+header-y += ipt_string.h
+header-y += ipt_tcpmss.h
+header-y += ipt_TCPMSS.h
+header-y += ipt_tos.h
+header-y += ipt_TOS.h
+header-y += ipt_ttl.h
+header-y += ipt_TTL.h
+header-y += ipt_ULOG.h
 
-header-y := ip_conntrack_helper.h ip_conntrack_helper_h323_asn1.h	\
-	    ip_conntrack_helper_h323_types.h ip_conntrack_protocol.h	\
-	    ip_conntrack_sctp.h ip_conntrack_tcp.h ip_conntrack_tftp.h	\
-	    ip_nat_pptp.h ipt_addrtype.h ipt_ah.h	\
-	    ipt_CLASSIFY.h ipt_CLUSTERIP.h ipt_comment.h		\
-	    ipt_connbytes.h ipt_connmark.h ipt_CONNMARK.h		\
-	    ipt_conntrack.h ipt_dccp.h ipt_dscp.h ipt_DSCP.h ipt_ecn.h	\
-	    ipt_ECN.h ipt_esp.h ipt_hashlimit.h ipt_helper.h		\
-	    ipt_iprange.h ipt_length.h ipt_limit.h ipt_LOG.h ipt_mac.h	\
-	    ipt_mark.h ipt_MARK.h ipt_multiport.h ipt_NFQUEUE.h		\
-	    ipt_owner.h ipt_physdev.h ipt_pkttype.h ipt_policy.h	\
-	    ipt_realm.h ipt_recent.h ipt_REJECT.h ipt_SAME.h		\
-	    ipt_sctp.h ipt_state.h ipt_string.h ipt_tcpmss.h		\
-	    ipt_TCPMSS.h ipt_tos.h ipt_TOS.h ipt_ttl.h ipt_TTL.h	\
-	    ipt_ULOG.h
-
-unifdef-y := ip_conntrack.h ip_conntrack_h323.h ip_conntrack_irc.h	\
-	ip_conntrack_pptp.h ip_conntrack_proto_gre.h			\
-	ip_conntrack_tuple.h ip_nat.h ip_nat_rule.h ip_queue.h		\
-	ip_tables.h
+unifdef-y += ip_conntrack.h
+unifdef-y += ip_conntrack_h323.h
+unifdef-y += ip_conntrack_irc.h
+unifdef-y += ip_conntrack_pptp.h
+unifdef-y += ip_conntrack_proto_gre.h
+unifdef-y += ip_conntrack_tuple.h
+unifdef-y += ip_nat.h
+unifdef-y += ip_nat_rule.h
+unifdef-y += ip_queue.h
+unifdef-y += ip_tables.h
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
index 8d69279..77fe868 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
@@ -25,6 +25,8 @@
 		    struct ip_conntrack *ct,
 		    enum ip_conntrack_info conntrackinfo);
 
+	void (*destroy)(struct ip_conntrack *ct);
+
 	int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct);
 };
 
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
index 816144c..2644b1f 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
@@ -31,8 +31,8 @@
 	/* everything below is going to be per-expectation in newnat,
 	 * since there could be more than one call within one session */
 	enum pptp_ctrlcall_state cstate;	/* call state */
-	u_int16_t pac_call_id;			/* call id of PAC, host byte order */
-	u_int16_t pns_call_id;			/* call id of PNS, host byte order */
+	__be16 pac_call_id;			/* call id of PAC, network byte order */
+	__be16 pns_call_id;			/* call id of PNS, network byte order */
 
 	/* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack
 	 * and therefore imposes a fixed limit on the number of maps */
@@ -42,8 +42,8 @@
 /* conntrack_expect private member */
 struct ip_ct_pptp_expect {
 	enum pptp_ctrlcall_state cstate; 	/* call state */
-	u_int16_t pac_call_id;			/* call id of PAC */
-	u_int16_t pns_call_id;			/* call id of PNS */
+	__be16 pac_call_id;			/* call id of PAC */
+	__be16 pns_call_id;			/* call id of PNS */
 };
 
 
@@ -107,8 +107,7 @@
 
 struct PptpStartSessionRequest {
 	__be16	protocolVersion;
-	__u8	reserved1;
-	__u8	reserved2;
+	__u16	reserved1;
 	__be32	framingCapability;
 	__be32	bearerCapability;
 	__be16	maxChannels;
@@ -143,6 +142,8 @@
 
 struct PptpStopSessionRequest {
 	__u8	reason;
+	__u8	reserved1;
+	__u16	reserved2;
 };
 
 /* PptpStopSessionResultCode */
@@ -152,6 +153,7 @@
 struct PptpStopSessionReply {
 	__u8	resultCode;
 	__u8	generalErrorCode;
+	__u16	reserved1;
 };
 
 struct PptpEchoRequest {
@@ -188,9 +190,8 @@
 	__be32	framingType;
 	__be16	packetWindow;
 	__be16	packetProcDelay;
-	__u16	reserved1;
 	__be16	phoneNumberLength;
-	__u16	reserved2;
+	__u16	reserved1;
 	__u8	phoneNumber[64];
 	__u8	subAddress[64];
 };
@@ -285,19 +286,19 @@
 };
 
 union pptp_ctrl_union {
-		struct PptpStartSessionRequest	sreq;
-		struct PptpStartSessionReply	srep;
-		struct PptpStopSessionRequest	streq;
-		struct PptpStopSessionReply	strep;
-                struct PptpOutCallRequest       ocreq;
-                struct PptpOutCallReply         ocack;
-                struct PptpInCallRequest        icreq;
-                struct PptpInCallReply          icack;
-                struct PptpInCallConnected      iccon;
-		struct PptpClearCallRequest	clrreq;
-                struct PptpCallDisconnectNotify disc;
-                struct PptpWanErrorNotify       wanerr;
-                struct PptpSetLinkInfo          setlink;
+	struct PptpStartSessionRequest	sreq;
+	struct PptpStartSessionReply	srep;
+	struct PptpStopSessionRequest	streq;
+	struct PptpStopSessionReply	strep;
+	struct PptpOutCallRequest	ocreq;
+	struct PptpOutCallReply		ocack;
+	struct PptpInCallRequest	icreq;
+	struct PptpInCallReply		icack;
+	struct PptpInCallConnected	iccon;
+	struct PptpClearCallRequest	clrreq;
+	struct PptpCallDisconnectNotify disc;
+	struct PptpWanErrorNotify	wanerr;
+	struct PptpSetLinkInfo		setlink;
 };
 
 extern int
@@ -314,7 +315,7 @@
 			  struct PptpControlHeader *ctlh,
 			  union pptp_ctrl_union *pptpReq);
 
-extern int
+extern void
 (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig,
 			    struct ip_conntrack_expect *exp_reply);
 
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
index 8d090ef..1d853aa 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
@@ -49,18 +49,18 @@
 #else
 #error "Adjust your <asm/byteorder.h> defines"
 #endif
-	__u16	protocol;
+	__be16	protocol;
 };
 
 /* modified GRE header for PPTP */
 struct gre_hdr_pptp {
-	__u8  flags;		/* bitfield */
-	__u8  version;		/* should be GRE_VERSION_PPTP */
-	__u16 protocol;		/* should be GRE_PROTOCOL_PPTP */
-	__u16 payload_len;	/* size of ppp payload, not inc. gre header */
-	__u16 call_id;		/* peer's call_id for this session */
-	__u32 seq;		/* sequence number.  Present if S==1 */
-	__u32 ack;		/* seq number of highest packet recieved by */
+	__u8   flags;		/* bitfield */
+	__u8   version;		/* should be GRE_VERSION_PPTP */
+	__be16 protocol;	/* should be GRE_PROTOCOL_PPTP */
+	__be16 payload_len;	/* size of ppp payload, not inc. gre header */
+	__be16 call_id;		/* peer's call_id for this session */
+	__be32 seq;		/* sequence number.  Present if S==1 */
+	__be32 ack;		/* seq number of highest packet received by */
 				/*  sender in this session */
 };
 
@@ -92,13 +92,13 @@
 
 
 /* get pointer to gre key, if present */
-static inline u_int32_t *gre_key(struct gre_hdr *greh)
+static inline __be32 *gre_key(struct gre_hdr *greh)
 {
 	if (!greh->key)
 		return NULL;
 	if (greh->csum || greh->routing)
-		return (u_int32_t *) (greh+sizeof(*greh)+4);
-	return (u_int32_t *) (greh+sizeof(*greh));
+		return (__be32 *) ((unsigned char *)greh + sizeof(*greh) + 4);
+	return (__be32 *) ((unsigned char *)greh + sizeof(*greh)); /* byte offsets */
 }
 
 /* get pointer ot gre csum, if present */
diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
index e9f5ed1..98f8407 100644
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ b/include/linux/netfilter_ipv4/ip_nat.h
@@ -72,10 +72,6 @@
 extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack *ignored_conntrack);
 
-/* Calculate relative checksum. */
-extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv,
-				    u_int32_t newval,
-				    u_int16_t oldcheck);
 #else  /* !__KERNEL__: iptables wants this to compile. */
 #define ip_nat_multi_range ip_nat_multi_range_compat
 #endif /*__KERNEL__*/
diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h
index 30db23f..60566f9f 100644
--- a/include/linux/netfilter_ipv4/ip_nat_core.h
+++ b/include/linux/netfilter_ipv4/ip_nat_core.h
@@ -11,8 +11,8 @@
 			       unsigned int hooknum,
 			       struct sk_buff **pskb);
 
-extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
-					 struct ip_conntrack *ct,
-					 enum ip_nat_manip_type manip,
-					 enum ip_conntrack_dir dir);
+extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+					 enum ip_conntrack_info ctinfo,
+					 unsigned int hooknum,
+					 struct sk_buff **pskb);
 #endif /* _IP_NAT_CORE_H */
diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h
index eaf66c2..36668bf 100644
--- a/include/linux/netfilter_ipv4/ip_nat_pptp.h
+++ b/include/linux/netfilter_ipv4/ip_nat_pptp.h
@@ -4,8 +4,8 @@
 
 /* conntrack private data */
 struct ip_nat_pptp {
-	u_int16_t pns_call_id;		/* NAT'ed PNS call id */
-	u_int16_t pac_call_id;		/* NAT'ed PAC call id */
+	__be16 pns_call_id;		/* NAT'ed PNS call id */
+	__be16 pac_call_id;		/* NAT'ed PAC call id */
 };
 
 #endif /* _NAT_PPTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index c0dac16..a536bbd 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -312,8 +312,7 @@
 				 unsigned int hook,
 				 const struct net_device *in,
 				 const struct net_device *out,
-				 struct ipt_table *table,
-				 void *userdata);
+				 struct ipt_table *table);
 
 #define IPT_ALIGN(s) XT_ALIGN(s)
 
diff --git a/include/linux/netfilter_ipv4/ipt_DSCP.h b/include/linux/netfilter_ipv4/ipt_DSCP.h
index b30f510..3491e52 100644
--- a/include/linux/netfilter_ipv4/ipt_DSCP.h
+++ b/include/linux/netfilter_ipv4/ipt_DSCP.h
@@ -11,10 +11,8 @@
 #ifndef _IPT_DSCP_TARGET_H
 #define _IPT_DSCP_TARGET_H
 #include <linux/netfilter_ipv4/ipt_dscp.h>
+#include <linux/netfilter/xt_DSCP.h>
 
-/* target info */
-struct ipt_DSCP_info {
-	u_int8_t dscp;
-};
+#define ipt_DSCP_info xt_DSCP_info
 
 #endif /* _IPT_DSCP_TARGET_H */
diff --git a/include/linux/netfilter_ipv4/ipt_dscp.h b/include/linux/netfilter_ipv4/ipt_dscp.h
index 2fa6dfe..4b82ca9 100644
--- a/include/linux/netfilter_ipv4/ipt_dscp.h
+++ b/include/linux/netfilter_ipv4/ipt_dscp.h
@@ -10,14 +10,12 @@
 #ifndef _IPT_DSCP_H
 #define _IPT_DSCP_H
 
-#define IPT_DSCP_MASK	0xfc	/* 11111100 */
-#define IPT_DSCP_SHIFT	2
-#define IPT_DSCP_MAX	0x3f	/* 00111111 */
+#include <linux/netfilter/xt_dscp.h>
 
-/* match info */
-struct ipt_dscp_info {
-	u_int8_t dscp;
-	u_int8_t invert;
-};
+#define IPT_DSCP_MASK	XT_DSCP_MASK
+#define IPT_DSCP_SHIFT	XT_DSCP_SHIFT
+#define IPT_DSCP_MAX	XT_DSCP_MAX
+
+#define ipt_dscp_info	xt_dscp_info
 
 #endif /* _IPT_DSCP_H */
diff --git a/include/linux/netfilter_ipv4/listhelp.h b/include/linux/netfilter_ipv4/listhelp.h
deleted file mode 100644
index 5d92cf0..0000000
--- a/include/linux/netfilter_ipv4/listhelp.h
+++ /dev/null
@@ -1,123 +0,0 @@
-#ifndef _LISTHELP_H
-#define _LISTHELP_H
-#include <linux/list.h>
-
-/* Header to do more comprehensive job than linux/list.h; assume list
-   is first entry in structure. */
-
-/* Return pointer to first true entry, if any, or NULL.  A macro
-   required to allow inlining of cmpfn. */
-#define LIST_FIND(head, cmpfn, type, args...)		\
-({							\
-	const struct list_head *__i, *__j = NULL;	\
-							\
-	ASSERT_READ_LOCK(head);				\
-	list_for_each(__i, (head))			\
-		if (cmpfn((const type)__i , ## args)) {	\
-			__j = __i;			\
-			break;				\
-		}					\
-	(type)__j;					\
-})
-
-#define LIST_FIND_W(head, cmpfn, type, args...)		\
-({							\
-	const struct list_head *__i, *__j = NULL;	\
-							\
-	ASSERT_WRITE_LOCK(head);			\
-	list_for_each(__i, (head))			\
-		if (cmpfn((type)__i , ## args)) {	\
-			__j = __i;			\
-			break;				\
-		}					\
-	(type)__j;					\
-})
-
-/* Just like LIST_FIND but we search backwards */
-#define LIST_FIND_B(head, cmpfn, type, args...)		\
-({							\
-	const struct list_head *__i, *__j = NULL;	\
-							\
-	ASSERT_READ_LOCK(head);				\
-	list_for_each_prev(__i, (head))			\
-		if (cmpfn((const type)__i , ## args)) {	\
-			__j = __i;			\
-			break;				\
-		}					\
-	(type)__j;					\
-})
-
-static inline int
-__list_cmp_same(const void *p1, const void *p2) { return p1 == p2; }
-
-/* Is this entry in the list? */
-static inline int
-list_inlist(struct list_head *head, const void *entry)
-{
-	return LIST_FIND(head, __list_cmp_same, void *, entry) != NULL;
-}
-
-/* Delete from list. */
-#ifdef CONFIG_NETFILTER_DEBUG
-#define LIST_DELETE(head, oldentry)					\
-do {									\
-	ASSERT_WRITE_LOCK(head);					\
-	if (!list_inlist(head, oldentry))				\
-		printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n",	\
-		       __FILE__, __LINE__, #oldentry, oldentry, #head);	\
-        else list_del((struct list_head *)oldentry);			\
-} while(0)
-#else
-#define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry)
-#endif
-
-/* Append. */
-static inline void
-list_append(struct list_head *head, void *new)
-{
-	ASSERT_WRITE_LOCK(head);
-	list_add((new), (head)->prev);
-}
-
-/* Prepend. */
-static inline void
-list_prepend(struct list_head *head, void *new)
-{
-	ASSERT_WRITE_LOCK(head);
-	list_add(new, head);
-}
-
-/* Insert according to ordering function; insert before first true. */
-#define LIST_INSERT(head, new, cmpfn)				\
-do {								\
-	struct list_head *__i;					\
-	ASSERT_WRITE_LOCK(head);				\
-	list_for_each(__i, (head))				\
-		if ((new), (typeof (new))__i)			\
-			break;					\
-	list_add((struct list_head *)(new), __i->prev);		\
-} while(0)
-
-/* If the field after the list_head is a nul-terminated string, you
-   can use these functions. */
-static inline int __list_cmp_name(const void *i, const char *name)
-{
-	return strcmp(name, i+sizeof(struct list_head)) == 0;
-}
-
-/* Returns false if same name already in list, otherwise does insert. */
-static inline int
-list_named_insert(struct list_head *head, void *new)
-{
-	if (LIST_FIND(head, __list_cmp_name, void *,
-		      new + sizeof(struct list_head)))
-		return 0;
-	list_prepend(head, new);
-	return 1;
-}
-
-/* Find this named element in the list. */
-#define list_named_find(head, name)			\
-LIST_FIND(head, __list_cmp_name, void *, name)
-
-#endif /*_LISTHELP_H*/
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 52a7b9e..d97e268 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -73,6 +73,7 @@
 };
 
 #ifdef CONFIG_NETFILTER
+extern int ip6_route_me_harder(struct sk_buff *skb);
 extern unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 				    unsigned int dataoff, u_int8_t protocol);
 
diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild
index 913ddbf..9dd978d 100644
--- a/include/linux/netfilter_ipv6/Kbuild
+++ b/include/linux/netfilter_ipv6/Kbuild
@@ -1,6 +1,21 @@
-header-y += ip6t_HL.h ip6t_LOG.h ip6t_MARK.h ip6t_REJECT.h ip6t_ah.h	\
-	ip6t_esp.h ip6t_frag.h ip6t_hl.h ip6t_ipv6header.h		\
-	ip6t_length.h ip6t_limit.h ip6t_mac.h ip6t_mark.h		\
-	ip6t_multiport.h ip6t_opts.h ip6t_owner.h ip6t_policy.h		\
-	ip6t_physdev.h ip6t_rt.h
-unifdef-y := ip6_tables.h
+header-y += ip6t_HL.h
+header-y += ip6t_LOG.h
+header-y += ip6t_MARK.h
+header-y += ip6t_REJECT.h
+header-y += ip6t_ah.h
+header-y += ip6t_esp.h
+header-y += ip6t_frag.h
+header-y += ip6t_hl.h
+header-y += ip6t_ipv6header.h
+header-y += ip6t_length.h
+header-y += ip6t_limit.h
+header-y += ip6t_mac.h
+header-y += ip6t_mark.h
+header-y += ip6t_multiport.h
+header-y += ip6t_opts.h
+header-y += ip6t_owner.h
+header-y += ip6t_policy.h
+header-y += ip6t_physdev.h
+header-y += ip6t_rt.h
+
+unifdef-y += ip6_tables.h
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index d0d5d1e..d7a8e9c 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -300,8 +300,7 @@
 				  unsigned int hook,
 				  const struct net_device *in,
 				  const struct net_device *out,
-				  struct ip6t_table *table,
-				  void *userdata);
+				  struct ip6t_table *table);
 
 /* Check for an extension */
 extern int ip6t_ext_hdr(u8 nexthdr);
diff --git a/include/linux/netfilter_logging.h b/include/linux/netfilter_logging.h
deleted file mode 100644
index 562bb6a..0000000
--- a/include/linux/netfilter_logging.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Internal logging interface, which relies on the real 
-   LOG target modules */
-#ifndef __LINUX_NETFILTER_LOGGING_H
-#define __LINUX_NETFILTER_LOGGING_H
-
-#ifdef __KERNEL__
-#include <asm/atomic.h>
-
-struct nf_logging_t {
-	void (*nf_log_packet)(struct sk_buff **pskb,
-			      unsigned int hooknum,
-			      const struct net_device *in,
-			      const struct net_device *out,
-			      const char *prefix);
-	void (*nf_log)(char *pfh, size_t len,
-		       const char *prefix);
-};
-
-extern void nf_log_register(int pf, const struct nf_logging_t *logging);
-extern void nf_log_unregister(int pf, const struct nf_logging_t *logging);
-
-extern void nf_log_packet(int pf,
-			  struct sk_buff **pskb,
-			  unsigned int hooknum,
-			  const struct net_device *in,
-			  const struct net_device *out,
-			  const char *fmt, ...);
-extern void nf_log(int pf,
-		   char *pfh, size_t len,
-		   const char *fmt, ...);
-#endif /*__KERNEL__*/
-
-#endif /*__LINUX_NETFILTER_LOGGING_H*/
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 855b446..6641162 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -21,6 +21,8 @@
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
 #define NETLINK_KOBJECT_UEVENT	15	/* Kernel messages to userspace */
 #define NETLINK_GENERIC		16
+/* leave room for NETLINK_DM (DM Events) */
+#define NETLINK_SCSITRANSPORT	18	/* SCSI Transports */
 
 #define MAX_LINKS 32		
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6c2066c..3b5b041 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -42,6 +42,7 @@
 #include <linux/in.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/rbtree.h>
 #include <linux/rwsem.h>
 #include <linux/wait.h>
 
@@ -69,6 +70,8 @@
  * NFSv3/v4 Access mode cache entry
  */
 struct nfs_access_entry {
+	struct rb_node		rb_node;
+	struct list_head	lru;
 	unsigned long		jiffies;
 	struct rpc_cred *	cred;
 	int			mask;
@@ -145,7 +148,9 @@
 	 */
 	atomic_t		data_updates;
 
-	struct nfs_access_entry	cache_access;
+	struct rb_root		access_cache;
+	struct list_head	access_cache_entry_lru;
+	struct list_head	access_cache_inode_lru;
 #ifdef CONFIG_NFS_V3_ACL
 	struct posix_acl	*acl_access;
 	struct posix_acl	*acl_default;
@@ -199,6 +204,7 @@
 #define NFS_INO_REVALIDATING	(0)		/* revalidating attrs */
 #define NFS_INO_ADVISE_RDPLUS	(1)		/* advise readdirplus */
 #define NFS_INO_STALE		(2)		/* possible stale inode */
+#define NFS_INO_ACL_LRU_SET	(3)		/* Inode is on the LRU list */
 
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 {
@@ -209,8 +215,7 @@
 #define NFS_FH(inode)			(&NFS_I(inode)->fh)
 #define NFS_SERVER(inode)		(NFS_SB(inode->i_sb))
 #define NFS_CLIENT(inode)		(NFS_SERVER(inode)->client)
-#define NFS_PROTO(inode)		(NFS_SERVER(inode)->rpc_ops)
-#define NFS_ADDR(inode)			(RPC_PEERADDR(NFS_CLIENT(inode)))
+#define NFS_PROTO(inode)		(NFS_SERVER(inode)->nfs_client->rpc_ops)
 #define NFS_COOKIEVERF(inode)		(NFS_I(inode)->cookieverf)
 #define NFS_READTIME(inode)		(NFS_I(inode)->read_cache_jiffies)
 #define NFS_CHANGE_ATTR(inode)		(NFS_I(inode)->change_attr)
@@ -297,6 +302,7 @@
 extern int nfs_permission(struct inode *, int, struct nameidata *);
 extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
 extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
+extern void nfs_access_zap_cache(struct inode *inode);
 extern int nfs_open(struct inode *, struct file *);
 extern int nfs_release(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);
@@ -579,6 +585,7 @@
 #define NFSDBG_FILE		0x0040
 #define NFSDBG_ROOT		0x0080
 #define NFSDBG_CALLBACK		0x0100
+#define NFSDBG_CLIENT		0x0200
 #define NFSDBG_ALL		0xFFFF
 
 #ifdef __KERNEL__
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6b4a13c..7ccfc7e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -7,13 +7,79 @@
 struct nfs_iostats;
 
 /*
+ * The nfs_client identifies our client state to the server.
+ */
+struct nfs_client {
+	atomic_t		cl_count;
+	int			cl_cons_state;	/* current construction state (-ve: init error) */
+#define NFS_CS_READY		0		/* ready to be used */
+#define NFS_CS_INITING		1		/* busy initialising */
+	int			cl_nfsversion;	/* NFS protocol version */
+	unsigned long		cl_res_state;	/* NFS resources state */
+#define NFS_CS_RPCIOD		0		/* - rpciod started */
+#define NFS_CS_CALLBACK		1		/* - callback started */
+#define NFS_CS_IDMAP		2		/* - idmap started */
+#define NFS_CS_RENEWD		3		/* - renewd started */
+	struct sockaddr_in	cl_addr;	/* server identifier */
+	char *			cl_hostname;	/* hostname of server */
+	struct list_head	cl_share_link;	/* link in global client list */
+	struct list_head	cl_superblocks;	/* List of nfs_server structs */
+
+	struct rpc_clnt *	cl_rpcclient;
+	const struct nfs_rpc_ops *rpc_ops;	/* NFS protocol vector */
+	unsigned long		retrans_timeo;	/* retransmit timeout */
+	unsigned int		retrans_count;	/* number of retransmit tries */
+
+#ifdef CONFIG_NFS_V4
+	u64			cl_clientid;	/* constant */
+	nfs4_verifier		cl_confirm;
+	unsigned long		cl_state;
+
+	u32			cl_lockowner_id;
+
+	/*
+	 * The following rwsem ensures exclusive access to the server
+	 * while we recover the state following a lease expiration.
+	 */
+	struct rw_semaphore	cl_sem;
+
+	struct list_head	cl_delegations;
+	struct list_head	cl_state_owners;
+	struct list_head	cl_unused;
+	int			cl_nunused;
+	spinlock_t		cl_lock;
+
+	unsigned long		cl_lease_time;
+	unsigned long		cl_last_renewal;
+	struct work_struct	cl_renewd;
+
+	struct rpc_wait_queue	cl_rpcwaitq;
+
+	/* used for the setclientid verifier */
+	struct timespec		cl_boot_time;
+
+	/* idmapper */
+	struct idmap *		cl_idmap;
+
+	/* Our own IP address, as a null-terminated string.
+	 * This is used to generate the clientid, and the callback address.
+	 */
+	char			cl_ipaddr[16];
+	unsigned char		cl_id_uniquifier;
+#endif
+};
+
+/*
  * NFS client parameters stored in the superblock.
  */
 struct nfs_server {
+	struct nfs_client *	nfs_client;	/* shared client and NFS4 state */
+	struct list_head	client_link;	/* List of other nfs_server structs
+						 * that share the same client
+						 */
+	struct list_head	master_link;	/* link in master servers list */
 	struct rpc_clnt *	client;		/* RPC client handle */
-	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
-	struct nfs_rpc_ops *	rpc_ops;	/* NFS protocol vector */
 	struct nfs_iostats *	io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
 	int			flags;		/* various flags */
@@ -29,24 +95,14 @@
 	unsigned int		acregmax;
 	unsigned int		acdirmin;
 	unsigned int		acdirmax;
-	unsigned long		retrans_timeo;	/* retransmit timeout */
-	unsigned int		retrans_count;	/* number of retransmit tries */
 	unsigned int		namelen;
-	char *			hostname;	/* remote hostname */
-	struct nfs_fh		fh;
-	struct sockaddr_in	addr;
+
 	struct nfs_fsid		fsid;
+	__u64			maxfilesize;	/* maximum file size */
 	unsigned long		mount_time;	/* when this fs was mounted */
+	dev_t			s_dev;		/* superblock dev numbers */
+
 #ifdef CONFIG_NFS_V4
-	/* Our own IP address, as a null-terminated string.
-	 * This is used to generate the clientid, and the callback address.
-	 */
-	char			ip_addr[16];
-	char *			mnt_path;
-	struct nfs4_client *	nfs4_state;	/* all NFSv4 state starts here */
-	struct list_head	nfs4_siblings;	/* List of other nfs_server structs
-						 * that share the same clientid
-						 */
 	u32			attr_bitmask[2];/* V4 bitmask representing the set
 						   of attributes supported on this
 						   filesystem */
@@ -54,6 +110,7 @@
 						   that are supported on this
 						   filesystem */
 #endif
+	void (*destroy)(struct nfs_server *);
 };
 
 /* Server capabilities */
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h
index 102e560..15a9f3b 100644
--- a/include/linux/nfs_idmap.h
+++ b/include/linux/nfs_idmap.h
@@ -62,15 +62,15 @@
 #ifdef __KERNEL__
 
 /* Forward declaration to make this header independent of others */
-struct nfs4_client;
+struct nfs_client;
 
-void nfs_idmap_new(struct nfs4_client *);
-void nfs_idmap_delete(struct nfs4_client *);
+int nfs_idmap_new(struct nfs_client *);
+void nfs_idmap_delete(struct nfs_client *);
 
-int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *);
-int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *);
-int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *);
-int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *);
+int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *);
+int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *);
+int nfs_map_uid_to_name(struct nfs_client *, __u32, char *);
+int nfs_map_gid_to_group(struct nfs_client *, __u32, char *);
 
 extern unsigned int nfs_idmap_cache_timeout;
 #endif /* __KERNEL__ */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 41e5a19..dc5397d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_NFS_XDR_H
 #define _LINUX_NFS_XDR_H
 
-#include <linux/sunrpc/xprt.h>
 #include <linux/nfsacl.h>
 
 /*
@@ -359,8 +358,8 @@
 	struct nfs_fh *		fromfh;
 	const char *		fromname;
 	unsigned int		fromlen;
-	const char *		topath;
-	unsigned int		tolen;
+	struct page **		pages;
+	unsigned int		pathlen;
 	struct iattr *		sattr;
 };
 
@@ -435,8 +434,8 @@
 	struct nfs_fh *		fromfh;
 	const char *		fromname;
 	unsigned int		fromlen;
-	const char *		topath;
-	unsigned int		tolen;
+	struct page **		pages;
+	unsigned int		pathlen;
 	struct iattr *		sattr;
 };
 
@@ -534,7 +533,10 @@
 struct nfs4_create_arg {
 	u32				ftype;
 	union {
-		struct qstr *		symlink;    /* NF4LNK */
+		struct {
+			struct page **	pages;
+			unsigned int	len;
+		} symlink;   /* NF4LNK */
 		struct {
 			u32		specdata1;
 			u32		specdata2;
@@ -770,6 +772,9 @@
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
+	int	(*lookupfh)(struct nfs_server *, struct nfs_fh *,
+			    struct qstr *, struct nfs_fh *,
+			    struct nfs_fattr *);
 	int	(*getattr) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fattr *);
 	int	(*setattr) (struct dentry *, struct nfs_fattr *,
@@ -791,9 +796,8 @@
 	int	(*rename)  (struct inode *, struct qstr *,
 			    struct inode *, struct qstr *);
 	int	(*link)    (struct inode *, struct inode *, struct qstr *);
-	int	(*symlink) (struct inode *, struct qstr *, struct qstr *,
-			    struct iattr *, struct nfs_fh *,
-			    struct nfs_fattr *);
+	int	(*symlink) (struct inode *, struct dentry *, struct page *,
+			    unsigned int, struct iattr *);
 	int	(*mkdir)   (struct inode *, struct dentry *, struct iattr *);
 	int	(*rmdir)   (struct inode *, struct qstr *);
 	int	(*readdir) (struct dentry *, struct rpc_cred *,
@@ -806,6 +810,7 @@
 			    struct nfs_fsinfo *);
 	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
 			     struct nfs_pathconf *);
+	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	u32 *	(*decode_dirent)(u32 *, struct nfs_entry *, int plus);
 	void	(*read_setup)   (struct nfs_read_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
@@ -829,9 +834,9 @@
 /*
  * Function vectors etc. for the NFS client
  */
-extern struct nfs_rpc_ops	nfs_v2_clientops;
-extern struct nfs_rpc_ops	nfs_v3_clientops;
-extern struct nfs_rpc_ops	nfs_v4_clientops;
+extern const struct nfs_rpc_ops	nfs_v2_clientops;
+extern const struct nfs_rpc_ops	nfs_v3_clientops;
+extern const struct nfs_rpc_ops	nfs_v4_clientops;
 extern struct rpc_version	nfs_version2;
 extern struct rpc_version	nfs_version3;
 extern struct rpc_version	nfs_version4;
diff --git a/include/linux/nfsd/Kbuild b/include/linux/nfsd/Kbuild
index c8c5456..d9c5455 100644
--- a/include/linux/nfsd/Kbuild
+++ b/include/linux/nfsd/Kbuild
@@ -1,2 +1,7 @@
-unifdef-y := const.h export.h stats.h syscall.h nfsfh.h debug.h auth.h
-
+unifdef-y += const.h
+unifdef-y += export.h
+unifdef-y += stats.h
+unifdef-y += syscall.h
+unifdef-y += nfsfh.h
+unifdef-y += debug.h
+unifdef-y += auth.h
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 7a24915..6a1e098 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2010,6 +2010,23 @@
 #define PCI_DEVICE_ID_ALTIMA_AC9100	0x03ea
 #define PCI_DEVICE_ID_ALTIMA_AC1003	0x03eb
 
+#define PCI_VENDOR_ID_ARECA		0x17d3
+#define PCI_DEVICE_ID_ARECA_1110	0x1110
+#define PCI_DEVICE_ID_ARECA_1120	0x1120
+#define PCI_DEVICE_ID_ARECA_1130	0x1130
+#define PCI_DEVICE_ID_ARECA_1160	0x1160
+#define PCI_DEVICE_ID_ARECA_1170	0x1170
+#define PCI_DEVICE_ID_ARECA_1210	0x1210
+#define PCI_DEVICE_ID_ARECA_1220	0x1220
+#define PCI_DEVICE_ID_ARECA_1230	0x1230
+#define PCI_DEVICE_ID_ARECA_1260	0x1260
+#define PCI_DEVICE_ID_ARECA_1270	0x1270
+#define PCI_DEVICE_ID_ARECA_1280	0x1280
+#define PCI_DEVICE_ID_ARECA_1380	0x1380
+#define PCI_DEVICE_ID_ARECA_1381	0x1381
+#define PCI_DEVICE_ID_ARECA_1680	0x1680
+#define PCI_DEVICE_ID_ARECA_1681	0x1681
+
 #define PCI_VENDOR_ID_S2IO		0x17d5
 #define	PCI_DEVICE_ID_S2IO_WIN		0x5731
 #define	PCI_DEVICE_ID_S2IO_UNI		0x5831
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index bd2c5a2..c3f01b3 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -305,6 +305,7 @@
 	TCA_FW_POLICE,
 	TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
 	TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
+	TCA_FW_MASK,
 	__TCA_FW_MAX
 };
 
diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild
index 73fa27a..2415a64 100644
--- a/include/linux/raid/Kbuild
+++ b/include/linux/raid/Kbuild
@@ -1 +1,2 @@
-header-y += md_p.h md_u.h
+header-y += md_p.h
+header-y += md_u.h
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index facd9ee..9c92dc8 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -2,6 +2,7 @@
 #define __LINUX_RTNETLINK_H
 
 #include <linux/netlink.h>
+#include <linux/if.h>
 
 /****
  *		Routing/neighbour discovery messages.
@@ -238,10 +239,8 @@
 	RT_TABLE_DEFAULT=253,
 	RT_TABLE_MAIN=254,
 	RT_TABLE_LOCAL=255,
-	__RT_TABLE_MAX
+	RT_TABLE_MAX=0xFFFFFFFF
 };
-#define RT_TABLE_MAX (__RT_TABLE_MAX - 1)
-
 
 
 /* Routing message attributes */
@@ -263,6 +262,7 @@
 	RTA_CACHEINFO,
 	RTA_SESSION,
 	RTA_MP_ALGO,
+	RTA_TABLE,
 	__RTA_MAX
 };
 
@@ -383,226 +383,6 @@
 	} u;
 };
 
-
-/*********************************************************
- *		Interface address.
- ****/
-
-struct ifaddrmsg
-{
-	unsigned char	ifa_family;
-	unsigned char	ifa_prefixlen;	/* The prefix length		*/
-	unsigned char	ifa_flags;	/* Flags			*/
-	unsigned char	ifa_scope;	/* See above			*/
-	int		ifa_index;	/* Link index			*/
-};
-
-enum
-{
-	IFA_UNSPEC,
-	IFA_ADDRESS,
-	IFA_LOCAL,
-	IFA_LABEL,
-	IFA_BROADCAST,
-	IFA_ANYCAST,
-	IFA_CACHEINFO,
-	IFA_MULTICAST,
-	__IFA_MAX
-};
-
-#define IFA_MAX (__IFA_MAX - 1)
-
-/* ifa_flags */
-
-#define IFA_F_SECONDARY		0x01
-#define IFA_F_TEMPORARY		IFA_F_SECONDARY
-
-#define IFA_F_DEPRECATED	0x20
-#define IFA_F_TENTATIVE		0x40
-#define IFA_F_PERMANENT		0x80
-
-struct ifa_cacheinfo
-{
-	__u32	ifa_prefered;
-	__u32	ifa_valid;
-	__u32	cstamp; /* created timestamp, hundredths of seconds */
-	__u32	tstamp; /* updated timestamp, hundredths of seconds */
-};
-
-
-#define IFA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))))
-#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg))
-
-/*
-   Important comment:
-   IFA_ADDRESS is prefix address, rather than local interface address.
-   It makes no difference for normally configured broadcast interfaces,
-   but for point-to-point IFA_ADDRESS is DESTINATION address,
-   local address is supplied in IFA_LOCAL attribute.
- */
-
-/**************************************************************
- *		Neighbour discovery.
- ****/
-
-struct ndmsg
-{
-	unsigned char	ndm_family;
-	unsigned char	ndm_pad1;
-	unsigned short	ndm_pad2;
-	int		ndm_ifindex;	/* Link index			*/
-	__u16		ndm_state;
-	__u8		ndm_flags;
-	__u8		ndm_type;
-};
-
-enum
-{
-	NDA_UNSPEC,
-	NDA_DST,
-	NDA_LLADDR,
-	NDA_CACHEINFO,
-	NDA_PROBES,
-	__NDA_MAX
-};
-
-#define NDA_MAX (__NDA_MAX - 1)
-
-#define NDA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
-#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg))
-
-/*
- *	Neighbor Cache Entry Flags
- */
-
-#define NTF_PROXY	0x08	/* == ATF_PUBL */
-#define NTF_ROUTER	0x80
-
-/*
- *	Neighbor Cache Entry States.
- */
-
-#define NUD_INCOMPLETE	0x01
-#define NUD_REACHABLE	0x02
-#define NUD_STALE	0x04
-#define NUD_DELAY	0x08
-#define NUD_PROBE	0x10
-#define NUD_FAILED	0x20
-
-/* Dummy states */
-#define NUD_NOARP	0x40
-#define NUD_PERMANENT	0x80
-#define NUD_NONE	0x00
-
-
-struct nda_cacheinfo
-{
-	__u32		ndm_confirmed;
-	__u32		ndm_used;
-	__u32		ndm_updated;
-	__u32		ndm_refcnt;
-};
-
-
-/*****************************************************************
- *		Neighbour tables specific messages.
- *
- * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
- * NLM_F_DUMP flag set. Every neighbour table configuration is
- * spread over multiple messages to avoid running into message
- * size limits on systems with many interfaces. The first message
- * in the sequence transports all not device specific data such as
- * statistics, configuration, and the default parameter set.
- * This message is followed by 0..n messages carrying device
- * specific parameter sets.
- * Although the ordering should be sufficient, NDTA_NAME can be
- * used to identify sequences. The initial message can be identified
- * by checking for NDTA_CONFIG. The device specific messages do
- * not contain this TLV but have NDTPA_IFINDEX set to the
- * corresponding interface index.
- *
- * To change neighbour table attributes, send RTM_SETNEIGHTBL
- * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
- * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
- * otherwise. Device specific parameter sets can be changed by
- * setting NDTPA_IFINDEX to the interface index of the corresponding
- * device.
- ****/
-
-struct ndt_stats
-{
-	__u64		ndts_allocs;
-	__u64		ndts_destroys;
-	__u64		ndts_hash_grows;
-	__u64		ndts_res_failed;
-	__u64		ndts_lookups;
-	__u64		ndts_hits;
-	__u64		ndts_rcv_probes_mcast;
-	__u64		ndts_rcv_probes_ucast;
-	__u64		ndts_periodic_gc_runs;
-	__u64		ndts_forced_gc_runs;
-};
-
-enum {
-	NDTPA_UNSPEC,
-	NDTPA_IFINDEX,			/* u32, unchangeable */
-	NDTPA_REFCNT,			/* u32, read-only */
-	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
-	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
-	NDTPA_RETRANS_TIME,		/* u64, msecs */
-	NDTPA_GC_STALETIME,		/* u64, msecs */
-	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
-	NDTPA_QUEUE_LEN,		/* u32 */
-	NDTPA_APP_PROBES,		/* u32 */
-	NDTPA_UCAST_PROBES,		/* u32 */
-	NDTPA_MCAST_PROBES,		/* u32 */
-	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
-	NDTPA_PROXY_DELAY,		/* u64, msecs */
-	NDTPA_PROXY_QLEN,		/* u32 */
-	NDTPA_LOCKTIME,			/* u64, msecs */
-	__NDTPA_MAX
-};
-#define NDTPA_MAX (__NDTPA_MAX - 1)
-
-struct ndtmsg
-{
-	__u8		ndtm_family;
-	__u8		ndtm_pad1;
-	__u16		ndtm_pad2;
-};
-
-struct ndt_config
-{
-	__u16		ndtc_key_len;
-	__u16		ndtc_entry_size;
-	__u32		ndtc_entries;
-	__u32		ndtc_last_flush;	/* delta to now in msecs */
-	__u32		ndtc_last_rand;		/* delta to now in msecs */
-	__u32		ndtc_hash_rnd;
-	__u32		ndtc_hash_mask;
-	__u32		ndtc_hash_chain_gc;
-	__u32		ndtc_proxy_qlen;
-};
-
-enum {
-	NDTA_UNSPEC,
-	NDTA_NAME,			/* char *, unchangeable */
-	NDTA_THRESH1,			/* u32 */
-	NDTA_THRESH2,			/* u32 */
-	NDTA_THRESH3,			/* u32 */
-	NDTA_CONFIG,			/* struct ndt_config, read-only */
-	NDTA_PARMS,			/* nested TLV NDTPA_* */
-	NDTA_STATS,			/* struct ndt_stats, read-only */
-	NDTA_GC_INTERVAL,		/* u64, msecs */
-	__NDTA_MAX
-};
-#define NDTA_MAX (__NDTA_MAX - 1)
-
-#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \
-		     NLMSG_ALIGN(sizeof(struct ndtmsg))))
-#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg))
-
-
 /****
  *		General form of address family dependent message.
  ****/
@@ -663,138 +443,6 @@
 	__u32	valid_time;
 };
 
-/* The struct should be in sync with struct net_device_stats */
-struct rtnl_link_stats
-{
-	__u32	rx_packets;		/* total packets received	*/
-	__u32	tx_packets;		/* total packets transmitted	*/
-	__u32	rx_bytes;		/* total bytes received 	*/
-	__u32	tx_bytes;		/* total bytes transmitted	*/
-	__u32	rx_errors;		/* bad packets received		*/
-	__u32	tx_errors;		/* packet transmit problems	*/
-	__u32	rx_dropped;		/* no space in linux buffers	*/
-	__u32	tx_dropped;		/* no space available in linux	*/
-	__u32	multicast;		/* multicast packets received	*/
-	__u32	collisions;
-
-	/* detailed rx_errors: */
-	__u32	rx_length_errors;
-	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u32	rx_frame_errors;	/* recv'd frame alignment error */
-	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u32	rx_missed_errors;	/* receiver missed packet	*/
-
-	/* detailed tx_errors */
-	__u32	tx_aborted_errors;
-	__u32	tx_carrier_errors;
-	__u32	tx_fifo_errors;
-	__u32	tx_heartbeat_errors;
-	__u32	tx_window_errors;
-	
-	/* for cslip etc */
-	__u32	rx_compressed;
-	__u32	tx_compressed;
-};
-
-/* The struct should be in sync with struct ifmap */
-struct rtnl_link_ifmap
-{
-	__u64	mem_start;
-	__u64	mem_end;
-	__u64	base_addr;
-	__u16	irq;
-	__u8	dma;
-	__u8	port;
-};
-
-enum
-{
-	IFLA_UNSPEC,
-	IFLA_ADDRESS,
-	IFLA_BROADCAST,
-	IFLA_IFNAME,
-	IFLA_MTU,
-	IFLA_LINK,
-	IFLA_QDISC,
-	IFLA_STATS,
-	IFLA_COST,
-#define IFLA_COST IFLA_COST
-	IFLA_PRIORITY,
-#define IFLA_PRIORITY IFLA_PRIORITY
-	IFLA_MASTER,
-#define IFLA_MASTER IFLA_MASTER
-	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
-#define IFLA_WIRELESS IFLA_WIRELESS
-	IFLA_PROTINFO,		/* Protocol specific information for a link */
-#define IFLA_PROTINFO IFLA_PROTINFO
-	IFLA_TXQLEN,
-#define IFLA_TXQLEN IFLA_TXQLEN
-	IFLA_MAP,
-#define IFLA_MAP IFLA_MAP
-	IFLA_WEIGHT,
-#define IFLA_WEIGHT IFLA_WEIGHT
-	IFLA_OPERSTATE,
-	IFLA_LINKMODE,
-	__IFLA_MAX
-};
-
-
-#define IFLA_MAX (__IFLA_MAX - 1)
-
-#define IFLA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
-#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
-
-/* ifi_flags.
-
-   IFF_* flags.
-
-   The only change is:
-   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
-   more not changeable by user. They describe link media
-   characteristics and set by device driver.
-
-   Comments:
-   - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
-   - If neither of these three flags are set;
-     the interface is NBMA.
-
-   - IFF_MULTICAST does not mean anything special:
-   multicasts can be used on all not-NBMA links.
-   IFF_MULTICAST means that this media uses special encapsulation
-   for multicast frames. Apparently, all IFF_POINTOPOINT and
-   IFF_BROADCAST devices are able to use multicasts too.
- */
-
-/* IFLA_LINK.
-   For usual devices it is equal ifi_index.
-   If it is a "virtual interface" (f.e. tunnel), ifi_link
-   can point to real physical interface (f.e. for bandwidth calculations),
-   or maybe 0, what means, that real media is unknown (usual
-   for IPIP tunnels, when route to endpoint is allowed to change)
- */
-
-/* Subtype attributes for IFLA_PROTINFO */
-enum
-{
-	IFLA_INET6_UNSPEC,
-	IFLA_INET6_FLAGS,	/* link flags			*/
-	IFLA_INET6_CONF,	/* sysctl parameters		*/
-	IFLA_INET6_STATS,	/* statistics			*/
-	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
-	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
-	__IFLA_INET6_MAX
-};
-
-#define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
-
-struct ifla_cacheinfo
-{
-	__u32	max_reasm_len;
-	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
-	__u32	reachable_time;
-	__u32	retrans_time;
-};
 
 /*****************************************************************
  *		Traffic control messages.
@@ -885,10 +533,13 @@
 	RTNLGRP_NOP2,
 	RTNLGRP_DECnet_ROUTE,
 #define RTNLGRP_DECnet_ROUTE	RTNLGRP_DECnet_ROUTE
-	RTNLGRP_NOP3,
+	RTNLGRP_DECnet_RULE,
+#define RTNLGRP_DECnet_RULE	RTNLGRP_DECnet_RULE
 	RTNLGRP_NOP4,
 	RTNLGRP_IPV6_PREFIX,
 #define RTNLGRP_IPV6_PREFIX	RTNLGRP_IPV6_PREFIX
+	RTNLGRP_IPV6_RULE,
+#define RTNLGRP_IPV6_RULE	RTNLGRP_IPV6_RULE
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
@@ -923,8 +574,6 @@
 #define rtattr_parse_nested(tb, max, rta) \
 	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
 
-extern struct sock *rtnl;
-
 struct rtnetlink_link
 {
 	int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr);
@@ -933,6 +582,10 @@
 
 extern struct rtnetlink_link * rtnetlink_links[NPROTO];
 extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
+extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
+extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+		       struct nlmsghdr *nlh, gfp_t flags);
+extern void rtnl_set_sk_err(u32 group, int error);
 extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 
 extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data);
@@ -1065,6 +718,13 @@
 	} \
 } while(0)
 
+static inline u32 rtm_get_table(struct rtattr **rta, u8 table)
+{
+	return RTA_GET_U32(rta[RTA_TABLE-1]);
+rtattr_failure:
+	return table;
+}
+
 #endif /* __KERNEL__ */
 
 
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 66ff545..4efbd9c 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -5,7 +5,7 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 
-static inline void sg_set_buf(struct scatterlist *sg, void *buf,
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
 			      unsigned int buflen)
 {
 	sg->page = virt_to_page(buf);
@@ -13,7 +13,7 @@
 	sg->length = buflen;
 }
 
-static inline void sg_init_one(struct scatterlist *sg, void *buf,
+static inline void sg_init_one(struct scatterlist *sg, const void *buf,
 			       unsigned int buflen)
 {
 	memset(sg, 0, sizeof(*sg));
diff --git a/include/linux/security.h b/include/linux/security.h
index 6bc2aad..9f56fb8 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -31,6 +31,8 @@
 #include <linux/msg.h>
 #include <linux/sched.h>
 #include <linux/key.h>
+#include <linux/xfrm.h>
+#include <net/flow.h>
 
 struct ctl_table;
 
@@ -88,6 +90,7 @@
 struct nfsctl_arg;
 struct sched_param;
 struct swap_info_struct;
+struct request_sock;
 
 /* bprm_apply_creds unsafe reasons */
 #define LSM_UNSAFE_SHARE	1
@@ -812,9 +815,19 @@
  *      which is used to copy security attributes between local stream sockets.
  * @sk_free_security:
  *	Deallocate security structure.
- * @sk_getsid:
- *	Retrieve the LSM-specific sid for the sock to enable caching of network
+ * @sk_clone_security:
+ *	Clone/copy security structure.
+ * @sk_getsecid:
+ *	Retrieve the LSM-specific secid for the sock to enable caching of network
  *	authorizations.
+ * @sock_graft:
+ *	Sets the socket's isec sid to the sock's sid.
+ * @inet_conn_request:
+ *	Sets the openreq's sid to socket's sid with MLS portion taken from peer sid.
+ * @inet_csk_clone:
+ *	Sets the new child socket's sid to the openreq sid.
+ * @req_classify_flow:
+ *	Sets the flow's sid to the openreq sid.
  *
  * Security hooks for XFRM operations.
  *
@@ -823,9 +836,10 @@
  *	used by the XFRM system.
  *	@sec_ctx contains the security context information being provided by
  *	the user-level policy update program (e.g., setkey).
- *	Allocate a security structure to the xp->security field.
- *	The security field is initialized to NULL when the xfrm_policy is
- *	allocated.
+ *	@sk refers to the sock from which to derive the security context.
+ *	Allocate a security structure to the xp->security field; the security
+ *	field is initialized to NULL when the xfrm_policy is allocated. Only
+ *	one of sec_ctx or sk can be specified.
  *	Return 0 if operation was successful (memory to allocate, legal context)
  * @xfrm_policy_clone_security:
  *	@old contains an existing xfrm_policy in the SPD.
@@ -844,9 +858,14 @@
  *	Database by the XFRM system.
  *	@sec_ctx contains the security context information being provided by
  *	the user-level SA generation program (e.g., setkey or racoon).
- *	Allocate a security structure to the x->security field.  The
- *	security field is initialized to NULL when the xfrm_state is
- *	allocated.
+ *	@polsec contains the security context information associated with a xfrm
+ *	policy rule from which to take the base context. polsec must be NULL
+ *	when sec_ctx is specified.
+ *	@secid contains the secid from which to take the mls portion of the context.
+ *	Allocate a security structure to the x->security field; the security
+ *	field is initialized to NULL when the xfrm_state is allocated. Set the
+ *	context to correspond to either sec_ctx or polsec, with the mls portion
+ *	taken from secid in the latter case.
  *	Return 0 if operation was successful (memory to allocate, legal context).
  * @xfrm_state_free_security:
  *	@x contains the xfrm_state.
@@ -857,13 +876,27 @@
  * @xfrm_policy_lookup:
  *	@xp contains the xfrm_policy for which the access control is being
  *	checked.
- *	@sk_sid contains the sock security label that is used to authorize
+ *	@fl_secid contains the flow security label that is used to authorize
  *	access to the policy xp.
  *	@dir contains the direction of the flow (input or output).
- *	Check permission when a sock selects a xfrm_policy for processing
+ *	Check permission when a flow selects a xfrm_policy for processing
  *	XFRMs on a packet.  The hook is called when selecting either a
  *	per-socket policy or a generic xfrm policy.
  *	Return 0 if permission is granted.
+ * @xfrm_state_pol_flow_match:
+ *	@x contains the state to match.
+ *	@xp contains the policy to check for a match.
+ *	@fl contains the flow to check for a match.
+ *	Return 1 if there is a match.
+ * @xfrm_flow_state_match:
+ *	@fl contains the flow key to match.
+ *	@xfrm points to the xfrm_state to match.
+ *	Return 1 if there is a match.
+ * @xfrm_decode_session:
+ *	@skb points to skb to decode.
+ *	@secid points to the flow key secid to set.
+ *	@ckall says if all xfrms used should be checked for same secid.
+ *	Return 0 if ckall is zero or all xfrms used have the same secid.
  *
  * Security hooks affecting all Key Management operations
  *
@@ -1308,8 +1341,8 @@
 	int (*unix_may_send) (struct socket * sock, struct socket * other);
 
 	int (*socket_create) (int family, int type, int protocol, int kern);
-	void (*socket_post_create) (struct socket * sock, int family,
-				    int type, int protocol, int kern);
+	int (*socket_post_create) (struct socket * sock, int family,
+				   int type, int protocol, int kern);
 	int (*socket_bind) (struct socket * sock,
 			    struct sockaddr * address, int addrlen);
 	int (*socket_connect) (struct socket * sock,
@@ -1332,18 +1365,31 @@
 	int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
 	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
 	void (*sk_free_security) (struct sock *sk);
-	unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir);
+	void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
+	void (*sk_getsecid) (struct sock *sk, u32 *secid);
+	void (*sock_graft)(struct sock* sk, struct socket *parent);
+	int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb,
+					struct request_sock *req);
+	void (*inet_csk_clone)(struct sock *newsk, const struct request_sock *req);
+	void (*req_classify_flow)(const struct request_sock *req, struct flowi *fl);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-	int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx);
+	int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp,
+			struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk);
 	int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new);
 	void (*xfrm_policy_free_security) (struct xfrm_policy *xp);
 	int (*xfrm_policy_delete_security) (struct xfrm_policy *xp);
-	int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx);
+	int (*xfrm_state_alloc_security) (struct xfrm_state *x,
+		struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *polsec,
+		u32 secid);
 	void (*xfrm_state_free_security) (struct xfrm_state *x);
 	int (*xfrm_state_delete_security) (struct xfrm_state *x);
-	int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir);
+	int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir);
+	int (*xfrm_state_pol_flow_match)(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl);
+	int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm);
+	int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall);
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
 	/* key management security hooks */
@@ -2778,13 +2824,13 @@
 	return security_ops->socket_create(family, type, protocol, kern);
 }
 
-static inline void security_socket_post_create(struct socket * sock, 
-					       int family,
-					       int type, 
-					       int protocol, int kern)
+static inline int security_socket_post_create(struct socket * sock,
+					      int family,
+					      int type,
+					      int protocol, int kern)
 {
-	security_ops->socket_post_create(sock, family, type,
-					 protocol, kern);
+	return security_ops->socket_post_create(sock, family, type,
+						protocol, kern);
 }
 
 static inline int security_socket_bind(struct socket * sock, 
@@ -2885,9 +2931,36 @@
 	return security_ops->sk_free_security(sk);
 }
 
-static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
+static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
 {
-	return security_ops->sk_getsid(sk, fl, dir);
+	security_ops->sk_clone_security(sk, newsk);
+}
+
+static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
+{
+	security_ops->sk_getsecid(sk, &fl->secid);
+}
+
+static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl)
+{
+	security_ops->req_classify_flow(req, fl);
+}
+
+static inline void security_sock_graft(struct sock* sk, struct socket *parent)
+{
+	security_ops->sock_graft(sk, parent);
+}
+
+static inline int security_inet_conn_request(struct sock *sk,
+			struct sk_buff *skb, struct request_sock *req)
+{
+	return security_ops->inet_conn_request(sk, skb, req);
+}
+
+static inline void security_inet_csk_clone(struct sock *newsk,
+			const struct request_sock *req)
+{
+	security_ops->inet_csk_clone(newsk, req);
 }
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct socket * sock,
@@ -2909,11 +2982,12 @@
 	return 0;
 }
 
-static inline void security_socket_post_create(struct socket * sock, 
-					       int family,
-					       int type, 
-					       int protocol, int kern)
+static inline int security_socket_post_create(struct socket * sock,
+					      int family,
+					      int type,
+					      int protocol, int kern)
 {
+	return 0;
 }
 
 static inline int security_socket_bind(struct socket * sock, 
@@ -3011,16 +3085,43 @@
 {
 }
 
-static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
+static inline void security_sk_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+
+static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl)
+{
+}
+
+static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl)
+{
+}
+
+static inline void security_sock_graft(struct sock* sk, struct socket *parent)
+{
+}
+
+static inline int security_inet_conn_request(struct sock *sk,
+			struct sk_buff *skb, struct request_sock *req)
 {
 	return 0;
 }
+
+static inline void security_inet_csk_clone(struct sock *newsk,
+			const struct request_sock *req)
+{
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
 {
-	return security_ops->xfrm_policy_alloc_security(xp, sec_ctx);
+	return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL);
+}
+
+static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk)
+{
+	return security_ops->xfrm_policy_alloc_security(xp, NULL, sk);
 }
 
 static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new)
@@ -3038,9 +3139,18 @@
 	return security_ops->xfrm_policy_delete_security(xp);
 }
 
-static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+static inline int security_xfrm_state_alloc(struct xfrm_state *x,
+			struct xfrm_user_sec_ctx *sec_ctx)
 {
-	return security_ops->xfrm_state_alloc_security(x, sec_ctx);
+	return security_ops->xfrm_state_alloc_security(x, sec_ctx, NULL, 0);
+}
+
+static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
+				struct xfrm_sec_ctx *polsec, u32 secid)
+{
+	if (!polsec)
+		return 0;
+	return security_ops->xfrm_state_alloc_security(x, NULL, polsec, secid);
 }
 
 static inline int security_xfrm_state_delete(struct xfrm_state *x)
@@ -3053,9 +3163,32 @@
 	security_ops->xfrm_state_free_security(x);
 }
 
-static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir)
 {
-	return security_ops->xfrm_policy_lookup(xp, sk_sid, dir);
+	return security_ops->xfrm_policy_lookup(xp, fl_secid, dir);
+}
+
+static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl)
+{
+	return security_ops->xfrm_state_pol_flow_match(x, xp, fl);
+}
+
+static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
+{
+	return security_ops->xfrm_flow_state_match(fl, xfrm);
+}
+
+static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid)
+{
+	return security_ops->xfrm_decode_session(skb, secid, 1);
+}
+
+static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl)
+{
+	int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0);
+
+	BUG_ON(rc);
 }
 #else	/* CONFIG_SECURITY_NETWORK_XFRM */
 static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
@@ -3063,6 +3196,11 @@
 	return 0;
 }
 
+static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk)
+{
+	return 0;
+}
+
 static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new)
 {
 	return 0;
@@ -3077,7 +3215,14 @@
 	return 0;
 }
 
-static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+static inline int security_xfrm_state_alloc(struct xfrm_state *x,
+					struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return 0;
+}
+
+static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
+					struct xfrm_sec_ctx *polsec, u32 secid)
 {
 	return 0;
 }
@@ -3091,10 +3236,32 @@
 	return 0;
 }
 
-static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir)
 {
 	return 0;
 }
+
+static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl)
+{
+	return 1;
+}
+
+static inline int security_xfrm_flow_state_match(struct flowi *fl,
+                                struct xfrm_state *xfrm)
+{
+	return 1;
+}
+
+static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid)
+{
+	return 0;
+}
+
+static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl)
+{
+}
+
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
 #ifdef CONFIG_KEYS
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 755e9cd..85577a4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -34,8 +34,9 @@
 #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
 
 #define CHECKSUM_NONE 0
-#define CHECKSUM_HW 1
+#define CHECKSUM_PARTIAL 1
 #define CHECKSUM_UNNECESSARY 2
+#define CHECKSUM_COMPLETE 3
 
 #define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES - 1)) & \
 				 ~(SMP_CACHE_BYTES - 1))
@@ -56,17 +57,17 @@
  *	      Apparently with secret goal to sell you new device, when you
  *	      will add new protocol to your host. F.e. IPv6. 8)
  *
- *	HW: the most generic way. Device supplied checksum of _all_
+ *	COMPLETE: the most generic way. Device supplied checksum of _all_
  *	    the packet as seen by netif_rx in skb->csum.
  *	    NOTE: Even if device supports only some protocols, but
- *	    is able to produce some skb->csum, it MUST use HW,
+ *	    is able to produce some skb->csum, it MUST use COMPLETE,
  *	    not UNNECESSARY.
  *
  * B. Checksumming on output.
  *
  *	NONE: skb is checksummed by protocol or csum is not required.
  *
- *	HW: device is required to csum packet as seen by hard_start_xmit
+ *	PARTIAL: device is required to csum packet as seen by hard_start_xmit
  *	from skb->h.raw to the end and to record the checksum
  *	at skb->h.raw+skb->csum.
  *
@@ -1261,14 +1262,14 @@
  *	@len: length of data pulled
  *
  *	After doing a pull on a received packet, you need to call this to
- *	update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE
- *	so that it can be recomputed from scratch.
+ *	update the CHECKSUM_COMPLETE checksum, or set ip_summed to
+ *	CHECKSUM_NONE so that it can be recomputed from scratch.
  */
 
 static inline void skb_postpull_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
 {
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
 }
 
@@ -1287,7 +1288,7 @@
 {
 	if (likely(len >= skb->len))
 		return 0;
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 	return __pskb_trim(skb, len);
 }
diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 4db25d5..854aa6b 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -155,42 +155,11 @@
 	UDP_MIB_NOPORTS,			/* NoPorts */
 	UDP_MIB_INERRORS,			/* InErrors */
 	UDP_MIB_OUTDATAGRAMS,			/* OutDatagrams */
+	UDP_MIB_RCVBUFERRORS,			/* RcvbufErrors */
+	UDP_MIB_SNDBUFERRORS,			/* SndbufErrors */
 	__UDP_MIB_MAX
 };
 
-/* sctp mib definitions */
-/*
- * draft-ietf-sigtran-sctp-mib-07.txt
- */
-enum
-{
-	SCTP_MIB_NUM = 0,
-	SCTP_MIB_CURRESTAB,			/* CurrEstab */
-	SCTP_MIB_ACTIVEESTABS,			/* ActiveEstabs */
-	SCTP_MIB_PASSIVEESTABS,			/* PassiveEstabs */
-	SCTP_MIB_ABORTEDS,			/* Aborteds */
-	SCTP_MIB_SHUTDOWNS,			/* Shutdowns */
-	SCTP_MIB_OUTOFBLUES,			/* OutOfBlues */
-	SCTP_MIB_CHECKSUMERRORS,		/* ChecksumErrors */
-	SCTP_MIB_OUTCTRLCHUNKS,			/* OutCtrlChunks */
-	SCTP_MIB_OUTORDERCHUNKS,		/* OutOrderChunks */
-	SCTP_MIB_OUTUNORDERCHUNKS,		/* OutUnorderChunks */
-	SCTP_MIB_INCTRLCHUNKS,			/* InCtrlChunks */
-	SCTP_MIB_INORDERCHUNKS,			/* InOrderChunks */
-	SCTP_MIB_INUNORDERCHUNKS,		/* InUnorderChunks */
-	SCTP_MIB_FRAGUSRMSGS,			/* FragUsrMsgs */
-	SCTP_MIB_REASMUSRMSGS,			/* ReasmUsrMsgs */
-	SCTP_MIB_OUTSCTPPACKS,			/* OutSCTPPacks */
-	SCTP_MIB_INSCTPPACKS,			/* InSCTPPacks */
-	SCTP_MIB_RTOALGORITHM,			/* RtoAlgorithm */
-	SCTP_MIB_RTOMIN,			/* RtoMin */
-	SCTP_MIB_RTOMAX,			/* RtoMax */
-	SCTP_MIB_RTOINITIAL,			/* RtoInitial */
-	SCTP_MIB_VALCOOKIELIFE,			/* ValCookieLife */
-	SCTP_MIB_MAXINITRETR,			/* MaxInitRetr */
-	__SCTP_MIB_MAX
-};
-
 /* linux mib definitions */
 enum
 {
diff --git a/include/linux/sunrpc/Kbuild b/include/linux/sunrpc/Kbuild
index 0d1d768..fb438f1 100644
--- a/include/linux/sunrpc/Kbuild
+++ b/include/linux/sunrpc/Kbuild
@@ -1 +1 @@
-unifdef-y := debug.h
+unifdef-y += debug.h
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 8fe9f35..f6d1d64 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -18,18 +18,6 @@
 #include <linux/sunrpc/timer.h>
 #include <asm/signal.h>
 
-/*
- * This defines an RPC port mapping
- */
-struct rpc_portmap {
-	__u32			pm_prog;
-	__u32			pm_vers;
-	__u32			pm_prot;
-	__u16			pm_port;
-	unsigned char		pm_binding : 1;	/* doing a getport() */
-	struct rpc_wait_queue	pm_bindwait;	/* waiting on getport() */
-};
-
 struct rpc_inode;
 
 /*
@@ -40,7 +28,9 @@
 	atomic_t		cl_users;	/* number of references */
 	struct rpc_xprt *	cl_xprt;	/* transport */
 	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
-	u32			cl_maxproc;	/* max procedure number */
+	u32			cl_prog,	/* RPC program number */
+				cl_vers,	/* RPC version number */
+				cl_maxproc;	/* max procedure number */
 
 	char *			cl_server;	/* server machine name */
 	char *			cl_protname;	/* protocol name */
@@ -55,7 +45,6 @@
 				cl_dead     : 1;/* abandoned */
 
 	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */
-	struct rpc_portmap *	cl_pmap;	/* port mapping */
 
 	int			cl_nodelen;	/* nodename length */
 	char 			cl_nodename[UNX_MAXNODENAME];
@@ -64,14 +53,8 @@
 	struct dentry *		cl_dentry;	/* inode */
 	struct rpc_clnt *	cl_parent;	/* Points to parent of clones */
 	struct rpc_rtt		cl_rtt_default;
-	struct rpc_portmap	cl_pmap_default;
 	char			cl_inline_name[32];
 };
-#define cl_timeout		cl_xprt->timeout
-#define cl_prog			cl_pmap->pm_prog
-#define cl_vers			cl_pmap->pm_vers
-#define cl_port			cl_pmap->pm_port
-#define cl_prot			cl_pmap->pm_prot
 
 /*
  * General RPC program info
@@ -106,24 +89,36 @@
 	char *			p_name;		/* name of procedure */
 };
 
-#define RPC_CONGESTED(clnt)	(RPCXPRT_CONGESTED((clnt)->cl_xprt))
-#define RPC_PEERADDR(clnt)	(&(clnt)->cl_xprt->addr)
-
 #ifdef __KERNEL__
 
-struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
-				struct rpc_program *info,
-				u32 version, rpc_authflavor_t authflavor);
-struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname,
-				struct rpc_program *info,
-				u32 version, rpc_authflavor_t authflavor);
+struct rpc_create_args {
+	int			protocol;
+	struct sockaddr		*address;
+	size_t			addrsize;
+	struct rpc_timeout	*timeout;
+	char			*servername;
+	struct rpc_program	*program;
+	u32			version;
+	rpc_authflavor_t	authflavor;
+	unsigned long		flags;
+};
+
+/* Values for "flags" field */
+#define RPC_CLNT_CREATE_HARDRTRY	(1UL << 0)
+#define RPC_CLNT_CREATE_INTR		(1UL << 1)
+#define RPC_CLNT_CREATE_AUTOBIND	(1UL << 2)
+#define RPC_CLNT_CREATE_ONESHOT		(1UL << 3)
+#define RPC_CLNT_CREATE_NONPRIVPORT	(1UL << 4)
+#define RPC_CLNT_CREATE_NOPING		(1UL << 5)
+
+struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 				struct rpc_program *, int);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 int		rpc_shutdown_client(struct rpc_clnt *);
 int		rpc_destroy_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
-void		rpc_getport(struct rpc_task *, struct rpc_clnt *);
+void		rpc_getport(struct rpc_task *);
 int		rpc_register(u32, u32, int, unsigned short, int *);
 
 void		rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
@@ -140,6 +135,8 @@
 size_t		rpc_max_payload(struct rpc_clnt *);
 void		rpc_force_rebind(struct rpc_clnt *);
 int		rpc_ping(struct rpc_clnt *clnt, int flags);
+size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
+char *		rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
 
 /*
  * Helper function for NFSroot support
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 1279280..e30ba20 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -46,8 +46,8 @@
 	unsigned char		seed[16];
 	int			signalg;
 	int			sealalg;
-	struct crypto_tfm	*enc;
-	struct crypto_tfm	*seq;
+	struct crypto_blkcipher	*enc;
+	struct crypto_blkcipher	*seq;
 	s32			endtime;
 	u32			seq_send;
 	struct xdr_netobj	mech_used;
@@ -136,26 +136,27 @@
 
 
 u32
-krb5_encrypt(struct crypto_tfm * key,
+krb5_encrypt(struct crypto_blkcipher *key,
 	     void *iv, void *in, void *out, int length);
 
 u32
-krb5_decrypt(struct crypto_tfm * key,
+krb5_decrypt(struct crypto_blkcipher *key,
 	     void *iv, void *in, void *out, int length); 
 
 int
-gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset,
-		struct page **pages);
+gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *outbuf,
+		    int offset, struct page **pages);
 
 int
-gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset);
+gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf,
+		    int offset);
 
 s32
-krb5_make_seq_num(struct crypto_tfm * key,
+krb5_make_seq_num(struct crypto_blkcipher *key,
 		int direction,
 		s32 seqnum, unsigned char *cksum, unsigned char *buf);
 
 s32
-krb5_get_seq_num(struct crypto_tfm * key,
+krb5_get_seq_num(struct crypto_blkcipher *key,
 	       unsigned char *cksum,
 	       unsigned char *buf, int *direction, s32 * seqnum);
diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h
index 336e218..2cf3fbb 100644
--- a/include/linux/sunrpc/gss_spkm3.h
+++ b/include/linux/sunrpc/gss_spkm3.h
@@ -19,9 +19,9 @@
 	unsigned int		req_flags ;
 	struct xdr_netobj	share_key;
 	int			conf_alg;
-	struct crypto_tfm*	derived_conf_key;
+	struct crypto_blkcipher	*derived_conf_key;
 	int			intg_alg;
-	struct crypto_tfm*	derived_integ_key;
+	struct crypto_blkcipher	*derived_integ_key;
 	int			keyestb_alg;   /* alg used to get share_key */
 	int			owf_alg;   /* one way function */
 };
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index a481472..a2eb9b4 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -43,7 +43,7 @@
 
 extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
 extern int rpc_rmdir(struct dentry *);
-extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
+extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, struct rpc_pipe_ops *, int flags);
 extern int rpc_unlink(struct dentry *);
 extern struct vfsmount *rpc_get_mount(void);
 extern void rpc_put_mount(void);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 82a91bb..f399c13 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -127,7 +127,6 @@
  */
 #define RPC_TASK_ASYNC		0x0001		/* is an async task */
 #define RPC_TASK_SWAPPER	0x0002		/* is swapping in/out */
-#define RPC_TASK_CHILD		0x0008		/* is child of other task */
 #define RPC_CALL_MAJORSEEN	0x0020		/* major timeout seen */
 #define RPC_TASK_ROOTCREDS	0x0040		/* force root creds */
 #define RPC_TASK_DYNAMIC	0x0080		/* task was kmalloc'ed */
@@ -136,7 +135,6 @@
 #define RPC_TASK_NOINTR		0x0400		/* uninterruptible task */
 
 #define RPC_IS_ASYNC(t)		((t)->tk_flags & RPC_TASK_ASYNC)
-#define RPC_IS_CHILD(t)		((t)->tk_flags & RPC_TASK_CHILD)
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
 #define RPC_DO_ROOTOVERRIDE(t)	((t)->tk_flags & RPC_TASK_ROOTCREDS)
 #define RPC_ASSASSINATED(t)	((t)->tk_flags & RPC_TASK_KILLED)
@@ -253,7 +251,6 @@
 				const struct rpc_call_ops *ops, void *data);
 struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 				const struct rpc_call_ops *ops, void *data);
-struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent);
 void		rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
 				int flags, const struct rpc_call_ops *ops,
 				void *data);
@@ -261,8 +258,6 @@
 void		rpc_exit_task(struct rpc_task *);
 void		rpc_killall_tasks(struct rpc_clnt *);
 int		rpc_execute(struct rpc_task *);
-void		rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
-					rpc_action action);
 void		rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
 void		rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
 void		rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3a0cca2..bdeba85 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -12,6 +12,7 @@
 #include <linux/uio.h>
 #include <linux/socket.h>
 #include <linux/in.h>
+#include <linux/kref.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xdr.h>
 
@@ -51,6 +52,14 @@
 	unsigned char		to_exponential;
 };
 
+enum rpc_display_format_t {	/* format selector taken by rpc_xprt_ops->print_addr() */
+	RPC_DISPLAY_ADDR = 0,
+	RPC_DISPLAY_PORT,
+	RPC_DISPLAY_PROTO,
+	RPC_DISPLAY_ALL,
+	RPC_DISPLAY_MAX,	/* count of the formats above; sizes address_strings[] */
+};
+
 struct rpc_task;
 struct rpc_xprt;
 struct seq_file;
@@ -103,8 +112,10 @@
 
 struct rpc_xprt_ops {
 	void		(*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
+	char *		(*print_addr)(struct rpc_xprt *xprt, enum rpc_display_format_t format);
 	int		(*reserve_xprt)(struct rpc_task *task);
 	void		(*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
+	void		(*rpcbind)(struct rpc_task *task);
 	void		(*set_port)(struct rpc_xprt *xprt, unsigned short port);
 	void		(*connect)(struct rpc_task *task);
 	void *		(*buf_alloc)(struct rpc_task *task, size_t size);
@@ -119,12 +130,14 @@
 };
 
 struct rpc_xprt {
+	struct kref		kref;		/* Reference count */
 	struct rpc_xprt_ops *	ops;		/* transport methods */
 	struct socket *		sock;		/* BSD socket layer */
 	struct sock *		inet;		/* INET layer */
 
 	struct rpc_timeout	timeout;	/* timeout parms */
-	struct sockaddr_in	addr;		/* server address */
+	struct sockaddr_storage	addr;		/* server address */
+	size_t			addrlen;	/* size of server address */
 	int			prot;		/* IP protocol */
 
 	unsigned long		cong;		/* current congestion */
@@ -138,6 +151,7 @@
 	unsigned int		tsh_size;	/* size of transport specific
 						   header */
 
+	struct rpc_wait_queue	binding;	/* requests waiting on rpcbind */
 	struct rpc_wait_queue	sending;	/* requests waiting to send */
 	struct rpc_wait_queue	resend;		/* requests waiting to resend */
 	struct rpc_wait_queue	pending;	/* requests in flight */
@@ -205,6 +219,8 @@
 	void			(*old_data_ready)(struct sock *, int);
 	void			(*old_state_change)(struct sock *);
 	void			(*old_write_space)(struct sock *);
+
+	char *			address_strings[RPC_DISPLAY_MAX];
 };
 
 #define XPRT_LAST_FRAG		(1 << 0)
@@ -217,12 +233,12 @@
 /*
  * Transport operations used by ULPs
  */
-struct rpc_xprt *	xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to);
 void			xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr);
 
 /*
  * Generic internal transport functions
  */
+struct rpc_xprt *	xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms);
 void			xprt_connect(struct rpc_task *task);
 void			xprt_reserve(struct rpc_task *task);
 int			xprt_reserve_xprt(struct rpc_task *task);
@@ -234,7 +250,8 @@
 void			xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_release(struct rpc_task *task);
-int			xprt_destroy(struct rpc_xprt *xprt);
+struct rpc_xprt *	xprt_get(struct rpc_xprt *xprt);
+void			xprt_put(struct rpc_xprt *xprt);
 
 static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p)
 {
@@ -269,6 +286,8 @@
 #define XPRT_CONNECTED		(1)
 #define XPRT_CONNECTING		(2)
 #define XPRT_CLOSE_WAIT		(3)
+#define XPRT_BOUND		(4)
+#define XPRT_BINDING		(5)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
@@ -312,6 +331,33 @@
 	return test_and_set_bit(XPRT_CONNECTING, &xprt->state);
 }
 
+static inline void xprt_set_bound(struct rpc_xprt *xprt)
+{
+	test_and_set_bit(XPRT_BOUND, &xprt->state);	/* sets XPRT_BOUND; the old value is discarded */
+}
+
+static inline int xprt_bound(struct rpc_xprt *xprt)
+{
+	return test_bit(XPRT_BOUND, &xprt->state);	/* non-zero when XPRT_BOUND is set */
+}
+
+static inline void xprt_clear_bound(struct rpc_xprt *xprt)
+{
+	clear_bit(XPRT_BOUND, &xprt->state);	/* no explicit barriers, unlike xprt_clear_binding() */
+}
+
+static inline void xprt_clear_binding(struct rpc_xprt *xprt)
+{
+	smp_mb__before_clear_bit();	/* order earlier accesses before the clear */
+	clear_bit(XPRT_BINDING, &xprt->state);
+	smp_mb__after_clear_bit();	/* ...and the clear before later accesses */
+}
+
+static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt)
+{
+	return test_and_set_bit(XPRT_BINDING, &xprt->state);	/* old bit: non-zero if XPRT_BINDING was already set */
+}
+
 #endif /* __KERNEL__*/
 
 #endif /* _LINUX_SUNRPC_XPRT_H */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index e4b1a4d..736ed91 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -411,6 +411,10 @@
 	NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115,
 	NET_TCP_DMA_COPYBREAK=116,
 	NET_TCP_SLOW_START_AFTER_IDLE=117,
+	NET_CIPSOV4_CACHE_ENABLE=118,
+	NET_CIPSOV4_CACHE_BUCKET_SIZE=119,
+	NET_CIPSOV4_RBM_OPTFMT=120,
+	NET_CIPSOV4_RBM_STRICTVALID=121,
 };
 
 enum {
@@ -552,6 +556,7 @@
 	NET_IPV6_ACCEPT_RA_RTR_PREF=20,
 	NET_IPV6_RTR_PROBE_INTERVAL=21,
 	NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22,
+	NET_IPV6_PROXY_NDP=23,
 	__NET_IPV6_MAX
 };
 
diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild
index 5251a50..78dfbac 100644
--- a/include/linux/tc_act/Kbuild
+++ b/include/linux/tc_act/Kbuild
@@ -1 +1,4 @@
-header-y += tc_gact.h tc_ipt.h tc_mirred.h tc_pedit.h
+header-y += tc_gact.h
+header-y += tc_ipt.h
+header-y += tc_mirred.h
+header-y += tc_pedit.h
diff --git a/include/linux/tc_ematch/Kbuild b/include/linux/tc_ematch/Kbuild
index 381e930..4a58a1c 100644
--- a/include/linux/tc_ematch/Kbuild
+++ b/include/linux/tc_ematch/Kbuild
@@ -1 +1,4 @@
-headers-y := tc_em_cmp.h tc_em_meta.h tc_em_nbyte.h tc_em_text.h
+header-y += tc_em_cmp.h
+header-y += tc_em_meta.h
+header-y += tc_em_nbyte.h
+header-y += tc_em_text.h
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9e38b56..0422036 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -85,6 +85,7 @@
 void laptop_io_completion(void);
 void laptop_sync_completion(void);
 void throttle_vm_writeout(void);
+void writeback_congestion_end(void);
 
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 46a15c7..14ecd19 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -104,6 +104,13 @@
 
 enum
 {
+	XFRM_POLICY_TYPE_MAIN	= 0,
+	XFRM_POLICY_TYPE_SUB	= 1,
+	XFRM_POLICY_TYPE_MAX	= 2
+};
+
+enum
+{
 	XFRM_POLICY_IN	= 0,
 	XFRM_POLICY_OUT	= 1,
 	XFRM_POLICY_FWD	= 2,
@@ -120,7 +127,9 @@
 
 #define XFRM_MODE_TRANSPORT 0
 #define XFRM_MODE_TUNNEL 1
-#define XFRM_MODE_MAX 2
+#define XFRM_MODE_ROUTEOPTIMIZATION 2
+#define XFRM_MODE_IN_TRIGGER 3
+#define XFRM_MODE_MAX 4
 
 /* Netlink configuration messages.  */
 enum {
@@ -164,6 +173,10 @@
 #define XFRM_MSG_NEWAE XFRM_MSG_NEWAE
 	XFRM_MSG_GETAE,
 #define XFRM_MSG_GETAE XFRM_MSG_GETAE
+
+	XFRM_MSG_REPORT,
+#define XFRM_MSG_REPORT XFRM_MSG_REPORT
+
 	__XFRM_MSG_MAX
 };
 #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
@@ -217,6 +230,12 @@
 #define XFRM_AE_MAX (__XFRM_AE_MAX - 1)
 };
 
+struct xfrm_userpolicy_type {	/* payload of the XFRMA_POLICY_TYPE attribute */
+	__u8		type;		/* presumably XFRM_POLICY_TYPE_xxx -- confirm */
+	__u16		reserved1;	/* NOTE(review): __u16 after a lone __u8 usually leaves a 1-byte hole -- confirm layout */
+	__u8		reserved2;
+};
+
 /* Netlink message attributes.  */
 enum xfrm_attr_type_t {
 	XFRMA_UNSPEC,
@@ -232,6 +251,10 @@
 	XFRMA_REPLAY_VAL,
 	XFRMA_REPLAY_THRESH,
 	XFRMA_ETIMER_THRESH,
+	XFRMA_SRCADDR,		/* xfrm_address_t */
+	XFRMA_COADDR,		/* xfrm_address_t */
+	XFRMA_LASTUSED,
+	XFRMA_POLICY_TYPE,	/* struct xfrm_userpolicy_type */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -247,12 +270,13 @@
 	__u32				seq;
 	__u32				reqid;
 	__u16				family;
-	__u8				mode; /* 0=transport,1=tunnel */
+	__u8				mode;		/* XFRM_MODE_xxx */
 	__u8				replay_window;
 	__u8				flags;
 #define XFRM_STATE_NOECN	1
 #define XFRM_STATE_DECAP_DSCP	2
 #define XFRM_STATE_NOPMTUDISC	4
+#define XFRM_STATE_WILDRECV	8
 };
 
 struct xfrm_usersa_id {
@@ -319,12 +343,18 @@
 	__u8				proto;
 };
 
+struct xfrm_user_report {	/* presumably the body of XFRM_MSG_REPORT -- confirm */
+	__u8				proto;
+	struct xfrm_selector		sel;
+};
+
 #ifndef __KERNEL__
 /* backwards compatibility for userspace */
 #define XFRMGRP_ACQUIRE		1
 #define XFRMGRP_EXPIRE		2
 #define XFRMGRP_SA		4
 #define XFRMGRP_POLICY		8
+#define XFRMGRP_REPORT		0x10
 #endif
 
 enum xfrm_nlgroups {
@@ -340,6 +370,8 @@
 #define XFRMNLGRP_POLICY	XFRMNLGRP_POLICY
 	XFRMNLGRP_AEVENTS,
 #define XFRMNLGRP_AEVENTS	XFRMNLGRP_AEVENTS
+	XFRMNLGRP_REPORT,
+#define XFRMNLGRP_REPORT	XFRMNLGRP_REPORT
 	__XFRMNLGRP_MAX
 };
 #define XFRMNLGRP_MAX	(__XFRMNLGRP_MAX - 1)
diff --git a/include/mtd/Kbuild b/include/mtd/Kbuild
index e1da2a5..13e7a3c 100644
--- a/include/mtd/Kbuild
+++ b/include/mtd/Kbuild
@@ -1,2 +1,6 @@
-unifdef-y := mtd-abi.h
-header-y := inftl-user.h jffs2-user.h mtd-user.h nftl-user.h
+header-y += inftl-user.h
+header-y += jffs2-user.h
+header-y += mtd-user.h
+header-y += nftl-user.h
+
+unifdef-y += mtd-abi.h
diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h
index 1da3f7f..b0a67b7 100644
--- a/include/mtd/mtd-abi.h
+++ b/include/mtd/mtd-abi.h
@@ -34,6 +34,7 @@
 #define MTD_WRITEABLE		0x400	/* Device is writeable */
 #define MTD_BIT_WRITEABLE	0x800	/* Single bits can be flipped */
 #define MTD_NO_ERASE		0x1000	/* No erase necessary */
+#define MTD_STUPID_LOCK		0x2000	/* Always locked after reset */
 
 // Some common devices / combinations of capabilities
 #define MTD_CAP_ROM		0
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 11e9eaf..8b06c2f 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -8,70 +8,110 @@
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 
-#define tca_gen(name) \
-struct tcf_##name *next; \
-	u32 index; \
-	int refcnt; \
-	int bindcnt; \
-	u32 capab; \
-	int action; \
-	struct tcf_t tm; \
-	struct gnet_stats_basic bstats; \
-	struct gnet_stats_queue qstats; \
-	struct gnet_stats_rate_est rate_est; \
-	spinlock_t *stats_lock; \
-	spinlock_t lock
-
-struct tcf_police
-{
-	tca_gen(police);
-	int		result;
-	u32		ewma_rate;
-	u32		burst;
-	u32		mtu;
-	u32		toks;
-	u32		ptoks;
-	psched_time_t	t_c;
-	struct qdisc_rate_table *R_tab;
-	struct qdisc_rate_table *P_tab;
+struct tcf_common {
+	struct tcf_common		*tcfc_next;
+	u32				tcfc_index;
+	int				tcfc_refcnt;
+	int				tcfc_bindcnt;
+	u32				tcfc_capab;
+	int				tcfc_action;
+	struct tcf_t			tcfc_tm;
+	struct gnet_stats_basic		tcfc_bstats;
+	struct gnet_stats_queue		tcfc_qstats;
+	struct gnet_stats_rate_est	tcfc_rate_est;
+	spinlock_t			*tcfc_stats_lock;
+	spinlock_t			tcfc_lock;
 };
+#define tcf_next	common.tcfc_next
+#define tcf_index	common.tcfc_index
+#define tcf_refcnt	common.tcfc_refcnt
+#define tcf_bindcnt	common.tcfc_bindcnt
+#define tcf_capab	common.tcfc_capab
+#define tcf_action	common.tcfc_action
+#define tcf_tm		common.tcfc_tm
+#define tcf_bstats	common.tcfc_bstats
+#define tcf_qstats	common.tcfc_qstats
+#define tcf_rate_est	common.tcfc_rate_est
+#define tcf_stats_lock	common.tcfc_stats_lock
+#define tcf_lock	common.tcfc_lock
+
+struct tcf_police {
+	struct tcf_common	common;
+	int			tcfp_result;
+	u32			tcfp_ewma_rate;
+	u32			tcfp_burst;
+	u32			tcfp_mtu;
+	u32			tcfp_toks;
+	u32			tcfp_ptoks;
+	psched_time_t		tcfp_t_c;
+	struct qdisc_rate_table	*tcfp_R_tab;
+	struct qdisc_rate_table	*tcfp_P_tab;
+};
+#define to_police(pc)	\
+	container_of(pc, struct tcf_police, common)	/* pc points at the embedded 'common' member */
+
+struct tcf_hashinfo {
+	struct tcf_common	**htab;	/* presumably bucket heads chained via tcfc_next -- confirm */
+	unsigned int		hmask;	/* presumably the mask handed to tcf_hash() -- confirm */
+	rwlock_t		*lock;	/* NOTE(review): what this protects is not visible in this header */
+};
+
+static inline unsigned int tcf_hash(u32 index, unsigned int hmask)
+{
+	return index & hmask;	/* keeps only the index bits that are set in hmask */
+}
 
 #ifdef CONFIG_NET_CLS_ACT
 
 #define ACT_P_CREATED 1
 #define ACT_P_DELETED 1
 
-struct tcf_act_hdr
-{
-	tca_gen(act_hdr);
+struct tcf_act_hdr {
+	struct tcf_common	common;
 };
 
-struct tc_action
-{
-	void *priv;
-	struct tc_action_ops *ops;
-	__u32   type;   /* for backward compat(TCA_OLD_COMPAT) */
-	__u32   order; 
-	struct tc_action *next;
+struct tc_action {
+	void			*priv;
+	struct tc_action_ops	*ops;
+	__u32			type; /* for backward compat(TCA_OLD_COMPAT) */
+	__u32			order;
+	struct tc_action	*next;
 };
 
 #define TCA_CAP_NONE 0
-struct tc_action_ops
-{
+struct tc_action_ops {
 	struct tc_action_ops *next;
+	struct tcf_hashinfo *hinfo;
 	char    kind[IFNAMSIZ];
 	__u32   type; /* TBD to match kind */
 	__u32 	capab;  /* capabilities includes 4 bit version */
 	struct module		*owner;
 	int     (*act)(struct sk_buff *, struct tc_action *, struct tcf_result *);
 	int     (*get_stats)(struct sk_buff *, struct tc_action *);
-	int     (*dump)(struct sk_buff *, struct tc_action *,int , int);
+	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	int     (*cleanup)(struct tc_action *, int bind);
-	int     (*lookup)(struct tc_action *, u32 );
-	int     (*init)(struct rtattr *,struct rtattr *,struct tc_action *, int , int );
-	int     (*walk)(struct sk_buff *, struct netlink_callback *, int , struct tc_action *);
+	int     (*lookup)(struct tc_action *, u32);
+	int     (*init)(struct rtattr *, struct rtattr *, struct tc_action *, int , int);
+	int     (*walk)(struct sk_buff *, struct netlink_callback *, int, struct tc_action *);
 };
 
+extern struct tcf_common *tcf_hash_lookup(u32 index,
+					  struct tcf_hashinfo *hinfo);
+extern void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo);
+extern int tcf_hash_release(struct tcf_common *p, int bind,
+			    struct tcf_hashinfo *hinfo);
+extern int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+			      int type, struct tc_action *a);
+extern u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo);
+extern int tcf_hash_search(struct tc_action *a, u32 index);
+extern struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a,
+					 int bind, struct tcf_hashinfo *hinfo);
+extern struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est,
+					  struct tc_action *a, int size,
+					  int bind, u32 *idx_gen,
+					  struct tcf_hashinfo *hinfo);
+extern void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo);
+
 extern int tcf_register_action(struct tc_action_ops *a);
 extern int tcf_unregister_action(struct tc_action_ops *a);
 extern void tcf_action_destroy(struct tc_action *a, int bind);
@@ -96,17 +136,17 @@
 	int ret = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	if (p) {
-		if (bind) {
-			 p->bindcnt--;
-		}
-		p->refcnt--;
-		if (p->refcnt <= 0 && !p->bindcnt) {
+		if (bind)
+			p->tcf_bindcnt--;
+
+		p->tcf_refcnt--;
+		if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) {
 			tcf_police_destroy(p);
 			ret = 1;
 		}
 	}
 #else
-	if (p && --p->refcnt == 0)
+	if (p && --p->tcf_refcnt == 0)
 		tcf_police_destroy(p);
 
 #endif /* CONFIG_NET_CLS_ACT */
diff --git a/include/net/act_generic.h b/include/net/act_generic.h
deleted file mode 100644
index c9daa7e..0000000
--- a/include/net/act_generic.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * include/net/act_generic.h
- *
-*/
-#ifndef _NET_ACT_GENERIC_H
-#define _NET_ACT_GENERIC_H
-static inline int tcf_defact_release(struct tcf_defact *p, int bind)
-{
-	int ret = 0;
-	if (p) {
-		if (bind) {
-			p->bindcnt--;
-		}
-		p->refcnt--;
-		if (p->bindcnt <= 0 && p->refcnt <= 0) {
-			kfree(p->defdata);
-			tcf_hash_destroy(p);
-			ret = 1;
-		}
-	}
-	return ret;
-}
-
-static inline int
-alloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata)
-{
-	p->defdata = kmalloc(datalen, GFP_KERNEL);
-	if (p->defdata == NULL)
-		return -ENOMEM;
-	p->datalen = datalen;
-	memcpy(p->defdata, defdata, datalen);
-	return 0;
-}
-
-static inline int
-realloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata)
-{
-	/* safer to be just brute force for now */
-	kfree(p->defdata);
-	return alloc_defdata(p, datalen, defdata);
-}
-
-static inline int
-tcf_defact_init(struct rtattr *rta, struct rtattr *est,
-		struct tc_action *a, int ovr, int bind)
-{
-	struct rtattr *tb[TCA_DEF_MAX];
-	struct tc_defact *parm;
-	struct tcf_defact *p;
-	void *defdata;
-	u32 datalen = 0;
-	int ret = 0;
-
-	if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0)
-		return -EINVAL;
-
-	if (tb[TCA_DEF_PARMS - 1] == NULL || 
-	    RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm))
-		return -EINVAL;
-
-	parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]);
-	defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]);
-	if (defdata == NULL)
-		return -EINVAL;
-
-	datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]);
-	if (datalen <= 0)
-		return -EINVAL;
-
-	p = tcf_hash_check(parm->index, a, ovr, bind);
-	if (p == NULL) {
-		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
-			return -ENOMEM;
-
-		ret = alloc_defdata(p, datalen, defdata);
-		if (ret < 0) {
-			kfree(p);
-			return ret;
-		}
-		ret = ACT_P_CREATED;
-	} else {
-		if (!ovr) {
-			tcf_defact_release(p, bind);
-			return -EEXIST;
-		}
-		realloc_defdata(p, datalen, defdata);
-	}
-
-	spin_lock_bh(&p->lock);
-	p->action = parm->action;
-	spin_unlock_bh(&p->lock);
-	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
-	return ret;
-}
-
-static inline int tcf_defact_cleanup(struct tc_action *a, int bind)
-{
-	struct tcf_defact *p = PRIV(a, defact);
-
-	if (p != NULL)
-		return tcf_defact_release(p, bind);
-	return 0;
-}
-
-static inline int
-tcf_defact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
-{
-	unsigned char *b = skb->tail;
-	struct tc_defact opt;
-	struct tcf_defact *p = PRIV(a, defact);
-	struct tcf_t t;
-
-	opt.index = p->index;
-	opt.refcnt = p->refcnt - ref;
-	opt.bindcnt = p->bindcnt - bind;
-	opt.action = p->action;
-	RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
-	RTA_PUT(skb, TCA_DEF_DATA, p->datalen, p->defdata);
-	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	t.expires = jiffies_to_clock_t(p->tm.expires);
-	RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
-	return skb->len;
-
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-#define tca_use_default_ops \
-	.dump           =       tcf_defact_dump, \
-	.cleanup        =       tcf_defact_cleanup, \
-	.init           =       tcf_defact_init, \
-	.walk           =       tcf_generic_walker, \
-
-#define tca_use_default_defines(name) \
-	static u32 idx_gen; \
-	static struct tcf_defact *tcf_##name_ht[MY_TAB_SIZE]; \
-	static DEFINE_RWLOCK(##name_lock);
-#endif /* _NET_ACT_GENERIC_H */
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 3d71251..44f1b67 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -61,6 +61,9 @@
 extern int			ipv6_chk_addr(struct in6_addr *addr,
 					      struct net_device *dev,
 					      int strict);
+#ifdef CONFIG_IPV6_MIP6
+extern int			ipv6_chk_home_addr(struct in6_addr *addr);
+#endif
 extern struct inet6_ifaddr *	ipv6_get_ifaddr(struct in6_addr *addr,
 						struct net_device *dev,
 						int strict);
@@ -126,20 +129,18 @@
 static inline struct inet6_dev *
 __in6_dev_get(struct net_device *dev)
 {
-	return (struct inet6_dev *)dev->ip6_ptr;
+	return rcu_dereference(dev->ip6_ptr);
 }
 
-extern rwlock_t addrconf_lock;
-
 static inline struct inet6_dev *
 in6_dev_get(struct net_device *dev)
 {
 	struct inet6_dev *idev = NULL;
-	read_lock(&addrconf_lock);
-	idev = dev->ip6_ptr;
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
 	if (idev)
 		atomic_inc(&idev->refcnt);
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	return idev;
 }
 
diff --git a/include/net/ah.h b/include/net/ah.h
index ceff00a..8f257c1 100644
--- a/include/net/ah.h
+++ b/include/net/ah.h
@@ -1,6 +1,7 @@
 #ifndef _NET_AH_H
 #define _NET_AH_H
 
+#include <linux/crypto.h>
 #include <net/xfrm.h>
 
 /* This is the maximum truncated ICV length that we know of. */
@@ -14,22 +15,29 @@
 	int			icv_full_len;
 	int			icv_trunc_len;
 
-	void			(*icv)(struct ah_data*,
-	                               struct sk_buff *skb, u8 *icv);
-
-	struct crypto_tfm	*tfm;
+	struct crypto_hash	*tfm;
 };
 
-static inline void
-ah_hmac_digest(struct ah_data *ahp, struct sk_buff *skb, u8 *auth_data)
+static inline int ah_mac_digest(struct ah_data *ahp, struct sk_buff *skb,
+				u8 *auth_data)
 {
-	struct crypto_tfm *tfm = ahp->tfm;
+	struct hash_desc desc;
+	int err;
+
+	desc.tfm = ahp->tfm;
+	desc.flags = 0;
 
 	memset(auth_data, 0, ahp->icv_trunc_len);
-	crypto_hmac_init(tfm, ahp->key, &ahp->key_len);
-	skb_icv_walk(skb, tfm, 0, skb->len, crypto_hmac_update);
-	crypto_hmac_final(tfm, ahp->key, &ahp->key_len, ahp->work_icv);
-	memcpy(auth_data, ahp->work_icv, ahp->icv_trunc_len);
+	err = crypto_hash_init(&desc);
+	if (unlikely(err))
+		goto out;
+	err = skb_icv_walk(skb, &desc, 0, skb->len, crypto_hash_update);
+	if (unlikely(err))
+		goto out;
+	err = crypto_hash_final(&desc, ahp->work_icv);	/* digest lands in work_icv only; auth_data was just zeroed, not filled */
+
+out:
+	return err;	/* first non-zero error from init/walk/final, else the result of final */
 }
 
 #endif
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
new file mode 100644
index 0000000..59406e0
--- /dev/null
+++ b/include/net/cipso_ipv4.h
@@ -0,0 +1,246 @@
+/*
+ * CIPSO - Commercial IP Security Option
+ *
+ * This is an implementation of the CIPSO 2.2 protocol as specified in
+ * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
+ * FIPS-188; copies of both documents can be found in the Documentation
+ * directory.  While CIPSO never became a full IETF RFC standard, many vendors
+ * have chosen to adopt the protocol and over the years it has become a
+ * de-facto standard for labeled networking.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _CIPSO_IPV4_H
+#define _CIPSO_IPV4_H
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/netlabel.h>
+
+/* known doi values */
+#define CIPSO_V4_DOI_UNKNOWN          0x00000000
+
+/* tag types */
+#define CIPSO_V4_TAG_INVALID          0
+#define CIPSO_V4_TAG_RBITMAP          1
+#define CIPSO_V4_TAG_ENUM             2
+#define CIPSO_V4_TAG_RANGE            5
+#define CIPSO_V4_TAG_PBITMAP          6
+#define CIPSO_V4_TAG_FREEFORM         7
+
+/* doi mapping types */
+#define CIPSO_V4_MAP_UNKNOWN          0
+#define CIPSO_V4_MAP_STD              1
+#define CIPSO_V4_MAP_PASS             2
+
+/* limits */
+#define CIPSO_V4_MAX_REM_LVLS         256
+#define CIPSO_V4_INV_LVL              0x80000000
+#define CIPSO_V4_MAX_LOC_LVLS         (CIPSO_V4_INV_LVL - 1)
+#define CIPSO_V4_MAX_REM_CATS         65536
+#define CIPSO_V4_INV_CAT              0x80000000
+#define CIPSO_V4_MAX_LOC_CATS         (CIPSO_V4_INV_CAT - 1)
+
+/*
+ * CIPSO DOI definitions
+ */
+
+/* DOI definition struct */
+#define CIPSO_V4_TAG_MAXCNT           5
+struct cipso_v4_doi {
+	u32 doi;
+	u32 type;
+	union {
+		struct cipso_v4_std_map_tbl *std;
+	} map;
+	u8 tags[CIPSO_V4_TAG_MAXCNT];
+
+	u32 valid;
+	struct list_head list;
+	struct rcu_head rcu;
+	struct list_head dom_list;
+};
+
+/* Standard CIPSO mapping table */
+/* NOTE: the highest order bit (i.e. 0x80000000) is an 'invalid' flag; if the
+ *       bit is set, consider that value unspecified, meaning the
+ *       mapping for that particular level/category is invalid */
+struct cipso_v4_std_map_tbl {
+	struct {
+		u32 *cipso;
+		u32 *local;
+		u32 cipso_size;
+		u32 local_size;
+	} lvl;
+	struct {
+		u32 *cipso;
+		u32 *local;
+		u32 cipso_size;
+		u32 local_size;
+	} cat;
+};
+
+/*
+ * Sysctl Variables
+ */
+
+#ifdef CONFIG_NETLABEL
+extern int cipso_v4_cache_enabled;
+extern int cipso_v4_cache_bucketsize;
+extern int cipso_v4_rbm_optfmt;
+extern int cipso_v4_rbm_strictvalid;
+#endif
+
+/*
+ * Helper Functions
+ */
+
+#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0)	/* opt.cipso == 0 is treated as "no option" */
+#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso)	/* opt.cipso is an offset from (x)->nh.raw */
+
+/*
+ * DOI List Functions
+ */
+
+#ifdef CONFIG_NETLABEL
+int cipso_v4_doi_add(struct cipso_v4_doi *doi_def);
+int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head));
+struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi);
+struct sk_buff *cipso_v4_doi_dump_all(size_t headroom);
+struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom);
+int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain);
+int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+			       const char *domain);
+#else
+static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_doi_remove(u32 doi,
+				    void (*callback) (struct rcu_head * head))
+{
+	return 0;
+}
+
+static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
+{
+	return NULL;
+}
+
+static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+{
+	return NULL;
+}
+
+static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
+{
+	return NULL;
+}
+
+static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def,
+					  const char *domain)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+					     const char *domain)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+/*
+ * Label Mapping Cache Functions
+ */
+
+#ifdef CONFIG_NETLABEL
+void cipso_v4_cache_invalidate(void);
+int cipso_v4_cache_add(const struct sk_buff *skb,
+		       const struct netlbl_lsm_secattr *secattr);
+#else
+static inline void cipso_v4_cache_invalidate(void)
+{
+	return;
+}
+
+static inline int cipso_v4_cache_add(const struct sk_buff *skb,
+				     const struct netlbl_lsm_secattr *secattr)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+/*
+ * Protocol Handling Functions
+ */
+
+#ifdef CONFIG_NETLABEL
+void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway);
+int cipso_v4_socket_setattr(const struct socket *sock,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr);
+int cipso_v4_socket_getattr(const struct socket *sock,
+			    struct netlbl_lsm_secattr *secattr);
+int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+			    struct netlbl_lsm_secattr *secattr);
+int cipso_v4_validate(unsigned char **option);
+#else
+static inline void cipso_v4_error(struct sk_buff *skb,
+				  int error,
+				  u32 gateway)
+{
+	return;
+}
+
+static inline int cipso_v4_socket_setattr(const struct socket *sock,
+				  const struct cipso_v4_doi *doi_def,
+				  const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_socket_getattr(const struct socket *sock,
+					  struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+					  struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_validate(unsigned char **option)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_NETLABEL */
+
+#endif /* _CIPSO_IPV4_H */
diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
index a15dcf0..f01626c 100644
--- a/include/net/dn_fib.h
+++ b/include/net/dn_fib.h
@@ -22,7 +22,7 @@
 };
 
 struct dn_fib_res {
-	struct dn_fib_rule *r;
+	struct fib_rule *r;
 	struct dn_fib_info *fi;
 	unsigned char prefixlen;
 	unsigned char nh_sel;
@@ -94,7 +94,8 @@
 
 
 struct dn_fib_table {
-	int n;
+	struct hlist_node hlist;
+	u32 n;
 
 	int (*insert)(struct dn_fib_table *t, struct rtmsg *r, 
 			struct dn_kern_rta *rta, struct nlmsghdr *n, 
@@ -130,14 +131,11 @@
 extern void dn_fib_flush(void);
 extern void dn_fib_select_multipath(const struct flowi *fl,
 					struct dn_fib_res *res);
-extern int dn_fib_sync_down(__le16 local, struct net_device *dev,
-				int force);
-extern int dn_fib_sync_up(struct net_device *dev);
 
 /*
  * dn_tables.c
  */
-extern struct dn_fib_table *dn_fib_get_table(int n, int creat);
+extern struct dn_fib_table *dn_fib_get_table(u32 n, int creat);
 extern struct dn_fib_table *dn_fib_empty_table(void);
 extern void dn_fib_table_init(void);
 extern void dn_fib_table_cleanup(void);
@@ -147,10 +145,8 @@
  */
 extern void dn_fib_rules_init(void);
 extern void dn_fib_rules_cleanup(void);
-extern void dn_fib_rule_put(struct dn_fib_rule *);
-extern __le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags);
 extern unsigned dnet_addr_type(__le16 addr);
-extern int dn_fib_lookup(const struct flowi *fl, struct dn_fib_res *res);
+extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res);
 
 /*
  * rtnetlink interface
@@ -176,11 +172,9 @@
 	if (res->fi)
 		dn_fib_info_put(res->fi);
 	if (res->r)
-		dn_fib_rule_put(res->r);
+		fib_rule_put(res->r);
 }
 
-extern struct dn_fib_table *dn_fib_tables[];
-
 #else /* Endnode */
 
 #define dn_fib_init()  do { } while(0)
diff --git a/include/net/dst.h b/include/net/dst.h
index 36d54fc..a8d825f 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -54,6 +54,7 @@
 	unsigned long		expires;
 
 	unsigned short		header_len;	/* more space at head required */
+	unsigned short		nfheader_len;	/* more non-fragment space at head required */
 	unsigned short		trailer_len;	/* space to reserve at tail */
 
 	u32			metrics[RTAX_MAX];
diff --git a/include/net/esp.h b/include/net/esp.h
index 90cd94f..713d039f 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -1,6 +1,7 @@
 #ifndef _NET_ESP_H
 #define _NET_ESP_H
 
+#include <linux/crypto.h>
 #include <net/xfrm.h>
 #include <asm/scatterlist.h>
 
@@ -14,14 +15,15 @@
 	struct {
 		u8			*key;		/* Key */
 		int			key_len;	/* Key length */
-		u8			*ivec;		/* ivec buffer */
+		int			padlen;		/* 0..255 */
 		/* ivlen is offset from enc_data, where encrypted data start.
 		 * It is logically different of crypto_tfm_alg_ivsize(tfm).
 		 * We assume that it is either zero (no ivec), or
 		 * >= crypto_tfm_alg_ivsize(tfm). */
 		int			ivlen;
-		int			padlen;		/* 0..255 */
-		struct crypto_tfm	*tfm;		/* crypto handle */
+		int			ivinitted;
+		u8			*ivec;		/* ivec buffer */
+		struct crypto_blkcipher	*tfm;		/* crypto handle */
 	} conf;
 
 	/* Integrity. It is active when icv_full_len != 0 */
@@ -34,7 +36,7 @@
 		void			(*icv)(struct esp_data*,
 		                               struct sk_buff *skb,
 		                               int offset, int len, u8 *icv);
-		struct crypto_tfm	*tfm;
+		struct crypto_hash	*tfm;
 	} auth;
 };
 
@@ -42,18 +44,22 @@
 extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
 extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
 
-static inline void
-esp_hmac_digest(struct esp_data *esp, struct sk_buff *skb, int offset,
-                int len, u8 *auth_data)
+static inline int esp_mac_digest(struct esp_data *esp, struct sk_buff *skb,
+				 int offset, int len)
 {
-	struct crypto_tfm *tfm = esp->auth.tfm;
-	char *icv = esp->auth.work_icv;
+	struct hash_desc desc;
+	int err;
 
-	memset(auth_data, 0, esp->auth.icv_trunc_len);
-	crypto_hmac_init(tfm, esp->auth.key, &esp->auth.key_len);
-	skb_icv_walk(skb, tfm, offset, len, crypto_hmac_update);
-	crypto_hmac_final(tfm, esp->auth.key, &esp->auth.key_len, icv);
-	memcpy(auth_data, icv, esp->auth.icv_trunc_len);
+	desc.tfm = esp->auth.tfm;
+	desc.flags = 0;
+
+	err = crypto_hash_init(&desc);
+	if (unlikely(err))
+		return err;
+	err = skb_icv_walk(skb, &desc, offset, len, crypto_hash_update);
+	if (unlikely(err))
+		return err;
+	return crypto_hash_final(&desc, esp->auth.work_icv);
 }
 
 #endif
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
new file mode 100644
index 0000000..8e2f473
--- /dev/null
+++ b/include/net/fib_rules.h
@@ -0,0 +1,97 @@
+#ifndef __NET_FIB_RULES_H
+#define __NET_FIB_RULES_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/fib_rules.h>
+#include <net/flow.h>
+#include <net/netlink.h>
+
+struct fib_rule
+{	/* Reference-counted rule object; lifetime is managed by fib_rule_get()/fib_rule_put(). */
+	struct list_head	list;		/* NOTE(review): presumably links the rule into a fib_rules_ops rules_list -- confirm */
+	atomic_t		refcnt;		/* incremented by fib_rule_get(), decremented by fib_rule_put() */
+	int			ifindex;	/* NOTE(review): presumably index of the bound device -- confirm */
+	char			ifname[IFNAMSIZ];	/* NOTE(review): presumably name of that device -- confirm */
+	u32			pref;
+	u32			flags;
+	u32			table;		/* NOTE(review): presumably the table id this rule refers to -- confirm */
+	u8			action;
+	struct rcu_head		rcu;		/* handed to call_rcu() by fib_rule_put() so the kfree() is deferred */
+};
+
+struct fib_lookup_arg
+{	/* Argument bundle taken by fib_rules_lookup() and by the fib_rules_ops ->action() callback. */
+	void			*lookup_ptr;	/* untyped in this header; meaning is left to the caller and callbacks */
+	void			*result;	/* untyped; NOTE(review): presumably where the lookup result is stored -- confirm */
+	struct fib_rule		*rule;		/* NOTE(review): presumably the rule that matched -- confirm */
+};
+
+struct fib_rules_ops
+{	/* Callback table plus bookkeeping; passed to fib_rules_register(), fib_rules_unregister() and fib_rules_lookup(). */
+	int			family;		/* NOTE(review): presumably an AF_* address family -- confirm */
+	struct list_head	list;
+	int			rule_size;	/* NOTE(review): presumably the size of the family's rule structure -- confirm */
+
+	int			(*action)(struct fib_rule *,
+					  struct flowi *, int,
+					  struct fib_lookup_arg *);	/* same flowi/int/arg types that fib_rules_lookup() accepts */
+	int			(*match)(struct fib_rule *,
+					 struct flowi *, int);
+	int			(*configure)(struct fib_rule *,
+					     struct sk_buff *,
+					     struct nlmsghdr *,
+					     struct fib_rule_hdr *,
+					     struct nlattr **);
+	int			(*compare)(struct fib_rule *,
+					   struct fib_rule_hdr *,
+					   struct nlattr **);
+	int			(*fill)(struct fib_rule *, struct sk_buff *,
+					struct nlmsghdr *,
+					struct fib_rule_hdr *);
+	u32			(*default_pref)(void);	/* NOTE(review): presumably supplies a default for fib_rule.pref -- confirm */
+
+	int			nlgroup;
+	struct nla_policy	*policy;
+	struct list_head	*rules_list;	/* pointer to a list head; NOTE(review): presumably this family's rules -- confirm */
+	struct module		*owner;
+};
+
+static inline void fib_rule_get(struct fib_rule *rule)
+{	/* Take one reference on @rule. */
+	atomic_inc(&rule->refcnt);	/* balanced by a later fib_rule_put() */
+}
+
+static inline void fib_rule_put_rcu(struct rcu_head *head)
+{	/* RCU callback queued by fib_rule_put(): frees the fib_rule that embeds @head. */
+	struct fib_rule *rule = container_of(head, struct fib_rule, rcu);	/* recover the enclosing rule from its rcu member */
+	kfree(rule);
+}
+
+static inline void fib_rule_put(struct fib_rule *rule)
+{	/* Drop one reference on @rule; the last put schedules the free. */
+	if (atomic_dec_and_test(&rule->refcnt))	/* true only when the count reaches zero */
+		call_rcu(&rule->rcu, fib_rule_put_rcu);	/* free is deferred to the RCU callback rather than done here */
+}
+
+static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
+{	/* Return the table id for a rule message: the FRA_TABLE attribute wins, else the header field. */
+	if (nla[FRA_TABLE])	/* attribute present in the @nla array */
+		return nla_get_u32(nla[FRA_TABLE]);
+	return frh->table;	/* fallback when no FRA_TABLE attribute was supplied */
+}
+
+extern int			fib_rules_register(struct fib_rules_ops *);
+extern int			fib_rules_unregister(struct fib_rules_ops *);
+
+extern int			fib_rules_lookup(struct fib_rules_ops *,
+						 struct flowi *, int flags,
+						 struct fib_lookup_arg *);
+
+extern int			fib_nl_newrule(struct sk_buff *,
+					       struct nlmsghdr *, void *);
+extern int			fib_nl_delrule(struct sk_buff *,
+					       struct nlmsghdr *, void *);
+extern int			fib_rules_dump(struct sk_buff *,
+					       struct netlink_callback *, int);
+#endif
diff --git a/include/net/flow.h b/include/net/flow.h
index 04d89f7..3ca210e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -26,6 +26,7 @@
 		struct {
 			struct in6_addr		daddr;
 			struct in6_addr		saddr;
+			__u32			fwmark;
 			__u32			flowlabel;
 		} ip6_u;
 
@@ -42,6 +43,7 @@
 #define fld_scope	nl_u.dn_u.scope
 #define fl6_dst		nl_u.ip6_u.daddr
 #define fl6_src		nl_u.ip6_u.saddr
+#define fl6_fwmark	nl_u.ip6_u.fwmark
 #define fl6_flowlabel	nl_u.ip6_u.flowlabel
 #define fl4_dst		nl_u.ip4_u.daddr
 #define fl4_src		nl_u.ip4_u.saddr
@@ -72,12 +74,22 @@
 		} dnports;
 
 		__u32		spi;
+
+#ifdef CONFIG_IPV6_MIP6
+		struct {
+			__u8	type;
+		} mht;
+#endif
 	} uli_u;
 #define fl_ip_sport	uli_u.ports.sport
 #define fl_ip_dport	uli_u.ports.dport
 #define fl_icmp_type	uli_u.icmpt.type
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
+#ifdef CONFIG_IPV6_MIP6
+#define fl_mh_type	uli_u.mht.type
+#endif
+	__u32           secid;	/* used by xfrm; see secid.txt */
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 #define FLOW_DIR_IN	0
@@ -85,10 +97,10 @@
 #define FLOW_DIR_FWD	2
 
 struct sock;
-typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir,
 			       void **objp, atomic_t **obj_refp);
 
-extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 	 		       flow_resolve_t resolver);
 extern void flow_cache_flush(void);
 extern atomic_t flow_cache_genid;
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 8c22872..4a38d85 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -27,8 +27,6 @@
 	struct list_head	family_list;	/* private */
 };
 
-#define GENL_ADMIN_PERM		0x01
-
 /**
  * struct genl_info - receiving information
  * @snd_seq: sending sequence number
@@ -133,11 +131,12 @@
  * @skb: netlink message as socket buffer
  * @pid: own netlink pid to avoid sending to yourself
  * @group: multicast group id
+ * @flags: allocation flags
  */
 static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid,
-				    unsigned int group)
+				    unsigned int group, gfp_t flags)
 {
-	return nlmsg_multicast(genl_sock, skb, pid, group);
+	return nlmsg_multicast(genl_sock, skb, pid, group, flags);
 }
 
 /**
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index e459e1a..34489c1 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -189,6 +189,7 @@
 	struct ipv6_devconf	cnf;
 	struct ipv6_devstat	stats;
 	unsigned long		tstamp; /* ipv6InterfaceTable update timestamp */
+	struct rcu_head		rcu;
 };
 
 extern struct ipv6_devconf ipv6_devconf;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 9bf73fe..de4e83b6 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -147,7 +147,8 @@
 enum inet_csk_ack_state_t {
 	ICSK_ACK_SCHED	= 1,
 	ICSK_ACK_TIMER  = 2,
-	ICSK_ACK_PUSHED = 4
+	ICSK_ACK_PUSHED = 4,
+	ICSK_ACK_PUSHED2 = 8
 };
 
 extern void inet_csk_init_xmit_timers(struct sock *sk,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 98e0bb3..b4491c9 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -271,38 +271,15 @@
 	return ((struct rtable *)skb->dst)->rt_iif;
 }
 
-extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
+extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
 					   const u32 daddr,
 					   const unsigned short hnum,
 					   const int dif);
 
-/* Optimize the common listener case. */
-static inline struct sock *
-		inet_lookup_listener(struct inet_hashinfo *hashinfo,
-				     const u32 daddr,
-				     const unsigned short hnum, const int dif)
+static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
+						u32 daddr, u16 dport, int dif)
 {
-	struct sock *sk = NULL;
-	const struct hlist_head *head;
-
-	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
-	if (!hlist_empty(head)) {
-		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
-
-		if (inet->num == hnum && !sk->sk_node.next &&
-		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
-		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
-		    !sk->sk_bound_dev_if)
-			goto sherry_cache;
-		sk = __inet_lookup_listener(head, daddr, hnum, dif);
-	}
-	if (sk) {
-sherry_cache:
-		sock_hold(sk);
-	}
-	read_unlock(&hashinfo->lhash_lock);
-	return sk;
+	return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif);
 }
 
 /* Socket demux engine toys. */
@@ -391,14 +368,25 @@
 	goto out;
 }
 
+static inline struct sock *
+	inet_lookup_established(struct inet_hashinfo *hashinfo,
+				const u32 saddr, const u16 sport,
+				const u32 daddr, const u16 dport,
+				const int dif)
+{	/* Thin wrapper over __inet_lookup_established(); returns whatever that call returns. */
+	return __inet_lookup_established(hashinfo, saddr, sport, daddr,
+					 ntohs(dport), dif);	/* only @dport is converted with ntohs(); other arguments pass through unchanged */
+}
+
 static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
 					 const u32 saddr, const u16 sport,
-					 const u32 daddr, const u16 hnum,
+					 const u32 daddr, const u16 dport,
 					 const int dif)
 {
+	u16 hnum = ntohs(dport);
 	struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
 						    hnum, dif);
-	return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif);
+	return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif);
 }
 
 static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
@@ -409,7 +397,7 @@
 	struct sock *sk;
 
 	local_bh_disable();
-	sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+	sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif);
 	local_bh_enable();
 
 	return sk;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1f4a9a6..f624271 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -27,7 +27,6 @@
 /** struct ip_options - IP Options
  *
  * @faddr - Saved first hop address
- * @is_setbyuser - Set by setsockopt?
  * @is_data - Options in __data, rather than skb
  * @is_strictroute - Strict source route
  * @srr_is_hit - Packet destination addr was our one
@@ -42,8 +41,7 @@
 	unsigned char	srr;
 	unsigned char	rr;
 	unsigned char	ts;
-	unsigned char	is_setbyuser:1,
-			is_data:1,
+	unsigned char	is_data:1,
 			is_strictroute:1,
 			srr_is_hit:1,
 			is_changed:1,
@@ -51,7 +49,7 @@
 			ts_needtime:1,
 			ts_needaddr:1;
 	unsigned char	router_alert;
-	unsigned char	__pad1;
+	unsigned char	cipso;
 	unsigned char	__pad2;
 	unsigned char	__data[0];
 };
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index a66e9de..e4438de 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -16,14 +16,35 @@
 #ifdef __KERNEL__
 
 #include <linux/ipv6_route.h>
-
-#include <net/dst.h>
-#include <net/flow.h>
 #include <linux/rtnetlink.h>
 #include <linux/spinlock.h>
+#include <net/dst.h>
+#include <net/flow.h>
+#include <net/netlink.h>
 
 struct rt6_info;
 
+struct fib6_config
+{	/* Route configuration block; ip6_route_add() takes a pointer to one. */
+	u32		fc_table;	/* NOTE(review): presumably the target table id -- confirm */
+	u32		fc_metric;
+	int		fc_dst_len;	/* NOTE(review): presumably prefix length of fc_dst -- confirm */
+	int		fc_src_len;	/* NOTE(review): presumably prefix length of fc_src -- confirm */
+	int		fc_ifindex;
+	u32		fc_flags;
+	u32		fc_protocol;
+
+	struct in6_addr	fc_dst;
+	struct in6_addr	fc_src;
+	struct in6_addr	fc_gateway;
+
+	unsigned long	fc_expires;
+	struct nlattr	*fc_mx;		/* NOTE(review): presumably the nested metrics attribute -- confirm */
+	int		fc_mx_len;	/* NOTE(review): presumably the length that goes with fc_mx -- confirm */
+
+	struct nl_info	fc_nlinfo;	/* same struct nl_info type that fib6_add()/fib6_del() accept */
+};
+
 struct fib6_node
 {
 	struct fib6_node	*parent;
@@ -39,6 +60,11 @@
 	__u32			fn_sernum;
 };
 
+#ifndef CONFIG_IPV6_SUBTREES
+#define FIB6_SUBTREE(fn)	NULL
+#else
+#define FIB6_SUBTREE(fn)	((fn)->subtree)
+#endif
 
 /*
  *	routing information
@@ -51,6 +77,8 @@
 	int		plen;
 };
 
+struct fib6_table;
+
 struct rt6_info
 {
 	union {
@@ -71,6 +99,7 @@
 	u32				rt6i_flags;
 	u32				rt6i_metric;
 	atomic_t			rt6i_ref;
+	struct fib6_table		*rt6i_table;
 
 	struct rt6key			rt6i_dst;
 	struct rt6key			rt6i_src;
@@ -89,28 +118,6 @@
 	void *args;
 };
 
-extern struct fib6_walker_t fib6_walker_list;
-extern rwlock_t fib6_walker_lock;
-
-static inline void fib6_walker_link(struct fib6_walker_t *w)
-{
-	write_lock_bh(&fib6_walker_lock);
-	w->next = fib6_walker_list.next;
-	w->prev = &fib6_walker_list;
-	w->next->prev = w;
-	w->prev->next = w;
-	write_unlock_bh(&fib6_walker_lock);
-}
-
-static inline void fib6_walker_unlink(struct fib6_walker_t *w)
-{
-	write_lock_bh(&fib6_walker_lock);
-	w->next->prev = w->prev;
-	w->prev->next = w->next;
-	w->prev = w->next = w;
-	write_unlock_bh(&fib6_walker_lock);
-}
-
 struct rt6_statistics {
 	__u32		fib_nodes;
 	__u32		fib_route_nodes;
@@ -143,12 +150,41 @@
 
 typedef void			(*f_pnode)(struct fib6_node *fn, void *);
 
-extern struct fib6_node		ip6_routing_table;
+struct fib6_table {	/* Table object returned by fib6_get_table()/fib6_new_table(); rt6_info.rt6i_table points at one. */
+	struct hlist_node	tb6_hlist;
+	u32			tb6_id;		/* NOTE(review): presumably the id passed to fib6_get_table() -- confirm */
+	rwlock_t		tb6_lock;
+	struct fib6_node	tb6_root;	/* embedded by value; NOTE(review): presumably the root node of this table's tree -- confirm */
+};
+
+#define RT6_TABLE_UNSPEC	RT_TABLE_UNSPEC
+#define RT6_TABLE_MAIN		RT_TABLE_MAIN
+#define RT6_TABLE_DFLT		RT6_TABLE_MAIN
+#define RT6_TABLE_INFO		RT6_TABLE_MAIN
+#define RT6_TABLE_PREFIX	RT6_TABLE_MAIN
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#define FIB6_TABLE_MIN		1
+#define FIB6_TABLE_MAX		RT_TABLE_MAX
+#define RT6_TABLE_LOCAL		RT_TABLE_LOCAL
+#else
+#define FIB6_TABLE_MIN		RT_TABLE_MAIN
+#define FIB6_TABLE_MAX		FIB6_TABLE_MIN
+#define RT6_TABLE_LOCAL		RT6_TABLE_MAIN
+#endif
+
+typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *,
+					 struct flowi *, int);
 
 /*
  *	exported functions
  */
 
+extern struct fib6_table *	fib6_get_table(u32 id);
+extern struct fib6_table *	fib6_new_table(u32 id);
+extern struct dst_entry *	fib6_rule_lookup(struct flowi *fl, int flags,
+						 pol_lookup_t lookup);
+
 extern struct fib6_node		*fib6_lookup(struct fib6_node *root,
 					     struct in6_addr *daddr,
 					     struct in6_addr *saddr);
@@ -157,32 +193,29 @@
 					     struct in6_addr *daddr, int dst_len,
 					     struct in6_addr *saddr, int src_len);
 
-extern void			fib6_clean_tree(struct fib6_node *root,
-						int (*func)(struct rt6_info *, void *arg),
-						int prune, void *arg);
-
-extern int			fib6_walk(struct fib6_walker_t *w);
-extern int			fib6_walk_continue(struct fib6_walker_t *w);
+extern void			fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
+					       int prune, void *arg);
 
 extern int			fib6_add(struct fib6_node *root,
 					 struct rt6_info *rt,
-					 struct nlmsghdr *nlh,
-					 void *rtattr,
-					 struct netlink_skb_parms *req);
+					 struct nl_info *info);
 
 extern int			fib6_del(struct rt6_info *rt,
-					 struct nlmsghdr *nlh,
-					 void *rtattr,
-					 struct netlink_skb_parms *req);
+					 struct nl_info *info);
 
 extern void			inet6_rt_notify(int event, struct rt6_info *rt,
-						struct nlmsghdr *nlh,
-						struct netlink_skb_parms *req);
+						struct nl_info *info);
 
 extern void			fib6_run_gc(unsigned long dummy);
 
 extern void			fib6_gc_cleanup(void);
 
 extern void			fib6_init(void);
+
+extern void			fib6_rules_init(void);
+extern void			fib6_rules_cleanup(void);
+extern int			fib6_rules_dump(struct sk_buff *,
+						struct netlink_callback *);
+
 #endif
 #endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 96b0e66..6ca6b71 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -32,6 +32,10 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 
+#define RT6_LOOKUP_F_IFACE	0x1
+#define RT6_LOOKUP_F_REACHABLE	0x2
+#define RT6_LOOKUP_F_HAS_SADDR	0x4
+
 struct pol_chain {
 	int			type;
 	int			priority;
@@ -41,6 +45,11 @@
 
 extern struct rt6_info	ip6_null_entry;
 
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+extern struct rt6_info	ip6_prohibit_entry;
+extern struct rt6_info	ip6_blk_hole_entry;
+#endif
+
 extern int ip6_rt_gc_interval;
 
 extern void			ip6_route_input(struct sk_buff *skb);
@@ -48,25 +57,14 @@
 extern struct dst_entry *	ip6_route_output(struct sock *sk,
 						 struct flowi *fl);
 
-extern int			ip6_route_me_harder(struct sk_buff *skb);
-
 extern void			ip6_route_init(void);
 extern void			ip6_route_cleanup(void);
 
 extern int			ipv6_route_ioctl(unsigned int cmd, void __user *arg);
 
-extern int			ip6_route_add(struct in6_rtmsg *rtmsg,
-					      struct nlmsghdr *,
-					      void *rtattr,
-					      struct netlink_skb_parms *req);
-extern int			ip6_ins_rt(struct rt6_info *,
-					   struct nlmsghdr *,
-					   void *rtattr,
-					   struct netlink_skb_parms *req);
-extern int			ip6_del_rt(struct rt6_info *,
-					   struct nlmsghdr *,
-					   void *rtattr,
-					   struct netlink_skb_parms *req);
+extern int			ip6_route_add(struct fib6_config *cfg);
+extern int			ip6_ins_rt(struct rt6_info *);
+extern int			ip6_del_rt(struct rt6_info *);
 
 extern int			ip6_rt_addr_add(struct in6_addr *addr,
 						struct net_device *dev,
@@ -114,6 +112,7 @@
 					      struct in6_addr *gwaddr);
 
 extern void			rt6_redirect(struct in6_addr *dest,
+					     struct in6_addr *src,
 					     struct in6_addr *saddr,
 					     struct neighbour *neigh,
 					     u8 *lladdr,
@@ -131,6 +130,13 @@
 extern int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet6_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 
+struct rt6_rtnl_dump_arg
+{	/* Pairs an skb with a netlink_callback. NOTE(review): presumably what rt6_dump_route() receives as p_arg -- confirm */
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+};
+
+extern int rt6_dump_route(struct rt6_info *rt, void *p_arg);
 extern void rt6_ifdown(struct net_device *dev);
 extern void rt6_mtu_change(struct net_device *dev, unsigned mtu);
 
@@ -140,21 +146,24 @@
  *	Store a destination cache entry in a socket
  */
 static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst,
-				   struct in6_addr *daddr)
+				   struct in6_addr *daddr, struct in6_addr *saddr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct rt6_info *rt = (struct rt6_info *) dst;
 
 	sk_setup_caps(sk, dst);
 	np->daddr_cache = daddr;
+#ifdef CONFIG_IPV6_SUBTREES
+	np->saddr_cache = saddr;
+#endif
 	np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
 }
 
 static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
-				 struct in6_addr *daddr)
+				 struct in6_addr *daddr, struct in6_addr *saddr)
 {
 	write_lock(&sk->sk_dst_lock);
-	__ip6_dst_store(sk, dst, daddr);
+	__ip6_dst_store(sk, dst, daddr, saddr);
 	write_unlock(&sk->sk_dst_lock);
 }
 
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a095d1d..fcc159a 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -18,26 +18,34 @@
 
 #include <net/flow.h>
 #include <linux/seq_file.h>
+#include <net/fib_rules.h>
 
-/* WARNING: The ordering of these elements must match ordering
- *          of RTA_* rtnetlink attribute numbers.
- */
-struct kern_rta {
-	void		*rta_dst;
-	void		*rta_src;
-	int		*rta_iif;
-	int		*rta_oif;
-	void		*rta_gw;
-	u32		*rta_priority;
-	void		*rta_prefsrc;
-	struct rtattr	*rta_mx;
-	struct rtattr	*rta_mp;
-	unsigned char	*rta_protoinfo;
-	u32		*rta_flow;
-	struct rta_cacheinfo *rta_ci;
-	struct rta_session *rta_sess;
-	u32		*rta_mp_alg;
-};
+struct fib_config {	/* Route configuration block; the fib_table ->tb_insert()/->tb_delete() callbacks take a pointer to one. */
+	u8			fc_family;
+	u8			fc_dst_len;
+	u8			fc_src_len;
+	u8			fc_tos;
+	u8			fc_protocol;
+	u8			fc_scope;
+	u8			fc_type;
+	/* 1 byte unused */
+	u32			fc_table;	/* u32, same width as fib_table.tb_id */
+	u32			fc_dst;
+	u32			fc_src;
+	u32			fc_gw;
+	int			fc_oif;
+	u32			fc_flags;
+	u32			fc_priority;
+	u32			fc_prefsrc;
+	struct nlattr		*fc_mx;		/* NOTE(review): presumably the metrics attribute; fc_mx_len its length -- confirm */
+	struct rtnexthop	*fc_mp;		/* NOTE(review): presumably the multipath nexthop data; fc_mp_len its length -- confirm */
+	int			fc_mx_len;
+	int			fc_mp_len;
+	u32			fc_flow;
+	u32			fc_mp_alg;
+	u32			fc_nlflags;
+	struct nl_info		fc_nlinfo;
+ };
 
 struct fib_info;
 
@@ -149,15 +157,12 @@
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
 
 struct fib_table {
-	unsigned char	tb_id;
+	struct hlist_node tb_hlist;
+	u32		tb_id;
 	unsigned	tb_stamp;
 	int		(*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
-	int		(*tb_insert)(struct fib_table *table, struct rtmsg *r,
-				     struct kern_rta *rta, struct nlmsghdr *n,
-				     struct netlink_skb_parms *req);
-	int		(*tb_delete)(struct fib_table *table, struct rtmsg *r,
-				     struct kern_rta *rta, struct nlmsghdr *n,
-				     struct netlink_skb_parms *req);
+	int		(*tb_insert)(struct fib_table *, struct fib_config *);
+	int		(*tb_delete)(struct fib_table *, struct fib_config *);
 	int		(*tb_dump)(struct fib_table *table, struct sk_buff *skb,
 				     struct netlink_callback *cb);
 	int		(*tb_flush)(struct fib_table *table);
@@ -172,14 +177,14 @@
 extern struct fib_table *ip_fib_local_table;
 extern struct fib_table *ip_fib_main_table;
 
-static inline struct fib_table *fib_get_table(int id)
+static inline struct fib_table *fib_get_table(u32 id)
 {
 	if (id != RT_TABLE_LOCAL)
 		return ip_fib_main_table;
 	return ip_fib_local_table;
 }
 
-static inline struct fib_table *fib_new_table(int id)
+static inline struct fib_table *fib_new_table(u32 id)
 {
 	return fib_get_table(id);
 }
@@ -199,35 +204,19 @@
 }
 
 #else /* CONFIG_IP_MULTIPLE_TABLES */
-#define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL])
-#define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
+#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL)
+#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN)
 
-extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
-extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
-extern struct fib_table *__fib_new_table(int id);
-extern void fib_rule_put(struct fib_rule *r);
+extern int fib_lookup(struct flowi *flp, struct fib_result *res);
 
-static inline struct fib_table *fib_get_table(int id)
-{
-	if (id == 0)
-		id = RT_TABLE_MAIN;
-
-	return fib_tables[id];
-}
-
-static inline struct fib_table *fib_new_table(int id)
-{
-	if (id == 0)
-		id = RT_TABLE_MAIN;
-
-	return fib_tables[id] ? : __fib_new_table(id);
-}
-
+extern struct fib_table *fib_new_table(u32 id);
+extern struct fib_table *fib_get_table(u32 id);
 extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 /* Exported by fib_frontend.c */
+extern struct nla_policy rtm_ipv4_policy[];
 extern void		ip_fib_init(void);
 extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
@@ -243,23 +232,20 @@
 extern int ip_fib_check_default(u32 gw, struct net_device *dev);
 extern int fib_sync_down(u32 local, struct net_device *dev, int force);
 extern int fib_sync_up(struct net_device *dev);
-extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
-			       struct kern_rta *rta, struct rtentry *r);
 extern u32  __fib_res_prefsrc(struct fib_result *res);
 
 /* Exported by fib_hash.c */
-extern struct fib_table *fib_hash_init(int id);
+extern struct fib_table *fib_hash_init(u32 id);
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-/* Exported by fib_rules.c */
+extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
-extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
+extern void __init fib4_rules_init(void);
+
 #ifdef CONFIG_NET_CLS_ROUTE
 extern u32 fib_rules_tclass(struct fib_result *res);
 #endif
-extern void fib_rules_init(void);
+
 #endif
 
 static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h
index e651a57..87c1af3 100644
--- a/include/net/ipcomp.h
+++ b/include/net/ipcomp.h
@@ -1,11 +1,14 @@
 #ifndef _NET_IPCOMP_H
 #define _NET_IPCOMP_H
 
+#include <linux/crypto.h>
+#include <linux/types.h>
+
 #define IPCOMP_SCRATCH_SIZE     65400
 
 struct ipcomp_data {
 	u16 threshold;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 };
 
 #endif
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ece7e8a..72bf47b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -40,6 +40,7 @@
 #define NEXTHDR_ICMP		58	/* ICMP for IPv6. */
 #define NEXTHDR_NONE		59	/* No next header */
 #define NEXTHDR_DEST		60	/* Destination options header. */
+#define NEXTHDR_MOBILITY	135	/* Mobility header. */
 
 #define NEXTHDR_MAX		255
 
@@ -229,7 +230,7 @@
 					       void (*destructor)(struct sock *));
 
 
-extern int			ipv6_parse_hopopts(struct sk_buff *skb);
+extern int			ipv6_parse_hopopts(struct sk_buff **skbp);
 
 extern struct ipv6_txoptions *  ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt);
 extern struct ipv6_txoptions *	ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
@@ -506,6 +507,8 @@
 
 extern int 			ipv6_ext_hdr(u8 nexthdr);
 
+extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type);
+
 extern struct ipv6_txoptions *	ipv6_invert_rthdr(struct sock *sk,
 						  struct ipv6_rt_hdr *hdr);
 
diff --git a/include/net/mip6.h b/include/net/mip6.h
new file mode 100644
index 0000000..68263c6
--- /dev/null
+++ b/include/net/mip6.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ * Authors:
+ *	Noriaki TAKAMIYA @USAGI
+ *	Masahide NAKAMURA @USAGI
+ *	YOSHIFUJI Hideaki @USAGI
+ */
+#ifndef _NET_MIP6_H
+#define _NET_MIP6_H
+
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+#define MIP6_OPT_PAD_1	0
+#define MIP6_OPT_PAD_N	1
+
+/*
+ * Mobility Header
+ */
+struct ip6_mh {	/* Mobility Header fields; declared packed, so the compiler inserts no padding. */
+	__u8	ip6mh_proto;
+	__u8	ip6mh_hdrlen;
+	__u8	ip6mh_type;	/* NOTE(review): presumably one of the IP6_MH_TYPE_* values -- confirm */
+	__u8	ip6mh_reserved;
+	__u16	ip6mh_cksum;
+	/* Followed by type specific messages */
+	__u8	data[0];	/* zero-length array: marks where the trailing bytes start, adds nothing to sizeof */
+} __attribute__ ((__packed__));
+
+#define IP6_MH_TYPE_BRR		0   /* Binding Refresh Request */
+#define IP6_MH_TYPE_HOTI	1   /* HOTI Message   */
+#define IP6_MH_TYPE_COTI	2   /* COTI Message  */
+#define IP6_MH_TYPE_HOT		3   /* HOT Message   */
+#define IP6_MH_TYPE_COT		4   /* COT Message  */
+#define IP6_MH_TYPE_BU		5   /* Binding Update */
+#define IP6_MH_TYPE_BACK	6   /* Binding ACK */
+#define IP6_MH_TYPE_BERROR	7   /* Binding Error */
+#define IP6_MH_TYPE_MAX		IP6_MH_TYPE_BERROR
+
+extern int mip6_init(void);
+extern void mip6_fini(void);
+extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb);
+
+#endif
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 4901ee4..c8aacbd 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -1,6 +1,8 @@
 #ifndef _NET_NEIGHBOUR_H
 #define _NET_NEIGHBOUR_H
 
+#include <linux/neighbour.h>
+
 /*
  *	Generic neighbour manipulation
  *
@@ -14,40 +16,6 @@
  *		- Add neighbour cache statistics like rtstat
  */
 
-/* The following flags & states are exported to user space,
-   so that they should be moved to include/linux/ directory.
- */
-
-/*
- *	Neighbor Cache Entry Flags
- */
-
-#define NTF_PROXY	0x08	/* == ATF_PUBL */
-#define NTF_ROUTER	0x80
-
-/*
- *	Neighbor Cache Entry States.
- */
-
-#define NUD_INCOMPLETE	0x01
-#define NUD_REACHABLE	0x02
-#define NUD_STALE	0x04
-#define NUD_DELAY	0x08
-#define NUD_PROBE	0x10
-#define NUD_FAILED	0x20
-
-/* Dummy states */
-#define NUD_NOARP	0x40
-#define NUD_PERMANENT	0x80
-#define NUD_NONE	0x00
-
-/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change
-   and make no address resolution or NUD.
-   NUD_PERMANENT is also cannot be deleted by garbage collectors.
- */
-
-#ifdef __KERNEL__
-
 #include <asm/atomic.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
@@ -133,7 +101,7 @@
 	__u8			dead;
 	atomic_t		probes;
 	rwlock_t		lock;
-	unsigned char		ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)];
+	unsigned char		ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
 	struct hh_cache		*hh;
 	atomic_t		refcnt;
 	int			(*output)(struct sk_buff *skb);
@@ -158,6 +126,7 @@
 {
 	struct pneigh_entry	*next;
 	struct net_device		*dev;
+	u8			flags;
 	u8			key[0];
 };
 
@@ -374,6 +343,3 @@
 #define NEIGH_CB(skb)	((struct neighbour_cb *)(skb)->cb)
 
 #endif
-#endif
-
-
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
new file mode 100644
index 0000000..fc2b72f
--- /dev/null
+++ b/include/net/netlabel.h
@@ -0,0 +1,292 @@
+/*
+ * NetLabel System
+ *
+ * The NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_H
+#define _NETLABEL_H
+
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+
+/*
+ * NetLabel - A management interface for maintaining network packet label
+ *            mapping tables for explicit packet labeling protocols.
+ *
+ * Network protocols such as CIPSO and RIPSO require a label translation layer
+ * to convert the label on the packet into something meaningful on the host
+ * machine.  In the current Linux implementation these mapping tables live
+ * inside the kernel; NetLabel provides a mechanism for user space applications
+ * to manage these mapping tables.
+ *
+ * NetLabel makes use of the Generic NETLINK mechanism as a transport layer to
+ * send messages between kernel and user space.  The general format of a
+ * NetLabel message is shown below:
+ *
+ *  +-----------------+-------------------+--------- --- -- -
+ *  | struct nlmsghdr | struct genlmsghdr | payload
+ *  +-----------------+-------------------+--------- --- -- -
+ *
+ * The 'nlmsghdr' and 'genlmsghdr' structs should be dealt with like normal.
+ * The payload is dependent on the subsystem specified in the
+ * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions
+ * should be defined in the corresponding net/netlabel/netlabel_<subsys>.h|c
+ * file.  All of the fields in the NetLabel payload are NETLINK attributes, the
+ * length of each field is the length of the NETLINK attribute payload, see
+ * include/net/netlink.h for more information on NETLINK attributes.
+ *
+ */
+
+/*
+ * NetLabel NETLINK protocol
+ */
+
+#define NETLBL_PROTO_VERSION            1
+
+/* NetLabel NETLINK types/families */
+#define NETLBL_NLTYPE_NONE              0
+#define NETLBL_NLTYPE_MGMT              1
+#define NETLBL_NLTYPE_MGMT_NAME         "NLBL_MGMT"
+#define NETLBL_NLTYPE_RIPSO             2
+#define NETLBL_NLTYPE_RIPSO_NAME        "NLBL_RIPSO"
+#define NETLBL_NLTYPE_CIPSOV4           3
+#define NETLBL_NLTYPE_CIPSOV4_NAME      "NLBL_CIPSOv4"
+#define NETLBL_NLTYPE_CIPSOV6           4
+#define NETLBL_NLTYPE_CIPSOV6_NAME      "NLBL_CIPSOv6"
+#define NETLBL_NLTYPE_UNLABELED         5
+#define NETLBL_NLTYPE_UNLABELED_NAME    "NLBL_UNLBL"
+
+/* NetLabel return codes */
+#define NETLBL_E_OK                     0
+
+/*
+ * Helper functions
+ */
+
+#define NETLBL_LEN_U8                   nla_total_size(sizeof(u8))
+#define NETLBL_LEN_U16                  nla_total_size(sizeof(u16))
+#define NETLBL_LEN_U32                  nla_total_size(sizeof(u32))
+
+/**
+ * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer
+ * @head: the amount of headroom in bytes
+ * @body: the desired size (minus headroom) in bytes
+ * @gfp_flags: the alloc flags to pass to alloc_skb()
+ *
+ * Description:
+ * Allocate a NETLINK message buffer based on the sizes given in @head and
+ * @body.  If @head is greater than zero skb_reserve() is called to reserve
+ * @head bytes at the start of the buffer.  Returns a valid sk_buff pointer on
+ * success, NULL on failure.
+ *
+ */
+static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head,
+						       size_t body,
+						       int gfp_flags)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags);
+	if (skb == NULL)
+		return NULL;
+	if (head > 0) {
+		skb_reserve(skb, head);
+		if (skb_tailroom(skb) < body) {
+			kfree_skb(skb);
+			return NULL;
+		}
+	}
+
+	return skb;
+}
+
+/*
+ * NetLabel - Kernel API for accessing the network packet label mappings.
+ *
+ * The following functions are provided for use by other kernel modules,
+ * specifically kernel LSM modules, to provide a consistent, transparent API
+ * for dealing with explicit packet labeling protocols such as CIPSO and
+ * RIPSO.  The functions defined here are implemented in the
+ * net/netlabel/netlabel_kapi.c file.
+ *
+ */
+
+/* Domain mapping definition struct */
+struct netlbl_dom_map;
+
+/* Domain mapping operations */
+int netlbl_domhsh_remove(const char *domain);
+
+/* LSM security attributes */
+struct netlbl_lsm_cache {
+	void (*free) (const void *data);
+	void *data;
+};
+struct netlbl_lsm_secattr {
+	char *domain;
+
+	u32 mls_lvl;
+	u32 mls_lvl_vld;
+	unsigned char *mls_cat;
+	size_t mls_cat_len;
+
+	struct netlbl_lsm_cache cache;
+};
+
+/*
+ * LSM security attribute operations
+ */
+
+
+/**
+ * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct
+ * @secattr: the struct to initialize
+ *
+ * Description:
+ * Initialize an already allocated netlbl_lsm_secattr struct.  Returns zero on
+ * success, negative values on error.
+ *
+ */
+static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr)
+{
+	memset(secattr, 0, sizeof(*secattr));
+	return 0;
+}
+
+/**
+ * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct
+ * @secattr: the struct to clear
+ * @clear_cache: cache clear flag
+ *
+ * Description:
+ * Destroys the @secattr struct, including freeing all of the internal buffers.
+ * If @clear_cache is true then free the cache fields, otherwise leave them
+ * intact.  The struct must be reset with a call to netlbl_secattr_init()
+ * before reuse.
+ *
+ */
+static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr,
+					  u32 clear_cache)
+{
+	if (clear_cache && secattr->cache.data != NULL && secattr->cache.free)
+		secattr->cache.free(secattr->cache.data);
+	kfree(secattr->domain);
+	kfree(secattr->mls_cat);
+}
+
+/**
+ * netlbl_secattr_alloc - Allocate and initialize a netlbl_lsm_secattr struct
+ * @flags: the memory allocation flags
+ *
+ * Description:
+ * Allocate and initialize a netlbl_lsm_secattr struct.  Returns a valid
+ * pointer on success, or NULL on failure.
+ *
+ */
+static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags)
+{
+	return kzalloc(sizeof(struct netlbl_lsm_secattr), flags);
+}
+
+/**
+ * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct
+ * @secattr: the struct to free
+ * @clear_cache: cache clear flag
+ *
+ * Description:
+ * Frees @secattr including all of the internal buffers.  If @clear_cache is
+ * true then free the cache fields, otherwise leave them intact.
+ *
+ */
+static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr,
+				       u32 clear_cache)
+{
+	netlbl_secattr_destroy(secattr, clear_cache);
+	kfree(secattr);
+}
+
+/*
+ * LSM protocol operations
+ */
+
+#ifdef CONFIG_NETLABEL
+int netlbl_socket_setattr(const struct socket *sock,
+			  const struct netlbl_lsm_secattr *secattr);
+int netlbl_socket_getattr(const struct socket *sock,
+			  struct netlbl_lsm_secattr *secattr);
+int netlbl_skbuff_getattr(const struct sk_buff *skb,
+			  struct netlbl_lsm_secattr *secattr);
+void netlbl_skbuff_err(struct sk_buff *skb, int error);
+#else
+static inline int netlbl_socket_setattr(const struct socket *sock,
+				     const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int netlbl_socket_getattr(const struct socket *sock,
+					struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int netlbl_skbuff_getattr(const struct sk_buff *skb,
+					struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline void netlbl_skbuff_err(struct sk_buff *skb, int error)
+{
+	return;
+}
+#endif /* CONFIG_NETLABEL */
+
+/*
+ * LSM label mapping cache operations
+ */
+
+#ifdef CONFIG_NETLABEL
+void netlbl_cache_invalidate(void);
+int netlbl_cache_add(const struct sk_buff *skb,
+		     const struct netlbl_lsm_secattr *secattr);
+#else
+static inline void netlbl_cache_invalidate(void)
+{
+	return;
+}
+
+static inline int netlbl_cache_add(const struct sk_buff *skb,
+				   const struct netlbl_lsm_secattr *secattr)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+#endif /* _NETLABEL_H */
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 640c26a..11dc2e7 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -35,12 +35,15 @@
  *   nlmsg_put()			add a netlink message to an skb
  *   nlmsg_put_answer()			callback based nlmsg_put()
  *   nlmsg_end()			finanlize netlink message
+ *   nlmsg_get_pos()			return current position in message
+ *   nlmsg_trim()			trim part of message
  *   nlmsg_cancel()			cancel message construction
  *   nlmsg_free()			free a netlink message
  *
  * Message Sending:
  *   nlmsg_multicast()			multicast message to several groups
  *   nlmsg_unicast()			unicast a message to a single socket
+ *   nlmsg_notify()			send notification message
  *
  * Message Length Calculations:
  *   nlmsg_msg_size(payload)		length of message w/o padding
@@ -62,6 +65,9 @@
  *   nlmsg_validate()			validate netlink message incl. attrs
  *   nlmsg_for_each_attr()		loop over all attributes
  *
+ * Misc:
+ *   nlmsg_report()			report back to application?
+ *
  * ------------------------------------------------------------------------
  *                          Attributes Interface
  * ------------------------------------------------------------------------
@@ -80,8 +86,10 @@
  *   struct nlattr			netlink attribtue header
  *
  * Attribute Construction:
- *   nla_reserve(skb, type, len)	reserve skb tailroom for an attribute
+ *   nla_reserve(skb, type, len)	reserve room for an attribute
+ *   nla_reserve_nohdr(skb, len)	reserve room for an attribute w/o hdr
  *   nla_put(skb, type, len, data)	add attribute to skb
+ *   nla_put_nohdr(skb, len, data)	add attribute w/o hdr
  *
  * Attribute Construction for Basic Types:
  *   nla_put_u8(skb, type, value)	add u8 attribute to skb
@@ -139,6 +147,7 @@
  *   nla_next(nla, remaining)		get next netlink attribute
  *   nla_validate()			validate a stream of attributes
  *   nla_find()				find attribute in stream of attributes
+ *   nla_find_nested()			find attribute in nested attributes
  *   nla_parse()			parse and validate stream of attrs
  *   nla_parse_nested()			parse nested attribuets
  *   nla_for_each_attr()		loop over all attributes
@@ -158,6 +167,7 @@
 	NLA_FLAG,
 	NLA_MSECS,
 	NLA_NESTED,
+	NLA_NUL_STRING,
 	__NLA_TYPE_MAX,
 };
 
@@ -166,21 +176,37 @@
 /**
  * struct nla_policy - attribute validation policy
  * @type: Type of attribute or NLA_UNSPEC
- * @minlen: Minimal length of payload required to be available
+ * @len: Type specific length of payload
  *
  * Policies are defined as arrays of this struct, the array must be
  * accessible by attribute type up to the highest identifier to be expected.
  *
+ * Meaning of `len' field:
+ *    NLA_STRING           Maximum length of string
+ *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
+ *    NLA_FLAG             Unused
+ *    All other            Exact length of attribute payload
+ *
  * Example:
  * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
- *	[ATTR_BAR] = { .type = NLA_STRING },
- *	[ATTR_BAZ] = { .minlen = sizeof(struct mystruct) },
+ *	[ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
+ *	[ATTR_BAZ] = { .len = sizeof(struct mystruct) },
  * };
  */
 struct nla_policy {
 	u16		type;
-	u16		minlen;
+	u16		len;
+};
+
+/**
+ * struct nl_info - netlink source information
+ * @nlh: Netlink message header of original request
+ * @pid: Netlink PID of requesting application
+ */
+struct nl_info {
+	struct nlmsghdr		*nlh;
+	u32			pid;
 };
 
 extern void		netlink_run_queue(struct sock *sk, unsigned int *qlen,
@@ -188,6 +214,9 @@
 						    struct nlmsghdr *, int *));
 extern void		netlink_queue_skip(struct nlmsghdr *nlh,
 					   struct sk_buff *skb);
+extern int		nlmsg_notify(struct sock *sk, struct sk_buff *skb,
+				     u32 pid, unsigned int group, int report,
+				     gfp_t flags);
 
 extern int		nla_validate(struct nlattr *head, int len, int maxtype,
 				     struct nla_policy *policy);
@@ -203,12 +232,18 @@
 extern int		nla_strcmp(const struct nlattr *nla, const char *str);
 extern struct nlattr *	__nla_reserve(struct sk_buff *skb, int attrtype,
 				      int attrlen);
+extern void *		__nla_reserve_nohdr(struct sk_buff *skb, int attrlen);
 extern struct nlattr *	nla_reserve(struct sk_buff *skb, int attrtype,
 				    int attrlen);
+extern void *		nla_reserve_nohdr(struct sk_buff *skb, int attrlen);
 extern void		__nla_put(struct sk_buff *skb, int attrtype,
 				  int attrlen, const void *data);
+extern void		__nla_put_nohdr(struct sk_buff *skb, int attrlen,
+					const void *data);
 extern int		nla_put(struct sk_buff *skb, int attrtype,
 				int attrlen, const void *data);
+extern int		nla_put_nohdr(struct sk_buff *skb, int attrlen,
+				      const void *data);
 
 /**************************************************************************
  * Netlink Messages
@@ -364,6 +399,17 @@
 }
 
 /**
+ * nlmsg_report - need to report back to application?
+ * @nlh: netlink message header
+ *
+ * Returns 1 if a report back to the application is requested.
+ */
+static inline int nlmsg_report(struct nlmsghdr *nlh)
+{
+	return !!(nlh->nlmsg_flags & NLM_F_ECHO);
+}
+
+/**
  * nlmsg_for_each_attr - iterate over a stream of attributes
  * @pos: loop counter, set to current attribute
  * @nlh: netlink message header
@@ -453,12 +499,13 @@
 /**
  * nlmsg_new - Allocate a new netlink message
  * @size: maximum size of message
+ * @flags: the type of memory to allocate.
  *
  * Use NLMSG_GOODSIZE if size isn't know and you need a good default size.
  */
-static inline struct sk_buff *nlmsg_new(int size)
+static inline struct sk_buff *nlmsg_new(int size, gfp_t flags)
 {
-	return alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	return alloc_skb(size, flags);
 }
 
 /**
@@ -480,6 +527,32 @@
 }
 
 /**
+ * nlmsg_get_pos - return current position in netlink message
+ * @skb: socket buffer the message is stored in
+ *
+ * Returns a pointer to the current tail of the message.
+ */
+static inline void *nlmsg_get_pos(struct sk_buff *skb)
+{
+	return skb->tail;
+}
+
+/**
+ * nlmsg_trim - Trim message to a mark
+ * @skb: socket buffer the message is stored in
+ * @mark: mark to trim to
+ *
+ * Trims the message to the provided mark. Returns -1.
+ */
+static inline int nlmsg_trim(struct sk_buff *skb, void *mark)
+{
+	if (mark)
+		skb_trim(skb, (unsigned char *) mark - skb->data);
+
+	return -1;
+}
+
+/**
  * nlmsg_cancel - Cancel construction of a netlink message
  * @skb: socket buffer the message is stored in
  * @nlh: netlink message header
@@ -489,9 +562,7 @@
  */
 static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	skb_trim(skb, (unsigned char *) nlh - skb->data);
-
-	return -1;
+	return nlmsg_trim(skb, nlh);
 }
 
 /**
@@ -509,15 +580,16 @@
  * @skb: netlink message as socket buffer
  * @pid: own netlink pid to avoid sending to yourself
  * @group: multicast group id
+ * @flags: allocation flags
  */
 static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
-				  u32 pid, unsigned int group)
+				  u32 pid, unsigned int group, gfp_t flags)
 {
 	int err;
 
 	NETLINK_CB(skb).dst_group = group;
 
-	err = netlink_broadcast(sk, skb, pid, group, GFP_KERNEL);
+	err = netlink_broadcast(sk, skb, pid, group, flags);
 	if (err > 0)
 		err = 0;
 
@@ -631,6 +703,18 @@
 }
 
 /**
+ * nla_find_nested - find attribute in a set of nested attributes
+ * @nla: attribute containing the nested attributes
+ * @attrtype: type of attribute to look for
+ *
+ * Returns the first attribute which matches the specified type.
+ */
+static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype)
+{
+	return nla_find(nla_data(nla), nla_len(nla), attrtype);
+}
+
+/**
  * nla_parse_nested - parse nested attributes
  * @tb: destination array with maxtype+1 elements
  * @maxtype: maximum attribute type to be expected
@@ -751,7 +835,7 @@
 #define NLA_PUT_STRING(skb, attrtype, value) \
 	NLA_PUT(skb, attrtype, strlen(value) + 1, value)
 
-#define NLA_PUT_FLAG(skb, attrtype, value) \
+#define NLA_PUT_FLAG(skb, attrtype) \
 	NLA_PUT(skb, attrtype, 0, NULL)
 
 #define NLA_PUT_MSECS(skb, attrtype, jiffies) \
@@ -862,10 +946,7 @@
  */
 static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
 {
-	if (start)
-		skb_trim(skb, (unsigned char *) start - skb->data);
-
-	return -1;
+	return nlmsg_trim(skb, start);
 }
 
 /**
@@ -880,4 +961,13 @@
 	     nla_ok(pos, rem); \
 	     pos = nla_next(pos, &(rem)))
 
+/**
+ * nla_for_each_nested - iterate over nested attributes
+ * @pos: loop counter, set to current attribute
+ * @nla: attribute containing the nested attributes
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ */
+#define nla_for_each_nested(pos, nla, rem) \
+	nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem)
+
 #endif
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
new file mode 100644
index 0000000..3334dbf
--- /dev/null
+++ b/include/net/nexthop.h
@@ -0,0 +1,33 @@
+#ifndef __NET_NEXTHOP_H
+#define __NET_NEXTHOP_H
+
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+
+static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining)
+{
+	/* (int) cast: negative remaining must fail before rtnh is read */
+	return remaining >= (int)sizeof(*rtnh) &&
+	       rtnh->rtnh_len >= sizeof(*rtnh) && rtnh->rtnh_len <= remaining;
+}
+
+static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh,
+                                         int *remaining)
+{
+	int totlen = NLA_ALIGN(rtnh->rtnh_len);
+
+	*remaining -= totlen;
+	return (struct rtnexthop *) ((char *) rtnh + totlen);
+}
+
+static inline struct nlattr *rtnh_attrs(const struct rtnexthop *rtnh)
+{
+	return (struct nlattr *) ((char *) rtnh + NLA_ALIGN(sizeof(*rtnh)));
+}
+
+static inline int rtnh_attrlen(const struct rtnexthop *rtnh)
+{
+	return rtnh->rtnh_len - NLA_ALIGN(sizeof(*rtnh));
+}
+
+#endif
diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h
deleted file mode 100644
index cf5e4d2..0000000
--- a/include/net/pkt_act.h
+++ /dev/null
@@ -1,273 +0,0 @@
-#ifndef __NET_PKT_ACT_H
-#define __NET_PKT_ACT_H
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <net/sock.h>
-#include <net/pkt_sched.h>
-
-#define tca_st(val) (struct tcf_##val *)
-#define PRIV(a,name) ( tca_st(name) (a)->priv)
-
-#if 0 /* control */
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format,args...)
-#endif
-
-#if 0 /* data */
-#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define D2PRINTK(format,args...)
-#endif
-
-static __inline__ unsigned
-tcf_hash(u32 index)
-{
-	return index & MY_TAB_MASK;
-}
-
-/* probably move this from being inline
- * and put into act_generic
-*/
-static inline void
-tcf_hash_destroy(struct tcf_st *p)
-{
-	unsigned h = tcf_hash(p->index);
-	struct tcf_st **p1p;
-
-	for (p1p = &tcf_ht[h]; *p1p; p1p = &(*p1p)->next) {
-		if (*p1p == p) {
-			write_lock_bh(&tcf_t_lock);
-			*p1p = p->next;
-			write_unlock_bh(&tcf_t_lock);
-#ifdef CONFIG_NET_ESTIMATOR
-			gen_kill_estimator(&p->bstats, &p->rate_est);
-#endif
-			kfree(p);
-			return;
-		}
-	}
-	BUG_TRAP(0);
-}
-
-static inline int
-tcf_hash_release(struct tcf_st *p, int bind )
-{
-	int ret = 0;
-	if (p) {
-		if (bind) {
-			p->bindcnt--;
-		}
-		p->refcnt--;
-	       	if(p->bindcnt <=0 && p->refcnt <= 0) {
-			tcf_hash_destroy(p);
-			ret = 1;
-		}
-	}
-	return ret;
-}
-
-static __inline__ int
-tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
-		struct tc_action *a)
-{
-	struct tcf_st *p;
-	int err =0, index =  -1,i= 0, s_i = 0, n_i = 0;
-	struct rtattr *r ;
-
-	read_lock(&tcf_t_lock);
-
-	s_i = cb->args[0];
-
-	for (i = 0; i < MY_TAB_SIZE; i++) {
-		p = tcf_ht[tcf_hash(i)];
-
-		for (; p; p = p->next) {
-			index++;
-			if (index < s_i)
-				continue;
-			a->priv = p;
-			a->order = n_i;
-			r = (struct rtattr*) skb->tail;
-			RTA_PUT(skb, a->order, 0, NULL);
-			err = tcf_action_dump_1(skb, a, 0, 0);
-			if (0 > err) {
-				index--;
-				skb_trim(skb, (u8*)r - skb->data);
-				goto done;
-			}
-			r->rta_len = skb->tail - (u8*)r;
-			n_i++;
-			if (n_i >= TCA_ACT_MAX_PRIO) {
-				goto done;
-			}
-		}
-	}
-done:
-	read_unlock(&tcf_t_lock);
-	if (n_i)
-		cb->args[0] += n_i;
-	return n_i;
-
-rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
-	goto done;
-}
-
-static __inline__ int
-tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
-{
-	struct tcf_st *p, *s_p;
-	struct rtattr *r ;
-	int i= 0, n_i = 0;
-
-	r = (struct rtattr*) skb->tail;
-	RTA_PUT(skb, a->order, 0, NULL);
-	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
-	for (i = 0; i < MY_TAB_SIZE; i++) {
-		p = tcf_ht[tcf_hash(i)];
-
-		while (p != NULL) {
-			s_p = p->next;
-			if (ACT_P_DELETED == tcf_hash_release(p, 0)) {
-				 module_put(a->ops->owner);
-			}
-			n_i++;
-			p = s_p;
-		}
-	}
-	RTA_PUT(skb, TCA_FCNT, 4, &n_i);
-	r->rta_len = skb->tail - (u8*)r;
-
-	return n_i;
-rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
-	return -EINVAL;
-}
-
-static __inline__ int
-tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, int type,
-		struct tc_action *a)
-{
-		if (type == RTM_DELACTION) {
-			return tcf_del_walker(skb,a);
-		} else if (type == RTM_GETACTION) {
-			return tcf_dump_walker(skb,cb,a);
-		} else {
-			printk("tcf_generic_walker: unknown action %d\n",type);
-			return -EINVAL;
-		}
-}
-
-static __inline__ struct tcf_st *
-tcf_hash_lookup(u32 index)
-{
-	struct tcf_st *p;
-
-	read_lock(&tcf_t_lock);
-	for (p = tcf_ht[tcf_hash(index)]; p; p = p->next) {
-		if (p->index == index)
-			break;
-	}
-	read_unlock(&tcf_t_lock);
-	return p;
-}
-
-static __inline__ u32
-tcf_hash_new_index(void)
-{
-	do {
-		if (++idx_gen == 0)
-			idx_gen = 1;
-	} while (tcf_hash_lookup(idx_gen));
-
-	return idx_gen;
-}
-
-
-static inline int
-tcf_hash_search(struct tc_action *a, u32 index)
-{
-	struct tcf_st *p = tcf_hash_lookup(index);
-
-	if (p != NULL) {
-		a->priv = p;
-		return 1;
-	}
-	return 0;
-}
-
-#ifdef CONFIG_NET_ACT_INIT
-static inline struct tcf_st *
-tcf_hash_check(u32 index, struct tc_action *a, int ovr, int bind)
-{
-	struct tcf_st *p = NULL;
-	if (index && (p = tcf_hash_lookup(index)) != NULL) {
-		if (bind) {
-			p->bindcnt++;
-			p->refcnt++;
-		}
-		a->priv = p;
-	}
-	return p;
-}
-
-static inline struct tcf_st *
-tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind)
-{
-	struct tcf_st *p = NULL;
-
-	p = kmalloc(size, GFP_KERNEL);
-	if (p == NULL)
-		return p;
-
-	memset(p, 0, size);
-	p->refcnt = 1;
-
-	if (bind) {
-		p->bindcnt = 1;
-	}
-
-	spin_lock_init(&p->lock);
-	p->stats_lock = &p->lock;
-	p->index = index ? : tcf_hash_new_index();
-	p->tm.install = jiffies;
-	p->tm.lastuse = jiffies;
-#ifdef CONFIG_NET_ESTIMATOR
-	if (est)
-		gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
-#endif
-	a->priv = (void *) p;
-	return p;
-}
-
-static inline void tcf_hash_insert(struct tcf_st *p)
-{
-	unsigned h = tcf_hash(p->index);
-
-	write_lock_bh(&tcf_t_lock);
-	p->next = tcf_ht[h];
-	tcf_ht[h] = p;
-	write_unlock_bh(&tcf_t_lock);
-}
-
-#endif
-
-#endif
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index c5d7f92..8e165ca 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -53,6 +53,7 @@
 	unsigned long			expires;
 	struct request_sock_ops		*rsk_ops;
 	struct sock			*sk;
+	u32				secid;
 };
 
 static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops)
diff --git a/include/net/route.h b/include/net/route.h
index c4a0686..7f93ac0 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -32,6 +32,7 @@
 #include <linux/route.h>
 #include <linux/ip.h>
 #include <linux/cache.h>
+#include <linux/security.h>
 
 #ifndef __KERNEL__
 #warning This file is not supposed to be used outside of kernel.
@@ -166,6 +167,7 @@
 		ip_rt_put(*rp);
 		*rp = NULL;
 	}
+	security_sk_classify_flow(sk, &fl);
 	return ip_route_output_flow(rp, &fl, sk, 0);
 }
 
@@ -182,6 +184,7 @@
 		fl.proto = protocol;
 		ip_rt_put(*rp);
 		*rp = NULL;
+		security_sk_classify_flow(sk, &fl);
 		return ip_route_output_flow(rp, &fl, sk, 0);
 	}
 	return 0;
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index c51541e..6c632e2 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -264,10 +264,10 @@
 enum { SCTP_MAX_GABS = 16 };
 
 /* Heartbeat interval - 30 secs */
-#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT	(30 * HZ)
+#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT	(30*1000)
 
 /* Delayed sack timer - 200ms */
-#define SCTP_DEFAULT_TIMEOUT_SACK	((200 * HZ) / 1000)
+#define SCTP_DEFAULT_TIMEOUT_SACK	(200)
 
 /* RTO.Initial              - 3  seconds
  * RTO.Min                  - 1  second
@@ -275,9 +275,9 @@
  * RTO.Alpha                - 1/8
  * RTO.Beta                 - 1/4
  */
-#define SCTP_RTO_INITIAL	(3 * HZ)
-#define SCTP_RTO_MIN		(1 * HZ)
-#define SCTP_RTO_MAX		(60 * HZ)
+#define SCTP_RTO_INITIAL	(3 * 1000)
+#define SCTP_RTO_MIN		(1 * 1000)
+#define SCTP_RTO_MAX		(60 * 1000)
 
 #define SCTP_RTO_ALPHA          3   /* 1/8 when converted to right shifts. */
 #define SCTP_RTO_BETA           2   /* 1/4 when converted to right shifts. */
@@ -290,8 +290,7 @@
 #define SCTP_DEF_MAX_INIT 6
 #define SCTP_DEF_MAX_SEND 10
 
-#define SCTP_DEFAULT_COOKIE_LIFE_SEC	60 /* seconds */
-#define SCTP_DEFAULT_COOKIE_LIFE_USEC	0  /* microseconds */
+#define SCTP_DEFAULT_COOKIE_LIFE	(60 * 1000) /* 60 seconds */
 
 #define SCTP_DEFAULT_MINWINDOW	1500	/* default minimum rwnd size */
 #define SCTP_DEFAULT_MAXWINDOW	65535	/* default rwnd size */
@@ -312,9 +311,9 @@
 				 */
 
 #if defined (CONFIG_SCTP_HMAC_MD5)
-#define SCTP_COOKIE_HMAC_ALG "md5"
+#define SCTP_COOKIE_HMAC_ALG "hmac(md5)"
 #elif defined (CONFIG_SCTP_HMAC_SHA1)
-#define SCTP_COOKIE_HMAC_ALG "sha1"
+#define SCTP_COOKIE_HMAC_ALG "hmac(sha1)"
 #else
 #define SCTP_COOKIE_HMAC_ALG NULL
 #endif
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 92eae0e..ee68a31 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -128,6 +128,8 @@
 				     int flags);
 extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family);
 extern int sctp_register_pf(struct sctp_pf *, sa_family_t);
+int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
+                        void *ptr);
 
 /*
  * sctp/socket.c
@@ -178,6 +180,17 @@
 			  struct sock *oldsk, struct sock *newsk);
 
 /*
+ * sctp/proc.c
+ */
+int sctp_snmp_proc_init(void);
+void sctp_snmp_proc_exit(void);
+int sctp_eps_proc_init(void);
+void sctp_eps_proc_exit(void);
+int sctp_assocs_proc_init(void);
+void sctp_assocs_proc_exit(void);
+
+
+/*
  *  Section:  Macros, externs, and inlines
  */
 
@@ -216,6 +229,50 @@
 
 #endif /* !TEST_FRAME */
 
+/* sctp mib definitions */
+enum
+{
+	SCTP_MIB_NUM = 0,
+	SCTP_MIB_CURRESTAB,			/* CurrEstab */
+	SCTP_MIB_ACTIVEESTABS,			/* ActiveEstabs */
+	SCTP_MIB_PASSIVEESTABS,			/* PassiveEstabs */
+	SCTP_MIB_ABORTEDS,			/* Aborteds */
+	SCTP_MIB_SHUTDOWNS,			/* Shutdowns */
+	SCTP_MIB_OUTOFBLUES,			/* OutOfBlues */
+	SCTP_MIB_CHECKSUMERRORS,		/* ChecksumErrors */
+	SCTP_MIB_OUTCTRLCHUNKS,			/* OutCtrlChunks */
+	SCTP_MIB_OUTORDERCHUNKS,		/* OutOrderChunks */
+	SCTP_MIB_OUTUNORDERCHUNKS,		/* OutUnorderChunks */
+	SCTP_MIB_INCTRLCHUNKS,			/* InCtrlChunks */
+	SCTP_MIB_INORDERCHUNKS,			/* InOrderChunks */
+	SCTP_MIB_INUNORDERCHUNKS,		/* InUnorderChunks */
+	SCTP_MIB_FRAGUSRMSGS,			/* FragUsrMsgs */
+	SCTP_MIB_REASMUSRMSGS,			/* ReasmUsrMsgs */
+	SCTP_MIB_OUTSCTPPACKS,			/* OutSCTPPacks */
+	SCTP_MIB_INSCTPPACKS,			/* InSCTPPacks */
+	SCTP_MIB_T1_INIT_EXPIREDS,
+	SCTP_MIB_T1_COOKIE_EXPIREDS,
+	SCTP_MIB_T2_SHUTDOWN_EXPIREDS,
+	SCTP_MIB_T3_RTX_EXPIREDS,
+	SCTP_MIB_T4_RTO_EXPIREDS,
+	SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS,
+	SCTP_MIB_DELAY_SACK_EXPIREDS,
+	SCTP_MIB_AUTOCLOSE_EXPIREDS,
+	SCTP_MIB_T3_RETRANSMITS,
+	SCTP_MIB_PMTUD_RETRANSMITS,
+	SCTP_MIB_FAST_RETRANSMITS,
+	SCTP_MIB_IN_PKT_SOFTIRQ,
+	SCTP_MIB_IN_PKT_BACKLOG,
+	SCTP_MIB_IN_PKT_DISCARDS,
+	SCTP_MIB_IN_DATA_CHUNK_DISCARDS,
+	__SCTP_MIB_MAX
+};
+
+#define SCTP_MIB_MAX    __SCTP_MIB_MAX
+struct sctp_mib {
+        unsigned long   mibs[SCTP_MIB_MAX];
+} __SNMP_MIB_ALIGN__;
+
 
 /* Print debugging messages.  */
 #if SCTP_DEBUG
@@ -330,17 +387,6 @@
 
 #endif /* #if defined(CONFIG_IPV6) */
 
-/* Some wrappers, in case crypto not available. */
-#if defined (CONFIG_CRYPTO_HMAC)
-#define sctp_crypto_alloc_tfm crypto_alloc_tfm
-#define sctp_crypto_free_tfm crypto_free_tfm
-#define sctp_crypto_hmac crypto_hmac
-#else
-#define sctp_crypto_alloc_tfm(x...) NULL
-#define sctp_crypto_free_tfm(x...)
-#define sctp_crypto_hmac(x...)
-#endif
-
 
 /* Map an association to an assoc_id. */
 static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e5aa7ff..c6d93bb 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -87,6 +87,7 @@
 struct sctp_ulpq;
 struct sctp_ep_common;
 struct sctp_ssnmap;
+struct crypto_hash;
 
 
 #include <net/sctp/tsnmap.h>
@@ -127,9 +128,9 @@
 	 * RTO.Alpha		    - 1/8  (3 when converted to right shifts.)
 	 * RTO.Beta		    - 1/4  (2 when converted to right shifts.)
 	 */
-	unsigned long rto_initial;
-	unsigned long rto_min;
-	unsigned long rto_max;
+	unsigned int rto_initial;
+	unsigned int rto_min;
+	unsigned int rto_max;
 
 	/* Note: rto_alpha and rto_beta are really defined as inverse
 	 * powers of two to facilitate integer operations.
@@ -144,13 +145,13 @@
 	int cookie_preserve_enable;
 
 	/* Valid.Cookie.Life	    - 60  seconds  */
-	unsigned long valid_cookie_life;
+	unsigned int valid_cookie_life;
 
 	/* Delayed SACK timeout  200ms default*/
-	unsigned long sack_timeout;
+	unsigned int sack_timeout;
 
 	/* HB.interval		    - 30 seconds  */
-	unsigned long hb_interval;
+	unsigned int hb_interval;
 
 	/* Association.Max.Retrans  - 10 attempts
 	 * Path.Max.Retrans	    - 5	 attempts (per destination address)
@@ -264,7 +265,7 @@
 	struct sctp_pf *pf;
 
 	/* Access to HMAC transform. */
-	struct crypto_tfm *hmac;
+	struct crypto_hash *hmac;
 
 	/* What is our base endpointer? */
 	struct sctp_endpoint *ep;
diff --git a/include/net/snmp.h b/include/net/snmp.h
index a36bed8..464970e 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -100,12 +100,6 @@
 	unsigned long	mibs[UDP_MIB_MAX];
 } __SNMP_MIB_ALIGN__;
 
-/* SCTP */
-#define SCTP_MIB_MAX	__SCTP_MIB_MAX
-struct sctp_mib {
-	unsigned long	mibs[SCTP_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
-
 /* Linux */
 #define LINUX_MIB_MAX	__LINUX_MIB_MAX
 struct linux_mib {
diff --git a/include/net/sock.h b/include/net/sock.h
index 324b3ea..edd4d73 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -862,30 +862,24 @@
  *
  */
 
-static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
+static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
 	int err;
+	struct sk_filter *filter;
 	
 	err = security_sock_rcv_skb(sk, skb);
 	if (err)
 		return err;
 	
-	if (sk->sk_filter) {
-		struct sk_filter *filter;
-		
-		if (needlock)
-			bh_lock_sock(sk);
-		
-		filter = sk->sk_filter;
-		if (filter) {
-			unsigned int pkt_len = sk_run_filter(skb, filter->insns,
-							     filter->len);
-			err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
-		}
-
-		if (needlock)
-			bh_unlock_sock(sk);
+	rcu_read_lock_bh();	/* filter is freed via call_rcu_bh() */
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter) {
+		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
+				filter->len);
+		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
+	rcu_read_unlock_bh();
+
 	return err;
 }
 
@@ -897,6 +891,12 @@
  *	Remove a filter from a socket and release its resources.
  */
  
+static inline void sk_filter_rcu_free(struct rcu_head *rcu)
+{
+	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+	kfree(fp);
+}
+
 static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
 {
 	unsigned int size = sk_filter_len(fp);
@@ -904,7 +904,7 @@
 	atomic_sub(size, &sk->sk_omem_alloc);
 
 	if (atomic_dec_and_test(&fp->refcnt))
-		kfree(fp);
+		call_rcu_bh(&fp->rcu, sk_filter_rcu_free);
 }
 
 static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
@@ -969,9 +969,23 @@
 	sk->sk_sleep = &parent->wait;
 	parent->sk = sk;
 	sk->sk_socket = parent;
+	security_sock_graft(sk, parent);
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
+static inline void sock_copy(struct sock *nsk, const struct sock *osk)
+{
+#ifdef CONFIG_SECURITY_NETWORK
+	void *sptr = nsk->sk_security;
+#endif
+
+	memcpy(nsk, osk, osk->sk_prot->obj_size);
+#ifdef CONFIG_SECURITY_NETWORK
+	nsk->sk_security = sptr;
+	security_sk_clone(osk, nsk);
+#endif
+}
+
 extern int sock_i_uid(struct sock *sk);
 extern unsigned long sock_i_ino(struct sock *sk);
 
diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h
index 463aa671..65f024b 100644
--- a/include/net/tc_act/tc_defact.h
+++ b/include/net/tc_act/tc_defact.h
@@ -3,11 +3,12 @@
 
 #include <net/act_api.h>
 
-struct tcf_defact
-{
-	tca_gen(defact);
-	u32     datalen;
-	void    *defdata;
+struct tcf_defact {
+	struct tcf_common	common;
+	u32     		tcfd_datalen;
+	void    		*tcfd_defdata;
 };
+#define to_defact(pc) \
+	container_of(pc, struct tcf_defact, common)
 
-#endif
+#endif /* __NET_TC_DEF_H */
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 59f0d96..9e3f676 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -3,15 +3,15 @@
 
 #include <net/act_api.h>
 
-struct tcf_gact
-{
-        tca_gen(gact);
+struct tcf_gact {
+	struct tcf_common	common;
 #ifdef CONFIG_GACT_PROB
-        u16                 ptype;
-        u16                 pval;
-        int                 paction;
+	u16			tcfg_ptype;
+	u16			tcfg_pval;
+	int			tcfg_paction;
 #endif
-                                                                                
 };
-                                                                                
-#endif
+#define to_gact(pc) \
+	container_of(pc, struct tcf_gact, common)
+
+#endif /* __NET_TC_GACT_H */
diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h
index cb37ad0..f7d25df 100644
--- a/include/net/tc_act/tc_ipt.h
+++ b/include/net/tc_act/tc_ipt.h
@@ -5,12 +5,13 @@
 
 struct xt_entry_target;
 
-struct tcf_ipt
-{
-	tca_gen(ipt);
-	u32 hook;
-	char *tname;
-	struct xt_entry_target *t;
+struct tcf_ipt {
+	struct tcf_common	common;
+	u32			tcfi_hook;
+	char			*tcfi_tname;
+	struct xt_entry_target	*tcfi_t;
 };
+#define to_ipt(pc) \
+	container_of(pc, struct tcf_ipt, common)
 
-#endif
+#endif /* __NET_TC_IPT_H */
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index b5c32f6..ceac661 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -3,13 +3,14 @@
 
 #include <net/act_api.h>
 
-struct tcf_mirred
-{
-	tca_gen(mirred);
-	int eaction;
-	int ifindex;
-	int ok_push;
-	struct net_device *dev;
+struct tcf_mirred {
+	struct tcf_common	common;
+	int			tcfm_eaction;
+	int			tcfm_ifindex;
+	int			tcfm_ok_push;
+	struct net_device	*tcfm_dev;
 };
+#define to_mirred(pc) \
+	container_of(pc, struct tcf_mirred, common)
 
-#endif
+#endif /* __NET_TC_MIR_H */
diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index eb21689..e6f6e15 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -3,12 +3,13 @@
 
 #include <net/act_api.h>
 
-struct tcf_pedit
-{
-	tca_gen(pedit);
-	unsigned char           nkeys;
-	unsigned char           flags;
-	struct tc_pedit_key     *keys;
+struct tcf_pedit {
+	struct tcf_common	common;
+	unsigned char		tcfp_nkeys;
+	unsigned char		tcfp_flags;
+	struct tc_pedit_key	*tcfp_keys;
 };
+#define to_pedit(pc) \
+	container_of(pc, struct tcf_pedit, common)
 
-#endif
+#endif /* __NET_TC_PED_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index 766fba1..db0c05f 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -30,25 +30,9 @@
 
 #define UDP_HTABLE_SIZE		128
 
-/* udp.c: This needs to be shared by v4 and v6 because the lookup
- *        and hashing code needs to work with different AF's yet
- *        the port space is shared.
- */
 extern struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 extern rwlock_t udp_hash_lock;
 
-extern int udp_port_rover;
-
-static inline int udp_lport_inuse(u16 num)
-{
-	struct sock *sk;
-	struct hlist_node *node;
-
-	sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
-		if (inet_sk(sk)->num == num)
-			return 1;
-	return 0;
-}
 
 /* Note: this must match 'valbool' in sock_setsockopt */
 #define UDP_CSUM_NOXMIT		1
@@ -63,6 +47,8 @@
 
 struct sk_buff;
 
+extern int	udp_get_port(struct sock *sk, unsigned short snum,
+			     int (*saddr_cmp)(const struct sock *, const struct sock *));
 extern void	udp_err(struct sk_buff *, u32);
 
 extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9c5ee9f..11e0b1d 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -8,8 +8,8 @@
 #include <linux/list.h>
 #include <linux/skbuff.h>
 #include <linux/socket.h>
-#include <linux/crypto.h>
 #include <linux/pfkeyv2.h>
+#include <linux/ipsec.h>
 #include <linux/in6.h>
 #include <linux/mutex.h>
 
@@ -94,8 +94,9 @@
 struct xfrm_state
 {
 	/* Note: bydst is re-used during gc */
-	struct list_head	bydst;
-	struct list_head	byspi;
+	struct hlist_node	bydst;
+	struct hlist_node	bysrc;
+	struct hlist_node	byspi;
 
 	atomic_t		refcnt;
 	spinlock_t		lock;
@@ -103,6 +104,8 @@
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 
+	u32			genid;
+
 	/* Key manger bits */
 	struct {
 		u8		state;
@@ -133,6 +136,9 @@
 	/* Data for encapsulator */
 	struct xfrm_encap_tmpl	*encap;
 
+	/* Data for care-of address */
+	xfrm_address_t	*coaddr;
+
 	/* IPComp needs an IPIP tunnel for handling uncompressed packets */
 	struct xfrm_state	*tunnel;
 
@@ -163,6 +169,9 @@
 	struct xfrm_lifetime_cur curlft;
 	struct timer_list	timer;
 
+	/* Last used time */
+	u64			lastused;
+
 	/* Reference to data common to all the instances of this
 	 * transformer. */
 	struct xfrm_type	*type;
@@ -196,6 +205,7 @@
 		u32 proto;
 		u32 byid;
 		u32 aevent;
+		u32 type;
 	} data;
 
 	u32	seq;
@@ -212,6 +222,7 @@
 	struct dst_ops		*dst_ops;
 	void			(*garbage_collect)(void);
 	int			(*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl);
+	int			(*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr);
 	struct dst_entry	*(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
 	int			(*bundle_create)(struct xfrm_policy *policy, 
 						 struct xfrm_state **xfrm, 
@@ -235,16 +246,12 @@
 
 struct xfrm_state_afinfo {
 	unsigned short		family;
-	struct list_head	*state_bydst;
-	struct list_head	*state_byspi;
 	int			(*init_flags)(struct xfrm_state *x);
 	void			(*init_tempsel)(struct xfrm_state *x, struct flowi *fl,
 						struct xfrm_tmpl *tmpl,
 						xfrm_address_t *daddr, xfrm_address_t *saddr);
-	struct xfrm_state	*(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto);
-	struct xfrm_state	*(*find_acq)(u8 mode, u32 reqid, u8 proto, 
-					     xfrm_address_t *daddr, xfrm_address_t *saddr, 
-					     int create);
+	int			(*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
+	int			(*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
 };
 
 extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -257,11 +264,17 @@
 	char			*description;
 	struct module		*owner;
 	__u8			proto;
+	__u8			flags;
+#define XFRM_TYPE_NON_FRAGMENT	1
 
 	int			(*init_state)(struct xfrm_state *x);
 	void			(*destructor)(struct xfrm_state *);
 	int			(*input)(struct xfrm_state *, struct sk_buff *skb);
 	int			(*output)(struct xfrm_state *, struct sk_buff *pskb);
+	int			(*reject)(struct xfrm_state *, struct sk_buff *, struct flowi *);
+	int			(*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **);
+	xfrm_address_t		*(*local_addr)(struct xfrm_state *, xfrm_address_t *);
+	xfrm_address_t		*(*remote_addr)(struct xfrm_state *, xfrm_address_t *);
 	/* Estimate maximal size of result of transformation of a dgram */
 	u32			(*get_max_size)(struct xfrm_state *, int size);
 };
@@ -273,7 +286,7 @@
 
 struct xfrm_mode {
 	int (*input)(struct xfrm_state *x, struct sk_buff *skb);
-	int (*output)(struct sk_buff *skb);
+	int (*output)(struct xfrm_state *x,struct sk_buff *skb);
 
 	struct module *owner;
 	unsigned int encap;
@@ -299,7 +312,7 @@
 
 	__u32			reqid;
 
-/* Mode: transport/tunnel */
+/* Mode: transport, tunnel etc. */
 	__u8			mode;
 
 /* Sharing mode: unique, this session only, this user only etc. */
@@ -314,18 +327,20 @@
 	__u32			calgos;
 };
 
-#define XFRM_MAX_DEPTH		4
+#define XFRM_MAX_DEPTH		6
 
 struct xfrm_policy
 {
 	struct xfrm_policy	*next;
-	struct list_head	list;
+	struct hlist_node	bydst;
+	struct hlist_node	byidx;
 
 	/* This lock only affects elements except for entry. */
 	rwlock_t		lock;
 	atomic_t		refcnt;
 	struct timer_list	timer;
 
+	u8			type;
 	u32			priority;
 	u32			index;
 	struct xfrm_selector	selector;
@@ -363,16 +378,16 @@
 	char			*id;
 	int			(*notify)(struct xfrm_state *x, struct km_event *c);
 	int			(*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir);
-	struct xfrm_policy	*(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir);
+	struct xfrm_policy	*(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir);
 	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
 	int			(*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c);
+	int			(*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
 };
 
 extern int xfrm_register_km(struct xfrm_mgr *km);
 extern int xfrm_unregister_km(struct xfrm_mgr *km);
 
-
-extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
+extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
 
 static inline void xfrm_pol_hold(struct xfrm_policy *policy)
 {
@@ -388,67 +403,19 @@
 		__xfrm_policy_destroy(policy);
 }
 
-#define XFRM_DST_HSIZE		1024
-
-static __inline__
-unsigned __xfrm4_dst_hash(xfrm_address_t *addr)
+#ifdef CONFIG_XFRM_SUB_POLICY
+static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
 {
-	unsigned h;
-	h = ntohl(addr->a4);
-	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
-	return h;
+	int i;
+	for (i = npols - 1; i >= 0; --i)
+		xfrm_pol_put(pols[i]);
 }
-
-static __inline__
-unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
+#else
+static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
 {
-	unsigned h;
-	h = ntohl(addr->a6[2]^addr->a6[3]);
-	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
-	return h;
+	xfrm_pol_put(pols[0]);
 }
-
-static __inline__
-unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
-{
-	switch (family) {
-	case AF_INET:
-		return __xfrm4_dst_hash(addr);
-	case AF_INET6:
-		return __xfrm6_dst_hash(addr);
-	}
-	return 0;
-}
-
-static __inline__
-unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
-{
-	unsigned h;
-	h = ntohl(addr->a4^spi^proto);
-	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
-	return h;
-}
-
-static __inline__
-unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
-{
-	unsigned h;
-	h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
-	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
-	return h;
-}
-
-static __inline__
-unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
-{
-	switch (family) {
-	case AF_INET:
-		return __xfrm4_spi_hash(addr, spi, proto);
-	case AF_INET6:
-		return __xfrm6_spi_hash(addr, spi, proto);
-	}
-	return 0;	/*XXX*/
-}
+#endif
 
 extern void __xfrm_state_destroy(struct xfrm_state *);
 
@@ -508,6 +475,11 @@
 	case IPPROTO_ICMPV6:
 		port = htons(fl->fl_icmp_type);
 		break;
+#ifdef CONFIG_IPV6_MIP6
+	case IPPROTO_MH:
+		port = htons(fl->fl_mh_type);
+		break;
+#endif
 	default:
 		port = 0;	/*XXX*/
 	}
@@ -608,6 +580,7 @@
 		struct rt6_info		rt6;
 	} u;
 	struct dst_entry *route;
+	u32 genid;
 	u32 route_mtu_cached;
 	u32 child_mtu_cached;
 	u32 route_cookie;
@@ -659,6 +632,18 @@
 }
 
 static inline int
+xfrm_addr_any(xfrm_address_t *addr, unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return addr->a4 == 0;
+	case AF_INET6:
+		return ipv6_addr_any((struct in6_addr *)&addr->a6);
+	}
+	return 0;
+}
+
+static inline int
 __xfrm4_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x)
 {
 	return	(tmpl->saddr.a4 &&
@@ -692,8 +677,8 @@
 {
 	if (sk && sk->sk_policy[XFRM_POLICY_IN])
 		return __xfrm_policy_check(sk, dir, skb, family);
-		
-	return	(!xfrm_policy_list[dir] && !skb->sp) ||
+
+	return	(!xfrm_policy_count[dir] && !skb->sp) ||
 		(skb->dst->flags & DST_NOPOLICY) ||
 		__xfrm_policy_check(sk, dir, skb, family);
 }
@@ -713,7 +698,7 @@
 
 static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
-	return	!xfrm_policy_list[XFRM_POLICY_OUT] ||
+	return	!xfrm_policy_count[XFRM_POLICY_OUT] ||
 		(skb->dst->flags & DST_NOXFRM) ||
 		__xfrm_route_forward(skb, family);
 }
@@ -831,11 +816,36 @@
 	return 0;
 }
 
+static __inline__ int
+xfrm_state_addr_flow_check(struct xfrm_state *x, struct flowi *fl,
+			   unsigned short family)
+{
+	switch (family) {
+	case AF_INET:
+		return __xfrm4_state_addr_check(x,
+						(xfrm_address_t *)&fl->fl4_dst,
+						(xfrm_address_t *)&fl->fl4_src);
+	case AF_INET6:
+		return __xfrm6_state_addr_check(x,
+						(xfrm_address_t *)&fl->fl6_dst,
+						(xfrm_address_t *)&fl->fl6_src);
+	}
+	return 0;
+}
+
 static inline int xfrm_state_kern(struct xfrm_state *x)
 {
 	return atomic_read(&x->tunnel_users);
 }
 
+static inline int xfrm_id_proto_match(u8 proto, u8 userproto)
+{
+	return (!userproto || proto == userproto ||
+		(userproto == IPSEC_PROTO_ANY && (proto == IPPROTO_AH ||
+						  proto == IPPROTO_ESP ||
+						  proto == IPPROTO_COMP)));
+}
+
 /*
  * xfrm algorithm information
  */
@@ -855,6 +865,7 @@
 
 struct xfrm_algo_desc {
 	char *name;
+	char *compat;
 	u8 available:1;
 	union {
 		struct xfrm_algo_auth_info auth;
@@ -902,6 +913,25 @@
 extern int xfrm_state_add(struct xfrm_state *x);
 extern int xfrm_state_update(struct xfrm_state *x);
 extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family);
+extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family);
+#ifdef CONFIG_XFRM_SUB_POLICY
+extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src,
+			  int n, unsigned short family);
+extern int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src,
+			   int n, unsigned short family);
+#else
+static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src,
+				 int n, unsigned short family)
+{
+	return -ENOSYS;
+}
+
+static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src,
+				  int n, unsigned short family)
+{
+	return -ENOSYS;
+}
+#endif
 extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
 extern void xfrm_state_flush(u8 proto);
@@ -917,12 +947,16 @@
 extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
 extern int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi);
 extern int xfrm6_rcv(struct sk_buff **pskb);
+extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
+			    xfrm_address_t *saddr, u8 proto);
 extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler);
 extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler);
 extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr);
 extern void xfrm6_tunnel_free_spi(xfrm_address_t *saddr);
 extern u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr);
 extern int xfrm6_output(struct sk_buff *skb);
+extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
+				 u8 **prevhdr);
 
 #ifdef CONFIG_XFRM
 extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type);
@@ -947,27 +981,27 @@
 #endif
 
 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp);
-extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *);
+extern int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
+struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
+					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete);
-struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete);
-void xfrm_policy_flush(void);
+struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete);
+void xfrm_policy_flush(u8 type);
 u32 xfrm_get_acqseq(void);
 void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
 struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
 				  xfrm_address_t *daddr, xfrm_address_t *saddr, 
 				  int create, unsigned short family);
-extern void xfrm_policy_flush(void);
+extern void xfrm_policy_flush(u8 type);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
-extern int xfrm_flush_bundles(void);
-extern void xfrm_flush_all_bundles(void);
-extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family);
+extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict);
 extern void xfrm_init_pmtu(struct dst_entry *dst);
 
 extern wait_queue_head_t km_waitq;
 extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
 extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid);
+extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
 
 extern void xfrm_input_init(void);
 extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq);
@@ -984,11 +1018,13 @@
 extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe);
 extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe);
 
-struct crypto_tfm;
-typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int);
+struct hash_desc;
+struct scatterlist;
+typedef int (icv_update_fn_t)(struct hash_desc *, struct scatterlist *,
+			      unsigned int);
 
-extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
-			 int offset, int len, icv_update_fn_t icv_update);
+extern int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *tfm,
+			int offset, int len, icv_update_fn_t icv_update);
 
 static inline int xfrm_addr_cmp(xfrm_address_t *a, xfrm_address_t *b,
 				int family)
diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild
index eb710ba..e7c0432 100644
--- a/include/rdma/Kbuild
+++ b/include/rdma/Kbuild
@@ -1 +1 @@
-header-y := ib_user_mad.h
+header-y += ib_user_mad.h
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 0ff6739..81b62307 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -40,7 +40,7 @@
 	unsigned char src_dev_addr[MAX_ADDR_LEN];
 	unsigned char dst_dev_addr[MAX_ADDR_LEN];
 	unsigned char broadcast[MAX_ADDR_LEN];
-	enum ib_node_type dev_type;
+	enum rdma_node_type dev_type;
 };
 
 /**
@@ -72,6 +72,9 @@
 
 void rdma_addr_cancel(struct rdma_dev_addr *addr);
 
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+	      const unsigned char *dst_dev_addr);
+
 static inline int ip_addr_size(struct sockaddr *addr)
 {
 	return addr->sa_family == AF_INET6 ?
@@ -113,4 +116,16 @@
 	memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
 }
 
+static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
+}
+
+static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
+}
+
 #endif /* IB_ADDR_H */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index c99e442..97715b0 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -36,8 +37,11 @@
 #ifndef IB_SA_H
 #define IB_SA_H
 
+#include <linux/completion.h>
 #include <linux/compiler.h>
 
+#include <asm/atomic.h>
+
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_mad.h>
 
@@ -79,8 +83,8 @@
 };
 
 enum ib_sa_selector {
-	IB_SA_GTE  = 0,
-	IB_SA_LTE  = 1,
+	IB_SA_GT   = 0,
+	IB_SA_LT   = 1,
 	IB_SA_EQ   = 2,
 	/*
 	 * The meaning of "best" depends on the attribute: for
@@ -250,11 +254,28 @@
 	u64		data64[2];
 };
 
+struct ib_sa_client {
+	atomic_t users;
+	struct completion comp;
+};
+
+/**
+ * ib_sa_register_client - Register an SA client.
+ */
+void ib_sa_register_client(struct ib_sa_client *client);
+
+/**
+ * ib_sa_unregister_client - Deregister an SA client.
+ * @client: Client object to deregister.
+ */
+void ib_sa_unregister_client(struct ib_sa_client *client);
+
 struct ib_sa_query;
 
 void ib_sa_cancel_query(int id, struct ib_sa_query *query);
 
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_path_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@@ -264,7 +285,8 @@
 		       void *context,
 		       struct ib_sa_query **query);
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+			     struct ib_device *device, u8 port_num,
 			     u8 method,
 			     struct ib_sa_mcmember_rec *rec,
 			     ib_sa_comp_mask comp_mask,
@@ -275,7 +297,8 @@
 			     void *context,
 			     struct ib_sa_query **query);
 
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+			 struct ib_device *device, u8 port_num,
 			 u8 method,
 			 struct ib_sa_service_rec *rec,
 			 ib_sa_comp_mask comp_mask,
@@ -288,6 +311,7 @@
 
 /**
  * ib_sa_mcmember_rec_set - Start an MCMember set query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:MCMember Record to send in query
@@ -311,7 +335,8 @@
  * cancel the query.
  */
 static inline int
-ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num,
+ib_sa_mcmember_rec_set(struct ib_sa_client *client,
+		       struct ib_device *device, u8 port_num,
 		       struct ib_sa_mcmember_rec *rec,
 		       ib_sa_comp_mask comp_mask,
 		       int timeout_ms, gfp_t gfp_mask,
@@ -321,7 +346,7 @@
 		       void *context,
 		       struct ib_sa_query **query)
 {
-	return ib_sa_mcmember_rec_query(device, port_num,
+	return ib_sa_mcmember_rec_query(client, device, port_num,
 					IB_MGMT_METHOD_SET,
 					rec, comp_mask,
 					timeout_ms, gfp_mask, callback,
@@ -330,6 +355,7 @@
 
 /**
  * ib_sa_mcmember_rec_delete - Start an MCMember delete query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:MCMember Record to send in query
@@ -353,7 +379,8 @@
  * cancel the query.
  */
 static inline int
-ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num,
+ib_sa_mcmember_rec_delete(struct ib_sa_client *client,
+			  struct ib_device *device, u8 port_num,
 			  struct ib_sa_mcmember_rec *rec,
 			  ib_sa_comp_mask comp_mask,
 			  int timeout_ms, gfp_t gfp_mask,
@@ -363,7 +390,7 @@
 			  void *context,
 			  struct ib_sa_query **query)
 {
-	return ib_sa_mcmember_rec_query(device, port_num,
+	return ib_sa_mcmember_rec_query(client, device, port_num,
 					IB_SA_METHOD_DELETE,
 					rec, comp_mask,
 					timeout_ms, gfp_mask, callback,
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index 7b53720..db1b814 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -275,6 +275,8 @@
 
 struct ib_uverbs_resize_cq_resp {
 	__u32 cqe;
+	__u32 reserved;
+	__u64 driver_data[0];
 };
 
 struct ib_uverbs_poll_cq {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ee1f3a3..8eacc35 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -56,12 +56,22 @@
 	} global;
 };
 
-enum ib_node_type {
-	IB_NODE_CA 	= 1,
-	IB_NODE_SWITCH,
-	IB_NODE_ROUTER
+enum rdma_node_type {
+	/* IB values map to NodeInfo:NodeType. */
+	RDMA_NODE_IB_CA 	= 1,
+	RDMA_NODE_IB_SWITCH,
+	RDMA_NODE_IB_ROUTER,
+	RDMA_NODE_RNIC
 };
 
+enum rdma_transport_type {
+	RDMA_TRANSPORT_IB,
+	RDMA_TRANSPORT_IWARP
+};
+
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__;
+
 enum ib_device_cap_flags {
 	IB_DEVICE_RESIZE_MAX_WR		= 1,
 	IB_DEVICE_BAD_PKEY_CNTR		= (1<<1),
@@ -78,6 +88,9 @@
 	IB_DEVICE_RC_RNR_NAK_GEN	= (1<<12),
 	IB_DEVICE_SRQ_RESIZE		= (1<<13),
 	IB_DEVICE_N_NOTIFY_CQ		= (1<<14),
+	IB_DEVICE_ZERO_STAG		= (1<<15),
+	IB_DEVICE_SEND_W_INV		= (1<<16),
+	IB_DEVICE_MEM_WINDOW		= (1<<17)
 };
 
 enum ib_atomic_cap {
@@ -835,6 +848,8 @@
 	u8                     *lmc_cache;
 };
 
+struct iw_cm_verbs;
+
 struct ib_device {
 	struct device                *dma_device;
 
@@ -851,6 +866,8 @@
 
 	u32                           flags;
 
+	struct iw_cm_verbs	     *iwcm;
+
 	int		           (*query_device)(struct ib_device *device,
 						   struct ib_device_attr *device_attr);
 	int		           (*query_port)(struct ib_device *device,
@@ -888,7 +905,8 @@
 						 struct ib_udata *udata);
 	int                        (*modify_srq)(struct ib_srq *srq,
 						 struct ib_srq_attr *srq_attr,
-						 enum ib_srq_attr_mask srq_attr_mask);
+						 enum ib_srq_attr_mask srq_attr_mask,
+						 struct ib_udata *udata);
 	int                        (*query_srq)(struct ib_srq *srq,
 						struct ib_srq_attr *srq_attr);
 	int                        (*destroy_srq)(struct ib_srq *srq);
@@ -900,7 +918,8 @@
 						struct ib_udata *udata);
 	int                        (*modify_qp)(struct ib_qp *qp,
 						struct ib_qp_attr *qp_attr,
-						int qp_attr_mask);
+						int qp_attr_mask,
+						struct ib_udata *udata);
 	int                        (*query_qp)(struct ib_qp *qp,
 					       struct ib_qp_attr *qp_attr,
 					       int qp_attr_mask,
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
new file mode 100644
index 0000000..aeefa9b
--- /dev/null
+++ b/include/rdma/iw_cm.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IW_CM_H
+#define IW_CM_H
+
+#include <linux/in.h>
+#include <rdma/ib_cm.h>
+
+struct iw_cm_id;
+
+enum iw_cm_event_type {
+	IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */
+	IW_CM_EVENT_CONNECT_REPLY,	 /* reply from active connect request */
+	IW_CM_EVENT_ESTABLISHED,	 /* passive side accept successful */
+	IW_CM_EVENT_DISCONNECT,		 /* orderly shutdown */
+	IW_CM_EVENT_CLOSE		 /* close complete */
+};
+
+enum iw_cm_event_status {
+	IW_CM_EVENT_STATUS_OK = 0,	 /* request successful */
+	IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */
+	IW_CM_EVENT_STATUS_REJECTED,	 /* connect request rejected */
+	IW_CM_EVENT_STATUS_TIMEOUT,	 /* the operation timed out */
+	IW_CM_EVENT_STATUS_RESET,	 /* reset from remote peer */
+	IW_CM_EVENT_STATUS_EINVAL,	 /* asynchronous failure for bad parm */
+};
+
+struct iw_cm_event {
+	enum iw_cm_event_type event;
+	enum iw_cm_event_status status;
+	struct sockaddr_in local_addr;
+	struct sockaddr_in remote_addr;
+	void *private_data;
+	u8 private_data_len;
+	void* provider_data;
+};
+
+/**
+ * iw_cm_handler - Function to be called by the IW CM when delivering events
+ * to the client.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id,
+			     struct iw_cm_event *event);
+
+/**
+ * iw_event_handler - Function called by the provider when delivering provider
+ * events to the IW CM.  Returns either 0 indicating the event was processed
+ * or -errno if the event could not be processed.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_event_handler)(struct iw_cm_id *cm_id,
+				 struct iw_cm_event *event);
+
+struct iw_cm_id {
+	iw_cm_handler		cm_handler;      /* client callback function */
+	void		        *context;	 /* client cb context */
+	struct ib_device	*device;
+	struct sockaddr_in      local_addr;
+	struct sockaddr_in	remote_addr;
+	void			*provider_data;	 /* provider private data */
+	iw_event_handler        event_handler;   /* cb for provider
+						    events */
+	/* Used by provider to add and remove refs on IW cm_id */
+	void (*add_ref)(struct iw_cm_id *);
+	void (*rem_ref)(struct iw_cm_id *);
+};
+
+struct iw_cm_conn_param {
+	const void *private_data;
+	u16 private_data_len;
+	u32 ord;
+	u32 ird;
+	u32 qpn;
+};
+
+struct iw_cm_verbs {
+	void		(*add_ref)(struct ib_qp *qp);
+
+	void		(*rem_ref)(struct ib_qp *qp);
+
+	struct ib_qp *	(*get_qp)(struct ib_device *device,
+				  int qpn);
+
+	int		(*connect)(struct iw_cm_id *cm_id,
+				   struct iw_cm_conn_param *conn_param);
+
+	int		(*accept)(struct iw_cm_id *cm_id,
+				  struct iw_cm_conn_param *conn_param);
+
+	int		(*reject)(struct iw_cm_id *cm_id,
+				  const void *pdata, u8 pdata_len);
+
+	int		(*create_listen)(struct iw_cm_id *cm_id,
+					 int backlog);
+
+	int		(*destroy_listen)(struct iw_cm_id *cm_id);
+};
+
+/**
+ * iw_create_cm_id - Create an IW CM identifier.
+ *
+ * @device: The IB device on which to create the IW CM identifier.
+ * @cm_handler: User callback invoked to report events associated with the
+ *   returned IW CM identifier.
+ * @context: User specified context associated with the id.
+ */
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 iw_cm_handler cm_handler, void *context);
+
+/**
+ * iw_destroy_cm_id - Destroy an IW CM identifier.
+ *
+ * @cm_id: The previously created IW CM identifier to destroy.
+ *
+ * The client can assume that no events will be delivered for the CM ID after
+ * this function returns.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id);
+
+/**
+ * iw_cm_unbind_qp - Unbind the specified IW CM identifier and QP
+ *
+ * @cm_id: The IW CM identifier to unbind from the QP.
+ * @qp: The QP
+ *
+ * This is called by the provider when destroying the QP to ensure
+ * that any references held by the IWCM are released. It may also
+ * be called by the IWCM when destroying a CM_ID to ensure that any
+ * references held by the provider are released.
+ */
+void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp);
+
+/**
+ * iw_cm_get_qp - Return the ib_qp associated with a QPN
+ *
+ * @device: The IB device
+ * @qpn: The queue pair number
+ */
+struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn);
+
+/**
+ * iw_cm_listen - Listen for incoming connection requests on the
+ * specified IW CM id.
+ *
+ * @cm_id: The IW CM identifier.
+ * @backlog: The maximum number of outstanding un-accepted inbound listen
+ *   requests to queue.
+ *
+ * The source address and port number are specified in the IW CM identifier
+ * structure.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog);
+
+/**
+ * iw_cm_accept - Called to accept an incoming connect request.
+ *
+ * @cm_id: The IW CM identifier associated with the connection request.
+ * @iw_param: Pointer to a structure containing connection establishment
+ *   parameters.
+ *
+ * The specified cm_id will have been provided in the event data for a
+ * CONNECT_REQUEST event. Subsequent events related to this connection will be
+ * delivered to the specified IW CM identifier and may occur prior to
+ * the return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_reject - Reject an incoming connection request.
+ *
+ * @cm_id: Connection identifier associated with the request.
+ * @private_data: Pointer to data to deliver to the remote peer as part of the
+ *   reject message.
+ * @private_data_len: The number of bytes in the private_data parameter.
+ *
+ * The client can assume that no events will be delivered to the specified IW
+ * CM identifier following the return of this function. The private_data
+ * buffer is available for reuse when this function returns.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data,
+		 u8 private_data_len);
+
+/**
+ * iw_cm_connect - Called to request a connection to a remote peer.
+ *
+ * @cm_id: The IW CM identifier for the connection.
+ * @iw_param: Pointer to a structure containing connection establishment
+ *   parameters.
+ *
+ * Events may be delivered to the specified IW CM identifier prior to the
+ * return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_disconnect - Close the specified connection.
+ *
+ * @cm_id: The IW CM identifier to close.
+ * @abrupt: If 0, the connection will be closed gracefully, otherwise, the
+ *   connection will be reset.
+ *
+ * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is
+ * delivered.
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt);
+
+/**
+ * iw_cm_init_qp_attr - Called to initialize the attributes of the QP
+ * associated with an IW CM identifier.
+ *
+ * @cm_id: The IW CM identifier associated with the QP
+ * @qp_attr: Pointer to the QP attributes structure.
+ * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are
+ *   valid.
+ */
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask);
+
+#endif /* IW_CM_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 402c63d..deb5a0a 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -117,6 +117,14 @@
 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 				  void *context, enum rdma_port_space ps);
 
+/**
+  * rdma_destroy_id - Destroys an RDMA identifier.
+  *
+  * @id: RDMA identifier.
+  *
+  * Note: calling this function has the effect of canceling in-flight
+  * asynchronous operations associated with the id.
+  */
 void rdma_destroy_id(struct rdma_cm_id *id);
 
 /**
@@ -237,6 +245,10 @@
  * Typically, this routine is only called by the listener to accept a connection
  * request.  It must also be called on the active side of a connection if the
  * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
  */
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
 
diff --git a/include/scsi/Kbuild b/include/scsi/Kbuild
index 14a033d..744f850 100644
--- a/include/scsi/Kbuild
+++ b/include/scsi/Kbuild
@@ -1,2 +1,4 @@
 header-y += scsi.h
-unifdef-y := scsi_ioctl.h sg.h
+
+unifdef-y += scsi_ioctl.h
+unifdef-y += sg.h
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 41904f6..401192e 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -102,6 +102,8 @@
 	uint32_t		unsol_datasn;
 	int			imm_count;	/* imm-data (bytes)   */
 	int			unsol_count;	/* unsolicited (bytes)*/
+	/* offset in unsolicited stream (bytes) */
+	int			unsol_offset;
 	int			data_count;	/* remaining Data-Out */
 	struct scsi_cmnd	*sc;		/* associated SCSI cmd*/
 	int			total_length;
@@ -110,6 +112,7 @@
 
 	/* state set/tested under session->lock */
 	int			state;
+	atomic_t		refcount;
 	struct list_head	running;	/* running cmd list */
 	void			*dd_data;	/* driver/transport data */
 };
@@ -290,8 +293,7 @@
 extern int iscsi_check_assign_cmdsn(struct iscsi_session *,
 				    struct iscsi_nopin *);
 extern void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *,
-					struct iscsi_data *hdr,
-					int transport_data_cnt);
+					struct iscsi_data *hdr);
 extern int iscsi_conn_send_pdu(struct iscsi_cls_conn *, struct iscsi_hdr *,
 				char *, uint32_t);
 extern int iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *,
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
new file mode 100644
index 0000000..8e39982
--- /dev/null
+++ b/include/scsi/libsas.h
@@ -0,0 +1,627 @@
+/*
+ * SAS host prototypes and structures header file
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#ifndef _LIBSAS_H_
+#define _LIBSAS_H_
+
+
+#include <linux/timer.h>
+#include <linux/pci.h>
+#include <scsi/sas.h>
+#include <linux/list.h>
+#include <asm/semaphore.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_transport_sas.h>
+
+struct block_device;
+
+enum sas_class {
+	SAS,
+	EXPANDER
+};
+
+enum sas_phy_role {
+	PHY_ROLE_NONE = 0,
+	PHY_ROLE_TARGET = 0x40,
+	PHY_ROLE_INITIATOR = 0x80,
+};
+
+enum sas_phy_type {
+        PHY_TYPE_PHYSICAL,
+        PHY_TYPE_VIRTUAL
+};
+
+/* The events are mnemonically described in sas_dump.c,
+ * so when updating/adding events here, please
+ * update that file as well.
+ */
+enum ha_event {
+	HAE_RESET             = 0U,
+	HA_NUM_EVENTS         = 1,
+};
+
+enum port_event {
+	PORTE_BYTES_DMAED     = 0U,
+	PORTE_BROADCAST_RCVD  = 1,
+	PORTE_LINK_RESET_ERR  = 2,
+	PORTE_TIMER_EVENT     = 3,
+	PORTE_HARD_RESET      = 4,
+	PORT_NUM_EVENTS       = 5,
+};
+
+enum phy_event {
+	PHYE_LOSS_OF_SIGNAL   = 0U,
+	PHYE_OOB_DONE         = 1,
+	PHYE_OOB_ERROR        = 2,
+	PHYE_SPINUP_HOLD      = 3, /* hot plug SATA, no COMWAKE sent */
+	PHY_NUM_EVENTS        = 4,
+};
+
+enum discover_event {
+	DISCE_DISCOVER_DOMAIN   = 0U,
+	DISCE_REVALIDATE_DOMAIN = 1,
+	DISCE_PORT_GONE         = 2,
+	DISC_NUM_EVENTS 	= 3,
+};
+
+/* ---------- Expander Devices ---------- */
+
+#define ETASK 0xFA
+
+#define to_dom_device(_obj) container_of(_obj, struct domain_device, dev_obj)
+#define to_dev_attr(_attr)  container_of(_attr, struct domain_dev_attribute,\
+                                         attr)
+
+enum routing_attribute {
+	DIRECT_ROUTING,
+	SUBTRACTIVE_ROUTING,
+	TABLE_ROUTING,
+};
+
+enum ex_phy_state {
+	PHY_EMPTY,
+	PHY_VACANT,
+	PHY_NOT_PRESENT,
+	PHY_DEVICE_DISCOVERED
+};
+
+struct ex_phy {
+	int    phy_id;
+
+	enum ex_phy_state phy_state;
+
+	enum sas_dev_type attached_dev_type;
+	enum sas_linkrate linkrate;
+
+	u8   attached_sata_host:1;
+	u8   attached_sata_dev:1;
+	u8   attached_sata_ps:1;
+
+	enum sas_proto attached_tproto;
+	enum sas_proto attached_iproto;
+
+	u8   attached_sas_addr[SAS_ADDR_SIZE];
+	u8   attached_phy_id;
+
+	u8   phy_change_count;
+	enum routing_attribute routing_attr;
+	u8   virtual:1;
+
+	int  last_da_index;
+
+	struct sas_phy *phy;
+	struct sas_port *port;
+};
+
+struct expander_device {
+	struct list_head children;
+
+	u16    ex_change_count;
+	u16    max_route_indexes;
+	u8     num_phys;
+	u8     configuring:1;
+	u8     conf_route_table:1;
+	u8     enclosure_logical_id[8];
+
+	struct ex_phy *ex_phy;
+	struct sas_port *parent_port;
+};
+
+/* ---------- SATA device ---------- */
+enum ata_command_set {
+        ATA_COMMAND_SET   = 0,
+        ATAPI_COMMAND_SET = 1,
+};
+
+struct sata_device {
+        enum   ata_command_set command_set;
+        struct smp_resp        rps_resp; /* report_phy_sata_resp */
+        __le16 *identify_device;
+        __le16 *identify_packet_device;
+
+        u8     port_no;        /* port number, if this is a PM (Port) */
+        struct list_head children; /* PM Ports if this is a PM */
+};
+
+/* ---------- Domain device ---------- */
+struct domain_device {
+        enum sas_dev_type dev_type;
+
+        enum sas_linkrate linkrate;
+        enum sas_linkrate min_linkrate;
+        enum sas_linkrate max_linkrate;
+
+        int  pathways;
+
+        struct domain_device *parent;
+        struct list_head siblings; /* devices on the same level */
+        struct asd_sas_port *port;        /* shortcut to root of the tree */
+
+        struct list_head dev_list_node;
+
+        enum sas_proto    iproto;
+        enum sas_proto    tproto;
+
+        struct sas_rphy *rphy;
+
+        u8  sas_addr[SAS_ADDR_SIZE];
+        u8  hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
+
+        u8  frame_rcvd[32];
+
+        union {
+                struct expander_device ex_dev;
+                struct sata_device     sata_dev; /* STP & directly attached */
+        };
+
+        void *lldd_dev;
+};
+
+struct sas_discovery {
+	spinlock_t disc_event_lock;
+	struct work_struct disc_work[DISC_NUM_EVENTS];
+	unsigned long    pending;
+	u8     fanout_sas_addr[8];
+	u8     eeds_a[8];
+	u8     eeds_b[8];
+	int    max_level;
+};
+
+
+/* The port struct is Class:RW, driver:RO */
+struct asd_sas_port {
+/* private: */
+	struct completion port_gone_completion;
+
+	struct sas_discovery disc;
+	struct domain_device *port_dev;
+	spinlock_t dev_list_lock;
+	struct list_head dev_list;
+	enum   sas_linkrate linkrate;
+
+	struct sas_phy *phy;
+	struct work_struct work;
+
+/* public: */
+	int id;
+
+	enum sas_class   class;
+	u8               sas_addr[SAS_ADDR_SIZE];
+	u8               attached_sas_addr[SAS_ADDR_SIZE];
+	enum sas_proto   iproto;
+	enum sas_proto   tproto;
+
+	enum sas_oob_mode oob_mode;
+
+	spinlock_t       phy_list_lock;
+	struct list_head phy_list;
+	int              num_phys;
+	u32              phy_mask;
+
+	struct sas_ha_struct *ha;
+
+	struct sas_port	*port;
+
+	void *lldd_port;	  /* not touched by the sas class code */
+};
+
+/* The phy pretty much is controlled by the LLDD.
+ * The class only reads those fields.
+ */
+struct asd_sas_phy {
+/* private: */
+	/* protected by ha->event_lock */
+	struct work_struct   port_events[PORT_NUM_EVENTS];
+	struct work_struct   phy_events[PHY_NUM_EVENTS];
+
+	unsigned long port_events_pending;
+	unsigned long phy_events_pending;
+
+	int error;
+
+	struct sas_phy *phy;
+
+/* public: */
+	/* The following are class:RO, driver:R/W */
+	int            enabled;	  /* must be set */
+
+	int            id;	  /* must be set */
+	enum sas_class class;
+	enum sas_proto iproto;
+	enum sas_proto tproto;
+
+	enum sas_phy_type  type;
+	enum sas_phy_role  role;
+	enum sas_oob_mode  oob_mode;
+	enum sas_linkrate linkrate;
+
+	u8   *sas_addr;		  /* must be set */
+	u8   attached_sas_addr[SAS_ADDR_SIZE]; /* class:RO, driver: R/W */
+
+	spinlock_t     frame_rcvd_lock;
+	u8             *frame_rcvd; /* must be set */
+	int            frame_rcvd_size;
+
+	spinlock_t     sas_prim_lock;
+	u32            sas_prim;
+
+	struct list_head port_phy_el; /* driver:RO */
+	struct asd_sas_port      *port; /* Class:RW, driver: RO */
+
+	struct sas_ha_struct *ha; /* may be set; the class sets it anyway */
+
+	void *lldd_phy;		  /* not touched by the sas_class_code */
+};
+
+struct scsi_core {
+	struct Scsi_Host *shost;
+
+	spinlock_t        task_queue_lock;
+	struct list_head  task_queue;
+	int               task_queue_size;
+
+	struct semaphore  queue_thread_sema;
+	int               queue_thread_kill;
+};
+
+struct sas_ha_struct {
+/* private: */
+	spinlock_t       event_lock;
+	struct work_struct ha_events[HA_NUM_EVENTS];
+	unsigned long	 pending;
+
+	struct scsi_core core;
+
+/* public: */
+	char *sas_ha_name;
+	struct pci_dev *pcidev;	  /* should be set */
+	struct module *lldd_module; /* should be set */
+
+	u8 *sas_addr;		  /* must be set */
+	u8 hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
+
+	spinlock_t      phy_port_lock;
+	struct asd_sas_phy  **sas_phy; /* array of valid pointers, must be set */
+	struct asd_sas_port **sas_port; /* array of valid pointers, must be set */
+	int             num_phys; /* must be set, gt 0, static */
+
+	/* The class calls this to send a task for execution. */
+	int lldd_max_execute_num;
+	int lldd_queue_size;
+
+	/* LLDD calls these to notify the class of an event. */
+	void (*notify_ha_event)(struct sas_ha_struct *, enum ha_event);
+	void (*notify_port_event)(struct asd_sas_phy *, enum port_event);
+	void (*notify_phy_event)(struct asd_sas_phy *, enum phy_event);
+
+	void *lldd_ha;		  /* not touched by sas class code */
+};
+
+#define SHOST_TO_SAS_HA(_shost) (*(struct sas_ha_struct **)(_shost)->hostdata)
+
+static inline struct domain_device *
+starget_to_domain_dev(struct scsi_target *starget) {
+	return starget->hostdata;
+}
+
+static inline struct domain_device *
+sdev_to_domain_dev(struct scsi_device *sdev) {
+	return starget_to_domain_dev(sdev->sdev_target);
+}
+
+static inline struct domain_device *
+cmd_to_domain_dev(struct scsi_cmnd *cmd)
+{
+	return sdev_to_domain_dev(cmd->device);
+}
+
+void sas_hash_addr(u8 *hashed, const u8 *sas_addr);
+
+/* Before calling a notify event, LLDD should use this function
+ * when the link is severed (possibly from its tasklet).
+ * The idea is that the Class only reads those, while the LLDD,
+ * can R/W these (thus avoiding a race).
+ */
+static inline void sas_phy_disconnected(struct asd_sas_phy *phy)
+{
+	phy->oob_mode = OOB_NOT_CONNECTED;
+	phy->linkrate = SAS_LINK_RATE_UNKNOWN;
+}
+
+/* ---------- Tasks ---------- */
+/*
+      service_response |  SAS_TASK_COMPLETE  |  SAS_TASK_UNDELIVERED |
+  exec_status          |                     |                       |
+  ---------------------+---------------------+-----------------------+
+       SAM_...         |         X           |                       |
+       DEV_NO_RESPONSE |         X           |           X           |
+       INTERRUPTED     |         X           |                       |
+       QUEUE_FULL      |                     |           X           |
+       DEVICE_UNKNOWN  |                     |           X           |
+       SG_ERR          |                     |           X           |
+  ---------------------+---------------------+-----------------------+
+ */
+
+enum service_response {
+	SAS_TASK_COMPLETE,
+	SAS_TASK_UNDELIVERED = -1,
+};
+
+enum exec_status {
+	SAM_GOOD         = 0,
+	SAM_CHECK_COND   = 2,
+	SAM_COND_MET     = 4,
+	SAM_BUSY         = 8,
+	SAM_INTERMEDIATE = 0x10,
+	SAM_IM_COND_MET  = 0x12,
+	SAM_RESV_CONFLICT= 0x14,
+	SAM_TASK_SET_FULL= 0x28,
+	SAM_ACA_ACTIVE   = 0x30,
+	SAM_TASK_ABORTED = 0x40,
+
+	SAS_DEV_NO_RESPONSE = 0x80,
+	SAS_DATA_UNDERRUN,
+	SAS_DATA_OVERRUN,
+	SAS_INTERRUPTED,
+	SAS_QUEUE_FULL,
+	SAS_DEVICE_UNKNOWN,
+	SAS_SG_ERR,
+	SAS_OPEN_REJECT,
+	SAS_OPEN_TO,
+	SAS_PROTO_RESPONSE,
+	SAS_PHY_DOWN,
+	SAS_NAK_R_ERR,
+	SAS_PENDING,
+	SAS_ABORTED_TASK,
+};
+
+/* When a task finishes with a response, the LLDD examines the
+ * response:
+ * 	- For an ATA task task_status_struct::stat is set to
+ * SAS_PROTO_RESPONSE, and the task_status_struct::buf is set to the
+ * contents of struct ata_task_resp.
+ * 	- For SSP tasks, if no data is present or status/TMF response
+ * is valid, task_status_struct::stat is set.  If data is present
+ * (SENSE data), the LLDD copies up to SAS_STATUS_BUF_SIZE, sets
+ * task_status_struct::buf_valid_size, and task_status_struct::stat is
+ * set to SAM_CHECK_COND.
+ *
+ * "buf" has format SCSI Sense for SSP task, or struct ata_task_resp
+ * for ATA task.
+ *
+ * "frame_len" is the total frame length, which could be more or less
+ * than actually copied.
+ *
+ * Tasks ending with response, always set the residual field.
+ */
+struct ata_task_resp {
+	u16  frame_len;
+	u8   ending_fis[24];	  /* dev to host or data-in */
+	u32  sstatus;
+	u32  serror;
+	u32  scontrol;
+	u32  sactive;
+};
+
+#define SAS_STATUS_BUF_SIZE 96
+
+struct task_status_struct {
+	enum service_response resp;
+	enum exec_status      stat;
+	int  buf_valid_size;
+
+	u8   buf[SAS_STATUS_BUF_SIZE];
+
+	u32  residual;
+	enum sas_open_rej_reason open_rej_reason;
+};
+
+/* ATA and ATAPI task queuable to a SAS LLDD.
+ */
+struct sas_ata_task {
+	struct host_to_dev_fis fis;
+	u8     atapi_packet[16];  /* 0 if not ATAPI task */
+
+	u8     retry_count;	  /* hardware retry, should be > 0 */
+
+	u8     dma_xfer:1;	  /* PIO:0 or DMA:1 */
+	u8     use_ncq:1;
+	u8     set_affil_pol:1;
+	u8     stp_affil_pol:1;
+
+	u8     device_control_reg_update:1;
+};
+
+struct sas_smp_task {
+	struct scatterlist smp_req;
+	struct scatterlist smp_resp;
+};
+
+enum task_attribute {
+	TASK_ATTR_SIMPLE = 0,
+	TASK_ATTR_HOQ    = 1,
+	TASK_ATTR_ORDERED= 2,
+	TASK_ATTR_ACA    = 4,
+};
+
+struct sas_ssp_task {
+	u8     retry_count;	  /* hardware retry, should be > 0 */
+
+	u8     LUN[8];
+	u8     enable_first_burst:1;
+	enum   task_attribute task_attr;
+	u8     task_prio;
+	u8     cdb[16];
+};
+
+struct sas_task {
+	struct domain_device *dev;
+	struct list_head      list;
+
+	spinlock_t   task_state_lock;
+	unsigned     task_state_flags;
+
+	enum   sas_proto      task_proto;
+
+	/* Used by the discovery code. */
+	struct timer_list     timer;
+	struct completion     completion;
+
+	union {
+		struct sas_ata_task ata_task;
+		struct sas_smp_task smp_task;
+		struct sas_ssp_task ssp_task;
+	};
+
+	struct scatterlist *scatter;
+	int    num_scatter;
+	u32    total_xfer_len;
+	u8     data_dir:2;	  /* Use PCI_DMA_... */
+
+	struct task_status_struct task_status;
+	void   (*task_done)(struct sas_task *);
+
+	void   *lldd_task;	  /* for use by LLDDs */
+	void   *uldd_task;
+};
+
+
+
+#define SAS_TASK_STATE_PENDING  1
+#define SAS_TASK_STATE_DONE     2
+#define SAS_TASK_STATE_ABORTED  4
+
+static inline struct sas_task *sas_alloc_task(unsigned long flags)
+{
+	extern kmem_cache_t *sas_task_cache;
+	struct sas_task *task = kmem_cache_alloc(sas_task_cache, flags);
+
+	if (task) {
+		memset(task, 0, sizeof(*task));
+		INIT_LIST_HEAD(&task->list);
+		spin_lock_init(&task->task_state_lock);
+		task->task_state_flags = SAS_TASK_STATE_PENDING;
+		init_timer(&task->timer);
+		init_completion(&task->completion);
+	}
+
+	return task;
+}
+
+static inline void sas_free_task(struct sas_task *task)
+{
+	if (task) {
+		extern kmem_cache_t *sas_task_cache;
+		BUG_ON(!list_empty(&task->list));
+		kmem_cache_free(sas_task_cache, task);
+	}
+}
+
+struct sas_domain_function_template {
+	/* The class calls these to notify the LLDD of an event. */
+	void (*lldd_port_formed)(struct asd_sas_phy *);
+	void (*lldd_port_deformed)(struct asd_sas_phy *);
+
+	/* The class calls these when a device is found or gone. */
+	int  (*lldd_dev_found)(struct domain_device *);
+	void (*lldd_dev_gone)(struct domain_device *);
+
+	int (*lldd_execute_task)(struct sas_task *, int num,
+				 unsigned long gfp_flags);
+
+	/* Task Management Functions. Must be called from process context. */
+	int (*lldd_abort_task)(struct sas_task *);
+	int (*lldd_abort_task_set)(struct domain_device *, u8 *lun);
+	int (*lldd_clear_aca)(struct domain_device *, u8 *lun);
+	int (*lldd_clear_task_set)(struct domain_device *, u8 *lun);
+	int (*lldd_I_T_nexus_reset)(struct domain_device *);
+	int (*lldd_lu_reset)(struct domain_device *, u8 *lun);
+	int (*lldd_query_task)(struct sas_task *);
+
+	/* Port and Adapter management */
+	int (*lldd_clear_nexus_port)(struct asd_sas_port *);
+	int (*lldd_clear_nexus_ha)(struct sas_ha_struct *);
+
+	/* Phy management */
+	int (*lldd_control_phy)(struct asd_sas_phy *, enum phy_func, void *);
+};
+
+extern int sas_register_ha(struct sas_ha_struct *);
+extern int sas_unregister_ha(struct sas_ha_struct *);
+
+extern int sas_queuecommand(struct scsi_cmnd *,
+		     void (*scsi_done)(struct scsi_cmnd *));
+extern int sas_target_alloc(struct scsi_target *);
+extern int sas_slave_alloc(struct scsi_device *);
+extern int sas_slave_configure(struct scsi_device *);
+extern void sas_slave_destroy(struct scsi_device *);
+extern int sas_change_queue_depth(struct scsi_device *, int new_depth);
+extern int sas_change_queue_type(struct scsi_device *, int qt);
+extern int sas_bios_param(struct scsi_device *,
+			  struct block_device *,
+			  sector_t capacity, int *hsc);
+extern struct scsi_transport_template *
+sas_domain_attach_transport(struct sas_domain_function_template *);
+extern void sas_domain_release_transport(struct scsi_transport_template *);
+
+int  sas_discover_root_expander(struct domain_device *);
+
+void sas_init_ex_attr(void);
+
+int  sas_ex_revalidate_domain(struct domain_device *);
+
+void sas_unregister_domain_devices(struct asd_sas_port *port);
+void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *);
+int  sas_discover_event(struct asd_sas_port *, enum discover_event ev);
+
+int  sas_discover_sata(struct domain_device *);
+int  sas_discover_end_dev(struct domain_device *);
+
+void sas_unregister_dev(struct domain_device *);
+
+void sas_init_dev(struct domain_device *);
+
+#endif /* _LIBSAS_H_ */
diff --git a/include/scsi/sas.h b/include/scsi/sas.h
new file mode 100644
index 0000000..2f4b6af
--- /dev/null
+++ b/include/scsi/sas.h
@@ -0,0 +1,631 @@
+/*
+ * SAS structures and definitions header file
+ *
+ * Copyright (C) 2005 Adaptec, Inc.  All rights reserved.
+ * Copyright (C) 2005 Luben Tuikov <luben_tuikov@adaptec.com>
+ *
+ * This file is licensed under GPLv2.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#ifndef _SAS_H_
+#define _SAS_H_
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#define SAS_ADDR_SIZE        8
+#define HASHED_SAS_ADDR_SIZE 3
+#define SAS_ADDR(_sa) ((unsigned long long) be64_to_cpu(*(__be64 *)(_sa)))
+
+#define SMP_REQUEST             0x40
+#define SMP_RESPONSE            0x41
+
+#define SSP_DATA                0x01
+#define SSP_XFER_RDY            0x05
+#define SSP_COMMAND             0x06
+#define SSP_RESPONSE            0x07
+#define SSP_TASK                0x16
+
+#define SMP_REPORT_GENERAL       0x00
+#define SMP_REPORT_MANUF_INFO    0x01
+#define SMP_READ_GPIO_REG        0x02
+#define SMP_DISCOVER             0x10
+#define SMP_REPORT_PHY_ERR_LOG   0x11
+#define SMP_REPORT_PHY_SATA      0x12
+#define SMP_REPORT_ROUTE_INFO    0x13
+#define SMP_WRITE_GPIO_REG       0x82
+#define SMP_CONF_ROUTE_INFO      0x90
+#define SMP_PHY_CONTROL          0x91
+#define SMP_PHY_TEST_FUNCTION    0x92
+
+#define SMP_RESP_FUNC_ACC        0x00
+#define SMP_RESP_FUNC_UNK        0x01
+#define SMP_RESP_FUNC_FAILED     0x02
+#define SMP_RESP_INV_FRM_LEN     0x03
+#define SMP_RESP_NO_PHY          0x10
+#define SMP_RESP_NO_INDEX        0x11
+#define SMP_RESP_PHY_NO_SATA     0x12
+#define SMP_RESP_PHY_UNK_OP      0x13
+#define SMP_RESP_PHY_UNK_TESTF   0x14
+#define SMP_RESP_PHY_TEST_INPROG 0x15
+#define SMP_RESP_PHY_VACANT      0x16
+
+/* SAM TMFs */
+#define TMF_ABORT_TASK      0x01
+#define TMF_ABORT_TASK_SET  0x02
+#define TMF_CLEAR_TASK_SET  0x04
+#define TMF_LU_RESET        0x08
+#define TMF_CLEAR_ACA       0x40
+#define TMF_QUERY_TASK      0x80
+
+/* SAS TMF responses */
+#define TMF_RESP_FUNC_COMPLETE   0x00
+#define TMF_RESP_INVALID_FRAME   0x02
+#define TMF_RESP_FUNC_ESUPP      0x04
+#define TMF_RESP_FUNC_FAILED     0x05
+#define TMF_RESP_FUNC_SUCC       0x08
+#define TMF_RESP_NO_LUN          0x09
+#define TMF_RESP_OVERLAPPED_TAG  0x0A
+
+enum sas_oob_mode {
+	OOB_NOT_CONNECTED,
+	SATA_OOB_MODE,
+	SAS_OOB_MODE
+};
+
+/* See sas_discover.c if you plan on changing these.
+ */
+enum sas_dev_type {
+	NO_DEVICE   = 0,	  /* protocol */
+	SAS_END_DEV = 1,	  /* protocol */
+	EDGE_DEV    = 2,	  /* protocol */
+	FANOUT_DEV  = 3,	  /* protocol */
+	SAS_HA      = 4,
+	SATA_DEV    = 5,
+	SATA_PM     = 7,
+	SATA_PM_PORT= 8,
+};
+
+/* Partly from IDENTIFY address frame. */
+enum sas_proto {
+	SATA_PROTO    = 1,
+	SAS_PROTO_SMP = 2,	  /* protocol */
+	SAS_PROTO_STP = 4,	  /* protocol */
+	SAS_PROTO_SSP = 8,	  /* protocol */
+	SAS_PROTO_ALL = 0xE,
+};
+
+/* From the spec; local phys only */
+enum phy_func {
+	PHY_FUNC_NOP,
+	PHY_FUNC_LINK_RESET,		  /* Enables the phy */
+	PHY_FUNC_HARD_RESET,
+	PHY_FUNC_DISABLE,
+	PHY_FUNC_CLEAR_ERROR_LOG = 5,
+	PHY_FUNC_CLEAR_AFFIL,
+	PHY_FUNC_TX_SATA_PS_SIGNAL,
+	PHY_FUNC_RELEASE_SPINUP_HOLD = 0x10, /* LOCAL PORT ONLY! */
+	PHY_FUNC_SET_LINK_RATE,
+};
+
+/* SAS LLDD would need to report only _very_few_ of those, like BROADCAST.
+ * Most of those are here for completeness.
+ */
+enum sas_prim {
+	SAS_PRIM_AIP_NORMAL = 1,
+	SAS_PRIM_AIP_R0     = 2,
+	SAS_PRIM_AIP_R1     = 3,
+	SAS_PRIM_AIP_R2     = 4,
+	SAS_PRIM_AIP_WC     = 5,
+	SAS_PRIM_AIP_WD     = 6,
+	SAS_PRIM_AIP_WP     = 7,
+	SAS_PRIM_AIP_RWP    = 8,
+
+	SAS_PRIM_BC_CH      = 9,
+	SAS_PRIM_BC_RCH0    = 10,
+	SAS_PRIM_BC_RCH1    = 11,
+	SAS_PRIM_BC_R0      = 12,
+	SAS_PRIM_BC_R1      = 13,
+	SAS_PRIM_BC_R2      = 14,
+	SAS_PRIM_BC_R3      = 15,
+	SAS_PRIM_BC_R4      = 16,
+
+	SAS_PRIM_NOTIFY_ENSP= 17,
+	SAS_PRIM_NOTIFY_R0  = 18,
+	SAS_PRIM_NOTIFY_R1  = 19,
+	SAS_PRIM_NOTIFY_R2  = 20,
+
+	SAS_PRIM_CLOSE_CLAF = 21,
+	SAS_PRIM_CLOSE_NORM = 22,
+	SAS_PRIM_CLOSE_R0   = 23,
+	SAS_PRIM_CLOSE_R1   = 24,
+
+	SAS_PRIM_OPEN_RTRY  = 25,
+	SAS_PRIM_OPEN_RJCT  = 26,
+	SAS_PRIM_OPEN_ACPT  = 27,
+
+	SAS_PRIM_DONE       = 28,
+	SAS_PRIM_BREAK      = 29,
+
+	SATA_PRIM_DMAT      = 33,
+	SATA_PRIM_PMNAK     = 34,
+	SATA_PRIM_PMACK     = 35,
+	SATA_PRIM_PMREQ_S   = 36,
+	SATA_PRIM_PMREQ_P   = 37,
+	SATA_SATA_R_ERR     = 38,
+};
+
+enum sas_open_rej_reason {
+	/* Abandon open */
+	SAS_OREJ_UNKNOWN   = 0,
+	SAS_OREJ_BAD_DEST  = 1,
+	SAS_OREJ_CONN_RATE = 2,
+	SAS_OREJ_EPROTO    = 3,
+	SAS_OREJ_RESV_AB0  = 4,
+	SAS_OREJ_RESV_AB1  = 5,
+	SAS_OREJ_RESV_AB2  = 6,
+	SAS_OREJ_RESV_AB3  = 7,
+	SAS_OREJ_WRONG_DEST= 8,
+	SAS_OREJ_STP_NORES = 9,
+
+	/* Retry open */
+	SAS_OREJ_NO_DEST   = 10,
+	SAS_OREJ_PATH_BLOCKED = 11,
+	SAS_OREJ_RSVD_CONT0 = 12,
+	SAS_OREJ_RSVD_CONT1 = 13,
+	SAS_OREJ_RSVD_INIT0 = 14,
+	SAS_OREJ_RSVD_INIT1 = 15,
+	SAS_OREJ_RSVD_STOP0 = 16,
+	SAS_OREJ_RSVD_STOP1 = 17,
+	SAS_OREJ_RSVD_RETRY = 18,
+};
+
+struct  dev_to_host_fis {
+	u8     fis_type;	  /* 0x34 */
+	u8     flags;
+	u8     status;
+	u8     error;
+
+	u8     lbal;
+	union { u8 lbam; u8 byte_count_low; };
+	union { u8 lbah; u8 byte_count_high; };
+	u8     device;
+
+	u8     lbal_exp;
+	u8     lbam_exp;
+	u8     lbah_exp;
+	u8     _r_a;
+
+	union { u8  sector_count; u8 interrupt_reason; };
+	u8     sector_count_exp;
+	u8     _r_b;
+	u8     _r_c;
+
+	u32    _r_d;
+} __attribute__ ((packed));
+
+struct host_to_dev_fis {
+	u8     fis_type;	  /* 0x27 */
+	u8     flags;
+	u8     command;
+	u8     features;
+
+	u8     lbal;
+	union { u8 lbam; u8 byte_count_low; };
+	union { u8 lbah; u8 byte_count_high; };
+	u8     device;
+
+	u8     lbal_exp;
+	u8     lbam_exp;
+	u8     lbah_exp;
+	u8     features_exp;
+
+	union { u8  sector_count; u8 interrupt_reason; };
+	u8     sector_count_exp;
+	u8     _r_a;
+	u8     control;
+
+	u32    _r_b;
+} __attribute__ ((packed));
+
+/* Prefer to have code clarity over header file clarity.
+ */
+#ifdef __LITTLE_ENDIAN_BITFIELD
+struct sas_identify_frame {
+	/* Byte 0 */
+	u8  frame_type:4;
+	u8  dev_type:3;
+	u8  _un0:1;
+
+	/* Byte 1 */
+	u8  _un1;
+
+	/* Byte 2 */
+	union {
+		struct {
+			u8  _un20:1;
+			u8  smp_iport:1;
+			u8  stp_iport:1;
+			u8  ssp_iport:1;
+			u8  _un247:4;
+		};
+		u8 initiator_bits;
+	};
+
+	/* Byte 3 */
+	union {
+		struct {
+			u8  _un30:1;
+			u8 smp_tport:1;
+			u8 stp_tport:1;
+			u8 ssp_tport:1;
+			u8 _un347:4;
+		};
+		u8 target_bits;
+	};
+
+	/* Byte 4 - 11 */
+	u8 _un4_11[8];
+
+	/* Byte 12 - 19 */
+	u8 sas_addr[SAS_ADDR_SIZE];
+
+	/* Byte 20 */
+	u8 phy_id;
+
+	u8 _un21_27[7];
+
+	__be32 crc;
+} __attribute__ ((packed));
+
+struct ssp_frame_hdr {
+	u8     frame_type;
+	u8     hashed_dest_addr[HASHED_SAS_ADDR_SIZE];
+	u8     _r_a;
+	u8     hashed_src_addr[HASHED_SAS_ADDR_SIZE];
+	__be16 _r_b;
+
+	u8     changing_data_ptr:1;
+	u8     retransmit:1;
+	u8     retry_data_frames:1;
+	u8     _r_c:5;
+
+	u8     num_fill_bytes:2;
+	u8     _r_d:6;
+
+	u32    _r_e;
+	__be16 tag;
+	__be16 tptt;
+	__be32 data_offs;
+} __attribute__ ((packed));
+
+struct ssp_response_iu {
+	u8     _r_a[10];	  /* first 10 bytes: unnamed filler */
+
+	u8     datapres:2;	  /* low two bits of this byte (LE bitfield order) */
+	u8     _r_b:6;
+
+	u8     status;
+
+	u32    _r_c;
+
+	__be32 sense_data_len;	  /* big-endian on the wire */
+	__be32 response_data_len; /* big-endian on the wire */
+
+	u8     resp_data[0];	  /* zero-length: marks the byte after the fixed part */
+	u8     sense_data[0];	  /* zero-length too, so same offset as resp_data */
+} __attribute__ ((packed));
+
+/* ---------- SMP ---------- */
+
+struct report_general_resp {
+	__be16  change_count;
+	__be16  route_indexes;
+	u8      _r_a;
+	u8      num_phys;
+
+	u8      conf_route_table:1;
+	u8      configuring:1;
+	u8      _r_b:6;
+
+	u8      _r_c;
+
+	u8      enclosure_logical_id[8];
+
+	u8      _r_d[12];
+} __attribute__ ((packed));
+
+struct discover_resp {
+	u8    _r_a[5];
+
+	u8    phy_id;
+	__be16 _r_b;
+
+	u8    _r_c:4;
+	u8    attached_dev_type:3;
+	u8    _r_d:1;
+
+	u8    linkrate:4;
+	u8    _r_e:4;
+
+	u8    attached_sata_host:1;
+	u8    iproto:3;
+	u8    _r_f:4;
+
+	u8    attached_sata_dev:1;
+	u8    tproto:3;
+	u8    _r_g:3;
+	u8    attached_sata_ps:1;
+
+	u8    sas_addr[8];
+	u8    attached_sas_addr[8];
+	u8    attached_phy_id;
+
+	u8    _r_h[7];
+
+	u8    hmin_linkrate:4;
+	u8    pmin_linkrate:4;
+	u8    hmax_linkrate:4;
+	u8    pmax_linkrate:4;
+
+	u8    change_count;
+
+	u8    pptv:4;
+	u8    _r_i:3;
+	u8    virtual:1;
+
+	u8    routing_attr:4;
+	u8    _r_j:4;
+
+	u8    conn_type;
+	u8    conn_el_index;
+	u8    conn_phy_link;
+
+	u8    _r_k[8];
+} __attribute__ ((packed));
+
+struct report_phy_sata_resp {
+	u8    _r_a[5];
+
+	u8    phy_id;
+	u8    _r_b;
+
+	u8    affil_valid:1;
+	u8    affil_supp:1;
+	u8    _r_c:6;
+
+	u32    _r_d;
+
+	u8    stp_sas_addr[8];
+
+	struct dev_to_host_fis fis;
+
+	u32   _r_e;
+
+	u8    affil_stp_ini_addr[8];
+
+	__be32 crc;
+} __attribute__ ((packed));
+
+struct smp_resp {
+	u8    frame_type;
+	u8    function;
+	u8    result;
+	u8    reserved;
+	union {
+		struct report_general_resp  rg;
+		struct discover_resp        disc;
+		struct report_phy_sata_resp rps;
+	};
+} __attribute__ ((packed));
+
+#elif defined(__BIG_ENDIAN_BITFIELD)
+struct sas_identify_frame {
+	/* Byte 0 */
+	u8  _un0:1;
+	u8  dev_type:3;
+	u8  frame_type:4;
+
+	/* Byte 1 */
+	u8  _un1;
+
+	/* Byte 2 */
+	union {
+		struct {
+			u8  _un247:4;
+			u8  ssp_iport:1;
+			u8  stp_iport:1;
+			u8  smp_iport:1;
+			u8  _un20:1;
+		};
+		u8 initiator_bits;
+	};
+
+	/* Byte 3 */
+	union {
+		struct {
+			u8 _un347:4;
+			u8 ssp_tport:1;
+			u8 stp_tport:1;
+			u8 smp_tport:1;
+			u8 _un30:1;
+		};
+		u8 target_bits;
+	};
+
+	/* Byte 4 - 11 */
+	u8 _un4_11[8];
+
+	/* Byte 12 - 19 */
+	u8 sas_addr[SAS_ADDR_SIZE];
+
+	/* Byte 20 */
+	u8 phy_id;
+
+	u8 _un21_27[7];
+
+	__be32 crc;
+} __attribute__ ((packed));
+
+struct ssp_frame_hdr {
+	u8     frame_type;
+	u8     hashed_dest_addr[HASHED_SAS_ADDR_SIZE];
+	u8     _r_a;
+	u8     hashed_src_addr[HASHED_SAS_ADDR_SIZE];
+	__be16 _r_b;
+
+	u8     _r_c:5;
+	u8     retry_data_frames:1;
+	u8     retransmit:1;
+	u8     changing_data_ptr:1;
+
+	u8     _r_d:6;
+	u8     num_fill_bytes:2;
+
+	u32    _r_e;
+	__be16 tag;
+	__be16 tptt;
+	__be32 data_offs;
+} __attribute__ ((packed));
+
+struct ssp_response_iu {
+	u8     _r_a[10];	  /* first 10 bytes: unnamed filler */
+
+	u8     _r_b:6;
+	u8     datapres:2;	  /* declared last here: BE bitfield order */
+
+	u8     status;
+
+	u32    _r_c;
+
+	__be32 sense_data_len;	  /* big-endian on the wire */
+	__be32 response_data_len; /* big-endian on the wire */
+
+	u8     resp_data[0];	  /* zero-length: marks the byte after the fixed part */
+	u8     sense_data[0];	  /* zero-length too, so same offset as resp_data */
+} __attribute__ ((packed));
+
+/* ---------- SMP ---------- */
+
+struct report_general_resp {
+	__be16  change_count;
+	__be16  route_indexes;
+	u8      _r_a;
+	u8      num_phys;
+
+	u8      _r_b:6;
+	u8      configuring:1;
+	u8      conf_route_table:1;
+
+	u8      _r_c;
+
+	u8      enclosure_logical_id[8];
+
+	u8      _r_d[12];
+} __attribute__ ((packed));
+
+struct discover_resp {
+	u8    _r_a[5];
+
+	u8    phy_id;
+	__be16 _r_b;
+
+	u8    _r_d:1;
+	u8    attached_dev_type:3;
+	u8    _r_c:4;
+
+	u8    _r_e:4;
+	u8    linkrate:4;
+
+	u8    _r_f:4;
+	u8    iproto:3;
+	u8    attached_sata_host:1;
+
+	u8    attached_sata_ps:1;
+	u8    _r_g:3;
+	u8    tproto:3;
+	u8    attached_sata_dev:1;
+
+	u8    sas_addr[8];
+	u8    attached_sas_addr[8];
+	u8    attached_phy_id;
+
+	u8    _r_h[7];
+
+	u8    pmin_linkrate:4;
+	u8    hmin_linkrate:4;
+	u8    pmax_linkrate:4;
+	u8    hmax_linkrate:4;
+
+	u8    change_count;
+
+	u8    virtual:1;
+	u8    _r_i:3;
+	u8    pptv:4;
+
+	u8    _r_j:4;
+	u8    routing_attr:4;
+
+	u8    conn_type;
+	u8    conn_el_index;
+	u8    conn_phy_link;
+
+	u8    _r_k[8];
+} __attribute__ ((packed));
+
+struct report_phy_sata_resp {
+	u8    _r_a[5];
+
+	u8    phy_id;
+	u8    _r_b;
+
+	u8    _r_c:6;
+	u8    affil_supp:1;
+	u8    affil_valid:1;
+
+	u32   _r_d;
+
+	u8    stp_sas_addr[8];
+
+	struct dev_to_host_fis fis;
+
+	u32   _r_e;
+
+	u8    affil_stp_ini_addr[8];
+
+	__be32 crc;
+} __attribute__ ((packed));
+
+struct smp_resp {
+	u8    frame_type;
+	u8    function;
+	u8    result;
+	u8    reserved;
+	union {
+		struct report_general_resp  rg;
+		struct discover_resp        disc;
+		struct report_phy_sata_resp rps;
+	};
+} __attribute__ ((packed));
+
+#else
+#error "Bitfield order not defined!"
+#endif
+
+#endif /* _SAS_H_ */
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index c60b8ff..84a6d5f 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -25,13 +25,6 @@
 #define COMMAND_SIZE(opcode) scsi_command_size[((opcode) >> 5) & 7]
 
 /*
- *	SCSI device types
- */
-
-#define MAX_SCSI_DEVICE_CODE 15
-extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
-
-/*
  * Special value for scanning to specify scanning or rescanning of all
  * possible channels, (target) ids, or luns on a given shost.
  */
@@ -225,6 +218,9 @@
 #define TYPE_RBC	    0x0e
 #define TYPE_NO_LUN         0x7f
 
+/* Returns a human-readable name for the device */
+extern const char * scsi_device_type(unsigned type);
+
 /*
  * standard mode-select header prepended to all mode-select commands
  */
@@ -433,4 +429,10 @@
 /* Used to obtain the PCI location of a device */
 #define SCSI_IOCTL_GET_PCI		0x5387
 
+/* Pull a u32 out of a SCSI message (BE SCSI conventions); reads ptr[0..3] */
+static inline u32 scsi_to_u32(u8 *ptr)
+{	/* widen each byte to u32 first: shifting a promoted int by 24 can hit the sign bit */
+	return ((u32)ptr[0]<<24) + ((u32)ptr[1]<<16) + ((u32)ptr[2]<<8) + ptr[3];
+}
+
 #endif /* _SCSI_SCSI_H */
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 58e6444..be117f8 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -118,20 +118,6 @@
 	unsigned long pid;	/* Process ID, starts at 0. Unique per host. */
 };
 
-/*
- * These are the values that scsi_cmd->state can take.
- */
-#define SCSI_STATE_TIMEOUT         0x1000
-#define SCSI_STATE_FINISHED        0x1001
-#define SCSI_STATE_FAILED          0x1002
-#define SCSI_STATE_QUEUED          0x1003
-#define SCSI_STATE_UNUSED          0x1006
-#define SCSI_STATE_DISCONNECTING   0x1008
-#define SCSI_STATE_INITIALIZING    0x1009
-#define SCSI_STATE_BHQUEUE         0x100a
-#define SCSI_STATE_MLQUEUE         0x100b
-
-
 extern struct scsi_cmnd *scsi_get_command(struct scsi_device *, gfp_t);
 extern void scsi_put_command(struct scsi_cmnd *);
 extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index b3dd90f..39c6f8c 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -16,6 +16,7 @@
 struct Scsi_Host;
 struct scsi_host_cmd_pool;
 struct scsi_transport_template;
+struct blk_queue_tag;	/* forward declaration for the Scsi_Host bqt pointer */
 
 
 /*
@@ -466,6 +467,12 @@
 	struct scsi_transport_template *transportt;
 
 	/*
+	 * area to keep a shared tag map (if needed, will be
+	 * NULL if not)
+	 */
+	struct blk_queue_tag	*bqt;
+
+	/*
 	 * The following two fields are protected with host_lock;
 	 * however, eh routines can safely access during eh processing
 	 * without acquiring the lock.
diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h
new file mode 100644
index 0000000..8c1470c
--- /dev/null
+++ b/include/scsi/scsi_netlink.h
@@ -0,0 +1,87 @@
+/*
+ *  SCSI Transport Netlink Interface
+ *    Used for the posting of outbound SCSI transport events
+ *
+ *  Copyright (C) 2006   James Smart, Emulex Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#ifndef SCSI_NETLINK_H
+#define SCSI_NETLINK_H
+
+/*
+ * This file is intended to be included by both kernel and user space
+ */
+
+/* Single Netlink Message type to send all SCSI Transport messages (parenthesized so it expands safely in expressions) */
+#define SCSI_TRANSPORT_MSG		(NLMSG_MIN_TYPE + 1)
+
+/* SCSI Transport Broadcast Groups */
+	/* leaving groups 0 and 1 unassigned */
+#define SCSI_NL_GRP_FC_EVENTS		(1<<2)		/* Group 2 */
+#define SCSI_NL_GRP_CNT			3
+
+
+/* SCSI_TRANSPORT_MSG event message header */
+struct scsi_nl_hdr {
+	uint8_t version;
+	uint8_t transport;
+	uint16_t magic;
+	uint16_t msgtype;
+	uint16_t msglen;
+} __attribute__((aligned(sizeof(uint64_t))));
+
+/* scsi_nl_hdr->version value */
+#define SCSI_NL_VERSION				1
+
+/* scsi_nl_hdr->magic value */
+#define SCSI_NL_MAGIC				0xA1B2
+
+/* scsi_nl_hdr->transport value */
+#define SCSI_NL_TRANSPORT			0
+#define SCSI_NL_TRANSPORT_FC			1
+#define SCSI_NL_MAX_TRANSPORTS			2
+
+/* scsi_nl_hdr->msgtype values are defined in each transport */
+
+
+/*
+ * Vendor ID:
+ *   If transports post vendor-unique events, they must pass a well-known
+ *   64-bit vendor identifier. This identifier consists of 8 bits indicating
+ *   the "type" of identifier contained, and 56 bits of id data.
+ *
+ *   Identifiers for each type:
+ *    PCI :  ID data is the 16 bit PCI Registered Vendor ID
+ */
+#define SCSI_NL_VID_TYPE_SHIFT		56	/* type occupies the top byte of the 64-bit id */
+#define SCSI_NL_VID_TYPE_MASK		((uint64_t)0xFF << SCSI_NL_VID_TYPE_SHIFT)
+#define SCSI_NL_VID_TYPE_PCI		((uint64_t)0x01 << SCSI_NL_VID_TYPE_SHIFT)
+#define SCSI_NL_VID_ID_MASK		(~ SCSI_NL_VID_TYPE_MASK)	/* remaining low bits: id data */
+
+
+#define INIT_SCSI_NL_HDR(hdr, t, mtype, mlen)			\
+	{							\
+	(hdr)->version = SCSI_NL_VERSION;			\
+	(hdr)->transport = t;					\
+	(hdr)->magic = SCSI_NL_MAGIC;				\
+	(hdr)->msgtype = mtype;					\
+	(hdr)->msglen = mlen;					\
+	}
+
+
+#endif /* SCSI_NETLINK_H */
+
diff --git a/include/scsi/scsi_netlink_fc.h b/include/scsi/scsi_netlink_fc.h
new file mode 100644
index 0000000..cbf76e4
--- /dev/null
+++ b/include/scsi/scsi_netlink_fc.h
@@ -0,0 +1,71 @@
+/*
+ *  FC Transport Netlink Interface
+ *
+ *  Copyright (C) 2006   James Smart, Emulex Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#ifndef SCSI_NETLINK_FC_H
+#define SCSI_NETLINK_FC_H
+
+#include <scsi/scsi_netlink.h>
+
+/*
+ * This file is intended to be included by both kernel and user space
+ */
+
+/*
+ * FC Transport Message Types
+ */
+	/* kernel -> user */
+#define FC_NL_ASYNC_EVENT			0x0100
+	/* user -> kernel */
+/* none */
+
+
+/*
+ * Message Structures :
+ */
+
+/* macro to round up message lengths to 8byte boundary */
+#define FC_NL_MSGALIGN(len)		(((len) + 7) & ~7)
+
+
+/*
+ * FC Transport Broadcast Event Message :
+ *   FC_NL_ASYNC_EVENT
+ *
+ * Note: if Vendor Unique message, &event_data will be  start of
+ * 	 vendor unique payload, and the length of the payload is
+ *       per event_datalen
+ *
+ * Note: When specifying vendor_id, be sure to read the Vendor Type and ID
+ *   formatting requirements specified in scsi_netlink.h
+ */
+struct fc_nl_event {
+	struct scsi_nl_hdr snlh;		/* must be 1st element ! */
+	uint64_t seconds;
+	uint64_t vendor_id;
+	uint16_t host_no;
+	uint16_t event_datalen;
+	uint32_t event_num;
+	uint32_t event_code;
+	uint32_t event_data;
+} __attribute__((aligned(sizeof(uint64_t))));
+
+
+#endif /* SCSI_NETLINK_FC_H */
+
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index e47e36a..d04d05a 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -4,6 +4,7 @@
 #include <linux/blkdev.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
 
 
 #define MSG_SIMPLE_TAG	0x20
@@ -66,7 +67,8 @@
 		return;
 
 	if (!blk_queue_tagged(sdev->request_queue))
-		blk_queue_init_tags(sdev->request_queue, depth, NULL);
+		blk_queue_init_tags(sdev->request_queue, depth,
+				    sdev->host->bqt);
 
 	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
 }
@@ -131,4 +133,15 @@
 	return sdev->current_cmnd;
 }
 
+/**
+ * scsi_init_shared_tag_map - create a shared tag map
+ * @shost:	the host to share the tag map among all devices
+ * @depth:	the total depth of the map
+ */
+static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
+{
+	shost->bqt = blk_init_tags(depth);
+	return shost->bqt ? 0 : -ENOMEM;
+}
+
 #endif /* _SCSI_SCSI_TCQ_H */
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index 6d28b03..fd35232 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -29,6 +29,7 @@
 
 #include <linux/sched.h>
 #include <scsi/scsi.h>
+#include <scsi/scsi_netlink.h>
 
 struct scsi_transport_template;
 
@@ -194,6 +195,7 @@
 	u32 roles;
 	enum fc_port_state port_state;	/* Will only be ONLINE or UNKNOWN */
 	u32 scsi_target_id;
+	u32 fast_io_fail_tmo;
 
 	/* exported data */
 	void *dd_data;			/* Used for driver-specific storage */
@@ -206,6 +208,7 @@
 	struct device dev;
  	struct work_struct dev_loss_work;
  	struct work_struct scan_work;
+ 	struct work_struct fail_io_work;
  	struct work_struct stgt_delete_work;
 	struct work_struct rport_delete_work;
 } __attribute__((aligned(sizeof(unsigned long))));
@@ -284,6 +287,30 @@
 
 
 /*
+ * FC Event Codes - Polled and Async, following FC HBAAPI v2.0 guidelines
+ */
+
+/*
+ * fc_host_event_code: If you alter this, you also need to alter
+ * scsi_transport_fc.c (for the ascii descriptions).
+ */
+enum fc_host_event_code  {
+	FCH_EVT_LIP			= 0x1,
+	FCH_EVT_LINKUP			= 0x2,
+	FCH_EVT_LINKDOWN		= 0x3,
+	FCH_EVT_LIPRESET		= 0x4,
+	FCH_EVT_RSCN			= 0x5,
+	FCH_EVT_ADAPTER_CHANGE		= 0x103,
+	FCH_EVT_PORT_UNKNOWN		= 0x200,
+	FCH_EVT_PORT_OFFLINE		= 0x201,
+	FCH_EVT_PORT_ONLINE		= 0x202,
+	FCH_EVT_PORT_FABRIC		= 0x204,
+	FCH_EVT_LINK_UNKNOWN		= 0x500,
+	FCH_EVT_VENDOR_UNIQUE		= 0xffff,
+};
+
+
+/*
  * FC Local Port (Host) Attributes
  *
  * Attributes are based on HBAAPI V2.0 definitions.
@@ -312,7 +339,6 @@
 	u64 permanent_port_name;
 	u32 supported_classes;
 	u8  supported_fc4s[FC_FC4_LIST_SIZE];
-	char symbolic_name[FC_SYMBOLIC_NAME_SIZE];
 	u32 supported_speeds;
 	u32 maxframe_size;
 	char serial_number[FC_SERIAL_NUMBER_SIZE];
@@ -324,6 +350,8 @@
 	u8  active_fc4s[FC_FC4_LIST_SIZE];
 	u32 speed;
 	u64 fabric_name;
+	char symbolic_name[FC_SYMBOLIC_NAME_SIZE];
+	char system_hostname[FC_SYMBOLIC_NAME_SIZE];
 
 	/* Private (Transport-managed) Attributes */
 	enum fc_tgtid_binding_type  tgtid_bind_type;
@@ -354,8 +382,6 @@
 	(((struct fc_host_attrs *)(x)->shost_data)->supported_classes)
 #define fc_host_supported_fc4s(x)	\
 	(((struct fc_host_attrs *)(x)->shost_data)->supported_fc4s)
-#define fc_host_symbolic_name(x)	\
-	(((struct fc_host_attrs *)(x)->shost_data)->symbolic_name)
 #define fc_host_supported_speeds(x)	\
 	(((struct fc_host_attrs *)(x)->shost_data)->supported_speeds)
 #define fc_host_maxframe_size(x)	\
@@ -374,6 +400,10 @@
 	(((struct fc_host_attrs *)(x)->shost_data)->speed)
 #define fc_host_fabric_name(x)	\
 	(((struct fc_host_attrs *)(x)->shost_data)->fabric_name)
+#define fc_host_symbolic_name(x)	\
+	(((struct fc_host_attrs *)(x)->shost_data)->symbolic_name)
+#define fc_host_system_hostname(x)	\
+	(((struct fc_host_attrs *)(x)->shost_data)->system_hostname)
 #define fc_host_tgtid_bind_type(x) \
 	(((struct fc_host_attrs *)(x)->shost_data)->tgtid_bind_type)
 #define fc_host_rports(x) \
@@ -409,12 +439,17 @@
 	void	(*get_host_active_fc4s)(struct Scsi_Host *);
 	void	(*get_host_speed)(struct Scsi_Host *);
 	void	(*get_host_fabric_name)(struct Scsi_Host *);
+	void	(*get_host_symbolic_name)(struct Scsi_Host *);
+	void	(*set_host_system_hostname)(struct Scsi_Host *);
 
 	struct fc_host_statistics * (*get_fc_host_stats)(struct Scsi_Host *);
 	void	(*reset_fc_host_stats)(struct Scsi_Host *);
 
 	int	(*issue_fc_host_lip)(struct Scsi_Host *);
 
+	void    (*dev_loss_tmo_callbk)(struct fc_rport *);
+	void	(*terminate_rport_io)(struct fc_rport *);
+
 	/* allocation lengths for host-specific data */
 	u32	 			dd_fcrport_size;
 
@@ -445,7 +480,6 @@
 	unsigned long	show_host_permanent_port_name:1;
 	unsigned long	show_host_supported_classes:1;
 	unsigned long	show_host_supported_fc4s:1;
-	unsigned long	show_host_symbolic_name:1;
 	unsigned long	show_host_supported_speeds:1;
 	unsigned long	show_host_maxframe_size:1;
 	unsigned long	show_host_serial_number:1;
@@ -456,6 +490,8 @@
 	unsigned long	show_host_active_fc4s:1;
 	unsigned long	show_host_speed:1;
 	unsigned long	show_host_fabric_name:1;
+	unsigned long	show_host_symbolic_name:1;
+	unsigned long	show_host_system_hostname:1;
 };
 
 
@@ -491,6 +527,25 @@
 	return result;
 }
 
+static inline u64 wwn_to_u64(u8 *wwn)
+{
+	return (u64)wwn[0] << 56 | (u64)wwn[1] << 48 |
+	    (u64)wwn[2] << 40 | (u64)wwn[3] << 32 |
+	    (u64)wwn[4] << 24 | (u64)wwn[5] << 16 |
+	    (u64)wwn[6] <<  8 | (u64)wwn[7];
+}
+
+/* Store a 64-bit WWN into wwn[0..7], most significant byte first. */
+static inline void u64_to_wwn(u64 inm, u8 *wwn)
+{
+	int idx;
+
+	/* Walk from the last byte back, peeling off the low 8 bits each time. */
+	for (idx = 7; idx >= 0; idx--) {
+		wwn[idx] = inm & 0xff;
+		inm >>= 8;
+	}
+}
 
 struct scsi_transport_template *fc_attach_transport(
 			struct fc_function_template *);
@@ -501,13 +556,14 @@
 void fc_remote_port_delete(struct fc_rport  *rport);
 void fc_remote_port_rolechg(struct fc_rport  *rport, u32 roles);
 int scsi_is_fc_rport(const struct device *);
-
-static inline u64 wwn_to_u64(u8 *wwn)
-{
-	return (u64)wwn[0] << 56 | (u64)wwn[1] << 48 |
-	    (u64)wwn[2] << 40 | (u64)wwn[3] << 32 |
-	    (u64)wwn[4] << 24 | (u64)wwn[5] << 16 |
-	    (u64)wwn[6] <<  8 | (u64)wwn[7];
-}
+u32 fc_get_event_number(void);
+void fc_host_post_event(struct Scsi_Host *shost, u32 event_number,
+		enum fc_host_event_code event_code, u32 event_data);
+void fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number,
+		u32 data_len, char * data_buf, u64 vendor_id);
+	/* Note: when specifying vendor_id to fc_host_post_vendor_event()
+	 *   be sure to read the Vendor Type and ID formatting requirements
+	 *   specified in scsi_netlink.h
+	 */
 
 #endif /* SCSI_TRANSPORT_FC_H */
diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
index 6cc2314..5302437 100644
--- a/include/scsi/scsi_transport_sas.h
+++ b/include/scsi/scsi_transport_sas.h
@@ -24,15 +24,23 @@
 };
 
 enum sas_linkrate {
-	SAS_LINK_RATE_UNKNOWN,
-	SAS_PHY_DISABLED,
-	SAS_LINK_RATE_FAILED,
-	SAS_SATA_SPINUP_HOLD,
-	SAS_SATA_PORT_SELECTOR,
-	SAS_LINK_RATE_1_5_GBPS,
-	SAS_LINK_RATE_3_0_GBPS,
-	SAS_LINK_RATE_6_0_GBPS,
-	SAS_LINK_VIRTUAL,
+	/* These Values are defined in the SAS standard */
+	SAS_LINK_RATE_UNKNOWN = 0,
+	SAS_PHY_DISABLED = 1,
+	SAS_PHY_RESET_PROBLEM = 2,
+	SAS_SATA_SPINUP_HOLD = 3,
+	SAS_SATA_PORT_SELECTOR = 4,
+	SAS_PHY_RESET_IN_PROGRESS = 5,
+	SAS_LINK_RATE_1_5_GBPS = 8,
+	SAS_LINK_RATE_G1 = SAS_LINK_RATE_1_5_GBPS,
+	SAS_LINK_RATE_3_0_GBPS = 9,
+	SAS_LINK_RATE_G2 = SAS_LINK_RATE_3_0_GBPS,
+	SAS_LINK_RATE_6_0_GBPS = 10,
+	/* These are virtual to the transport class and may never
+	 * be signalled normally since the standard defined field
+	 * is only 4 bits */
+	SAS_LINK_RATE_FAILED = 0x10,
+	SAS_PHY_VIRTUAL = 0x11,
 };
 
 struct sas_identify {
@@ -57,9 +65,6 @@
 	enum sas_linkrate	maximum_linkrate_hw;
 	enum sas_linkrate	maximum_linkrate;
 
-	/* internal state */
-	unsigned int		local_attached : 1;
-
 	/* link error statistics */
 	u32			invalid_dword_count;
 	u32			running_disparity_error_count;
@@ -145,12 +150,18 @@
 #define transport_class_to_sas_port(cdev) \
 	dev_to_sas_port((cdev)->dev)
 
+struct sas_phy_linkrates {
+	enum sas_linkrate maximum_linkrate;
+	enum sas_linkrate minimum_linkrate;
+};
+
 /* The functions by which the transport class and the driver communicate */
 struct sas_function_template {
 	int (*get_linkerrors)(struct sas_phy *);
 	int (*get_enclosure_identifier)(struct sas_rphy *, u64 *);
 	int (*get_bay_identifier)(struct sas_rphy *);
 	int (*phy_reset)(struct sas_phy *, int);
+	int (*set_phy_speed)(struct sas_phy *, struct sas_phy_linkrates *);
 };
 
 
@@ -196,4 +207,6 @@
 		rphy->identify.device_type == SAS_EDGE_EXPANDER_DEVICE;
 }
 
+#define scsi_is_sas_phy_local(phy)	scsi_is_host_device((phy)->dev.parent)
+
 #endif /* SCSI_TRANSPORT_SAS_H */
diff --git a/include/scsi/scsi_transport_spi.h b/include/scsi/scsi_transport_spi.h
index 302680c..da180f7 100644
--- a/include/scsi/scsi_transport_spi.h
+++ b/include/scsi/scsi_transport_spi.h
@@ -53,7 +53,8 @@
 	unsigned int support_ius; /* support Information Units */
 	unsigned int support_qas; /* supports quick arbitration and selection */
 	/* Private Fields */
-	unsigned int dv_pending:1; /* Internal flag */
+	unsigned int dv_pending:1; /* Internal flag: DV Requested */
+	unsigned int dv_in_progress:1;	/* Internal: DV started */
 	struct mutex dv_mutex; /* semaphore to serialise dv */
 };
 
diff --git a/include/sound/Kbuild b/include/sound/Kbuild
index 3a5a3df..fd054a3 100644
--- a/include/sound/Kbuild
+++ b/include/sound/Kbuild
@@ -1,2 +1,10 @@
-header-y := asound_fm.h hdsp.h hdspm.h sfnt_info.h sscape_ioctl.h
-unifdef-y := asequencer.h asound.h emu10k1.h sb16_csp.h 
+header-y += asound_fm.h
+header-y += hdsp.h
+header-y += hdspm.h
+header-y += sfnt_info.h
+header-y += sscape_ioctl.h
+
+unifdef-y += asequencer.h
+unifdef-y += asound.h
+unifdef-y += emu10k1.h
+unifdef-y += sb16_csp.h
diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h
index 758f8bf..4c43521 100644
--- a/include/sound/ac97_codec.h
+++ b/include/sound/ac97_codec.h
@@ -27,6 +27,7 @@
 
 #include <linux/bitops.h>
 #include <linux/device.h>
+#include <linux/workqueue.h>
 #include "pcm.h"
 #include "control.h"
 #include "info.h"
@@ -140,6 +141,20 @@
 #define AC97_GP_DRSS_1011	0x0000	/* LR(C) 10+11(+12) */
 #define AC97_GP_DRSS_78		0x0400	/* LR 7+8 */
 
+/* powerdown bits */
+#define AC97_PD_ADC_STATUS	0x0001	/* ADC status (RO) */
+#define AC97_PD_DAC_STATUS	0x0002	/* DAC status (RO) */
+#define AC97_PD_MIXER_STATUS	0x0004	/* Analog mixer status (RO) */
+#define AC97_PD_VREF_STATUS	0x0008	/* Vref status (RO) */
+#define AC97_PD_PR0		0x0100	/* Power down PCM ADCs and input MUX */
+#define AC97_PD_PR1		0x0200	/* Power down PCM front DAC */
+#define AC97_PD_PR2		0x0400	/* Power down Mixer (Vref still on) */
+#define AC97_PD_PR3		0x0800	/* Power down Mixer (Vref off) */
+#define AC97_PD_PR4		0x1000	/* Power down AC-Link */
+#define AC97_PD_PR5		0x2000	/* Disable internal clock usage */
+#define AC97_PD_PR6		0x4000	/* Headphone amplifier */
+#define AC97_PD_EAPD		0x8000	/* External Amplifier Power Down (EAPD) */
+
 /* extended audio ID bit defines */
 #define AC97_EI_VRA		0x0001	/* Variable bit rate supported */
 #define AC97_EI_DRA		0x0002	/* Double rate supported */
@@ -359,6 +374,7 @@
 #define AC97_SCAP_INV_EAPD	(1<<7)	/* inverted EAPD */
 #define AC97_SCAP_DETECT_BY_VENDOR (1<<8) /* use vendor registers for read tests */
 #define AC97_SCAP_NO_SPDIF	(1<<9)	/* don't build SPDIF controls */
+#define AC97_SCAP_EAPD_LED	(1<<10)	/* EAPD as mute LED */
 
 /* ac97->flags */
 #define AC97_HAS_PC_BEEP	(1<<0)	/* force PC Speaker usage */
@@ -491,6 +507,12 @@
 	/* jack-sharing info */
 	unsigned char indep_surround;
 	unsigned char channel_mode;
+
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	unsigned int power_up;	/* power states */
+	struct workqueue_struct *power_workq;
+	struct work_struct power_work;
+#endif
 	struct device dev;
 };
 
@@ -532,6 +554,15 @@
 void snd_ac97_write_cache(struct snd_ac97 *ac97, unsigned short reg, unsigned short value);
 int snd_ac97_update(struct snd_ac97 *ac97, unsigned short reg, unsigned short value);
 int snd_ac97_update_bits(struct snd_ac97 *ac97, unsigned short reg, unsigned short mask, unsigned short value);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+int snd_ac97_update_power(struct snd_ac97 *ac97, int reg, int powerup);
+#else
+static inline int snd_ac97_update_power(struct snd_ac97 *ac97, int reg,
+					int powerup)
+{
+	return 0;
+}
+#endif
 #ifdef CONFIG_PM
 void snd_ac97_suspend(struct snd_ac97 *ac97);
 void snd_ac97_resume(struct snd_ac97 *ac97);
@@ -583,6 +614,7 @@
 		     copy_flag: 1,	   /* lowlevel driver must fill all entries */
 		     spdif: 1;		   /* spdif pcm */
 	unsigned short aslots;		   /* active slots */
+	unsigned short cur_dbl;		   /* current double-rate state */
 	unsigned int rates;		   /* available rates */
 	struct {
 		unsigned short slots;	   /* driver input: requested AC97 slot numbers */
diff --git a/include/sound/ad1848.h b/include/sound/ad1848.h
index 57af1fe..c8de6f8 100644
--- a/include/sound/ad1848.h
+++ b/include/sound/ad1848.h
@@ -179,14 +179,13 @@
 #define AD1848_MIXVAL_DOUBLE(left_reg, right_reg, shift_left, shift_right, mask, invert) \
 	((left_reg) | ((right_reg) << 8) | ((shift_left) << 16) | ((shift_right) << 19) | ((mask) << 24) | ((invert) << 22))
 
-int snd_ad1848_add_ctl(struct snd_ad1848 *chip, const char *name, int index, int type, unsigned long value);
-
 /* for ease of use */
 struct ad1848_mix_elem {
 	const char *name;
 	int index;
 	int type;
 	unsigned long private_value;
+	unsigned int *tlv;
 };
 
 #define AD1848_SINGLE(xname, xindex, reg, shift, mask, invert) \
@@ -195,15 +194,26 @@
   .type = AD1848_MIX_SINGLE, \
   .private_value = AD1848_MIXVAL_SINGLE(reg, shift, mask, invert) }
 
+#define AD1848_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \
+{ .name = xname, \
+  .index = xindex, \
+  .type = AD1848_MIX_SINGLE, \
+  .private_value = AD1848_MIXVAL_SINGLE(reg, shift, mask, invert), \
+  .tlv = xtlv }
+
 #define AD1848_DOUBLE(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert) \
 { .name = xname, \
   .index = xindex, \
   .type = AD1848_MIX_DOUBLE, \
   .private_value = AD1848_MIXVAL_DOUBLE(left_reg, right_reg, shift_left, shift_right, mask, invert) }
 
-static inline int snd_ad1848_add_ctl_elem(struct snd_ad1848 *chip, const struct ad1848_mix_elem *c)
-{
-	return snd_ad1848_add_ctl(chip, c->name, c->index, c->type, c->private_value);
-}
+#define AD1848_DOUBLE_TLV(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .name = xname, \
+  .index = xindex, \
+  .type = AD1848_MIX_DOUBLE, \
+  .private_value = AD1848_MIXVAL_DOUBLE(left_reg, right_reg, shift_left, shift_right, mask, invert), \
+  .tlv = xtlv }
+
+int snd_ad1848_add_ctl_elem(struct snd_ad1848 *chip, const struct ad1848_mix_elem *c);
 
 #endif /* __SOUND_AD1848_H */
diff --git a/include/sound/ak4xxx-adda.h b/include/sound/ak4xxx-adda.h
index 3d98884..d0deca6 100644
--- a/include/sound/ak4xxx-adda.h
+++ b/include/sound/ak4xxx-adda.h
@@ -39,26 +39,39 @@
 
 #define AK4XXX_IMAGE_SIZE	(AK4XXX_MAX_CHIPS * 16)	/* 64 bytes */
 
+/* DAC label and channels */
+struct snd_akm4xxx_dac_channel {
+	char *name;		/* mixer volume name */
+	unsigned int num_channels;
+};
+
+/* ADC labels and channels */
+struct snd_akm4xxx_adc_channel {
+	char *name;		/* capture gain volume label */
+	char *switch_name;	/* capture switch */
+	unsigned int num_channels;
+};
+
 struct snd_akm4xxx {
 	struct snd_card *card;
 	unsigned int num_adcs;			/* AK4524 or AK4528 ADCs */
 	unsigned int num_dacs;			/* AK4524 or AK4528 DACs */
 	unsigned char images[AK4XXX_IMAGE_SIZE]; /* saved register image */
-	unsigned char ipga_gain[AK4XXX_MAX_CHIPS][2]; /* saved register image
-						       * for IPGA (AK4528)
-						       */
+	unsigned char volumes[AK4XXX_IMAGE_SIZE]; /* saved volume values */
 	unsigned long private_value[AK4XXX_MAX_CHIPS];	/* helper for driver */
 	void *private_data[AK4XXX_MAX_CHIPS];		/* helper for driver */
 	/* template should fill the following fields */
 	unsigned int idx_offset;		/* control index offset */
 	enum {
 		SND_AK4524, SND_AK4528, SND_AK4529,
-		SND_AK4355, SND_AK4358, SND_AK4381
+		SND_AK4355, SND_AK4358, SND_AK4381,
+		SND_AK5365
 	} type;
-	unsigned int *num_stereo;	/* array of combined counts
-					 * for the mixer
-					 */
-	char **channel_names;		/* array of mixer channel names */
+
+	/* (array) information of combined codecs */
+	struct snd_akm4xxx_dac_channel *dac_info;
+	struct snd_akm4xxx_adc_channel *adc_info;
+
 	struct snd_ak4xxx_ops ops;
 };
 
@@ -72,9 +85,9 @@
 	(ak)->images[(chip) * 16 + (reg)]
 #define snd_akm4xxx_set(ak,chip,reg,val) \
 	((ak)->images[(chip) * 16 + (reg)] = (val))
-#define snd_akm4xxx_get_ipga(ak,chip,reg) \
-	(ak)->ipga_gain[chip][(reg)-4]
-#define snd_akm4xxx_set_ipga(ak,chip,reg,val) \
-	((ak)->ipga_gain[chip][(reg)-4] = (val))
+#define snd_akm4xxx_get_vol(ak,chip,reg) \
+	(ak)->volumes[(chip) * 16 + (reg)]
+#define snd_akm4xxx_set_vol(ak,chip,reg,val) \
+	((ak)->volumes[(chip) * 16 + (reg)] = (val))
 
 #endif /* __SOUND_AK4XXX_ADDA_H */
diff --git a/include/sound/asound.h b/include/sound/asound.h
index 41885f4..c1621c6 100644
--- a/include/sound/asound.h
+++ b/include/sound/asound.h
@@ -688,7 +688,7 @@
  *                                                                          *
  ****************************************************************************/
 
-#define SNDRV_CTL_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 3)
+#define SNDRV_CTL_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 4)
 
 struct snd_ctl_card_info {
 	int card;			/* card number */
@@ -727,10 +727,15 @@
 #define SNDRV_CTL_ELEM_ACCESS_WRITE		(1<<1)
 #define SNDRV_CTL_ELEM_ACCESS_READWRITE		(SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE)
 #define SNDRV_CTL_ELEM_ACCESS_VOLATILE		(1<<2)	/* control value may be changed without a notification */
-#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP		(1<<2)	/* when was control changed */
+#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP		(1<<3)	/* when was control changed */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_READ		(1<<4)	/* TLV read is possible */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE		(1<<5)	/* TLV write is possible */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE	(SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
+#define SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND	(1<<6)	/* TLV command is possible */
 #define SNDRV_CTL_ELEM_ACCESS_INACTIVE		(1<<8)	/* control does actually nothing, but may be updated */
 #define SNDRV_CTL_ELEM_ACCESS_LOCK		(1<<9)	/* write lock */
 #define SNDRV_CTL_ELEM_ACCESS_OWNER		(1<<10)	/* write lock owner */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK	(1<<28)	/* kernel uses a TLV callback */
 #define SNDRV_CTL_ELEM_ACCESS_USER		(1<<29) /* user space element */
 #define SNDRV_CTL_ELEM_ACCESS_DINDIRECT		(1<<30)	/* indirect access for matrix dimensions in the info structure */
 #define SNDRV_CTL_ELEM_ACCESS_INDIRECT		(1<<31)	/* indirect access for element value in the value structure */
@@ -818,6 +823,12 @@
         unsigned char reserved[128-sizeof(struct timespec)];
 };
 
+struct snd_ctl_tlv {
+        unsigned int numid;	/* control element numeric identification */
+        unsigned int length;	/* in bytes aligned to 4 */
+        unsigned int tlv[0];	/* first TLV */
+};
+
 enum {
 	SNDRV_CTL_IOCTL_PVERSION = _IOR('U', 0x00, int),
 	SNDRV_CTL_IOCTL_CARD_INFO = _IOR('U', 0x01, struct snd_ctl_card_info),
@@ -831,6 +842,9 @@
 	SNDRV_CTL_IOCTL_ELEM_ADD = _IOWR('U', 0x17, struct snd_ctl_elem_info),
 	SNDRV_CTL_IOCTL_ELEM_REPLACE = _IOWR('U', 0x18, struct snd_ctl_elem_info),
 	SNDRV_CTL_IOCTL_ELEM_REMOVE = _IOWR('U', 0x19, struct snd_ctl_elem_id),
+	SNDRV_CTL_IOCTL_TLV_READ = _IOWR('U', 0x1a, struct snd_ctl_tlv),
+	SNDRV_CTL_IOCTL_TLV_WRITE = _IOWR('U', 0x1b, struct snd_ctl_tlv),
+	SNDRV_CTL_IOCTL_TLV_COMMAND = _IOWR('U', 0x1c, struct snd_ctl_tlv),
 	SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE = _IOWR('U', 0x20, int),
 	SNDRV_CTL_IOCTL_HWDEP_INFO = _IOR('U', 0x21, struct snd_hwdep_info),
 	SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE = _IOR('U', 0x30, int),
@@ -855,6 +869,7 @@
 #define SNDRV_CTL_EVENT_MASK_VALUE	(1<<0)	/* element value was changed */
 #define SNDRV_CTL_EVENT_MASK_INFO	(1<<1)	/* element info was changed */
 #define SNDRV_CTL_EVENT_MASK_ADD	(1<<2)	/* element was added */
+#define SNDRV_CTL_EVENT_MASK_TLV	(1<<3)	/* element TLV tree was changed */
 #define SNDRV_CTL_EVENT_MASK_REMOVE	(~0U)	/* element was removed */
 
 struct snd_ctl_event {
diff --git a/include/sound/control.h b/include/sound/control.h
index 2489b1e..1de148b 100644
--- a/include/sound/control.h
+++ b/include/sound/control.h
@@ -30,6 +30,11 @@
 typedef int (snd_kcontrol_info_t) (struct snd_kcontrol * kcontrol, struct snd_ctl_elem_info * uinfo);
 typedef int (snd_kcontrol_get_t) (struct snd_kcontrol * kcontrol, struct snd_ctl_elem_value * ucontrol);
 typedef int (snd_kcontrol_put_t) (struct snd_kcontrol * kcontrol, struct snd_ctl_elem_value * ucontrol);
+typedef int (snd_kcontrol_tlv_rw_t)(struct snd_kcontrol *kcontrol,
+				    int op_flag, /* 0=read,1=write,-1=command */
+				    unsigned int size,
+				    unsigned int __user *tlv);
+
 
 struct snd_kcontrol_new {
 	snd_ctl_elem_iface_t iface;	/* interface identifier */
@@ -42,6 +47,10 @@
 	snd_kcontrol_info_t *info;
 	snd_kcontrol_get_t *get;
 	snd_kcontrol_put_t *put;
+	union {
+		snd_kcontrol_tlv_rw_t *c;
+		unsigned int *p;
+	} tlv;
 	unsigned long private_value;
 };
 
@@ -58,6 +67,10 @@
 	snd_kcontrol_info_t *info;
 	snd_kcontrol_get_t *get;
 	snd_kcontrol_put_t *put;
+	union {
+		snd_kcontrol_tlv_rw_t *c;
+		unsigned int *p;
+	} tlv;
 	unsigned long private_value;
 	void *private_data;
 	void (*private_free)(struct snd_kcontrol *kcontrol);
diff --git a/include/sound/core.h b/include/sound/core.h
index bab3ff4..b056ea9 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -25,8 +25,8 @@
 #include <linux/sched.h>		/* wake_up() */
 #include <linux/mutex.h>		/* struct mutex */
 #include <linux/rwsem.h>		/* struct rw_semaphore */
-#include <linux/workqueue.h>		/* struct workqueue_struct */
 #include <linux/pm.h>			/* pm_message_t */
+#include <linux/device.h>
 
 /* forward declarations */
 #ifdef CONFIG_PCI
@@ -71,7 +71,6 @@
 	int (*dev_free)(struct snd_device *dev);
 	int (*dev_register)(struct snd_device *dev);
 	int (*dev_disconnect)(struct snd_device *dev);
-	int (*dev_unregister)(struct snd_device *dev);
 };
 
 struct snd_device {
@@ -131,8 +130,8 @@
 								state */
 	spinlock_t files_lock;		/* lock the files for this card */
 	int shutdown;			/* this card is going down */
+	int free_on_last_close;		/* free in context of file_release */
 	wait_queue_head_t shutdown_sleep;
-	struct work_struct free_workq;	/* for free in workqueue */
 	struct device *dev;
 
 #ifdef CONFIG_PM
@@ -188,6 +187,7 @@
 	int device;			/* device number */
 	const struct file_operations *f_ops;	/* file operations */
 	void *private_data;		/* private data for f_ops->open */
+	struct class_device *class_dev;	/* class device for sysfs */
 };
 
 /* sound.c */
@@ -202,6 +202,8 @@
 			const char *name);
 int snd_unregister_device(int type, struct snd_card *card, int dev);
 void *snd_lookup_minor_data(unsigned int minor, int type);
+int snd_add_device_sysfs_file(int type, struct snd_card *card, int dev,
+			      const struct class_device_attribute *attr);
 
 #ifdef CONFIG_SND_OSSEMUL
 int snd_register_oss_device(int type, struct snd_card *card, int dev,
@@ -244,7 +246,7 @@
 			 struct module *module, int extra_size);
 int snd_card_disconnect(struct snd_card *card);
 int snd_card_free(struct snd_card *card);
-int snd_card_free_in_thread(struct snd_card *card);
+int snd_card_free_when_closed(struct snd_card *card);
 int snd_card_register(struct snd_card *card);
 int snd_card_info_init(void);
 int snd_card_info_done(void);
diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 884bbf5..892e310 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -1524,6 +1524,10 @@
 	unsigned int value[32];		/* initial values */
 	unsigned int min;		/* minimum range */
 	unsigned int max;		/* maximum range */
+	union {
+		snd_kcontrol_tlv_rw_t *c;
+		unsigned int *p;
+	} tlv;
 	unsigned int translation;	/* translation type (EMU10K1_GPR_TRANSLATION*) */
 };
 
diff --git a/include/sound/info.h b/include/sound/info.h
index 74f69967..97ffc4f 100644
--- a/include/sound/info.h
+++ b/include/sound/info.h
@@ -71,7 +71,6 @@
 	mode_t mode;
 	long size;
 	unsigned short content;
-	unsigned short disconnected: 1;
 	union {
 		struct snd_info_entry_text text;
 		struct snd_info_entry_ops *ops;
@@ -83,6 +82,8 @@
 	void (*private_free)(struct snd_info_entry *entry);
 	struct proc_dir_entry *p;
 	struct mutex access;
+	struct list_head children;
+	struct list_head list;
 };
 
 #if defined(CONFIG_SND_OSSEMUL) && defined(CONFIG_PROC_FS)
@@ -122,8 +123,8 @@
 int snd_info_card_create(struct snd_card * card);
 int snd_info_card_register(struct snd_card * card);
 int snd_info_card_free(struct snd_card * card);
+void snd_info_card_disconnect(struct snd_card * card);
 int snd_info_register(struct snd_info_entry * entry);
-int snd_info_unregister(struct snd_info_entry * entry);
 
 /* for card drivers */
 int snd_card_proc_new(struct snd_card *card, const char *name, struct snd_info_entry **entryp);
@@ -156,8 +157,8 @@
 static inline int snd_info_card_create(struct snd_card * card) { return 0; }
 static inline int snd_info_card_register(struct snd_card * card) { return 0; }
 static inline int snd_info_card_free(struct snd_card * card) { return 0; }
+static inline void snd_info_card_disconnect(struct snd_card * card) { }
 static inline int snd_info_register(struct snd_info_entry * entry) { return 0; }
-static inline int snd_info_unregister(struct snd_info_entry * entry) { return 0; }
 
 static inline int snd_card_proc_new(struct snd_card *card, const char *name,
 				    struct snd_info_entry **entryp) { return -EINVAL; }
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index f84d849..60d40b3 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -190,7 +190,7 @@
 
 struct snd_pcm_file {
 	struct snd_pcm_substream *substream;
-	struct snd_pcm_file *next;
+	int no_compat_mmap;
 };
 
 struct snd_pcm_hw_rule;
@@ -384,7 +384,6 @@
 	struct snd_info_entry *proc_prealloc_entry;
 #endif
 	/* misc flags */
-	unsigned int no_mmap_ctrl: 1;
 	unsigned int hw_opened: 1;
 };
 
@@ -402,7 +401,6 @@
 	/* -- OSS things -- */
 	struct snd_pcm_oss_stream oss;
 #endif
-	struct snd_pcm_file *files;
 #ifdef CONFIG_SND_VERBOSE_PROCFS
 	struct snd_info_entry *proc_root;
 	struct snd_info_entry *proc_info_entry;
diff --git a/include/sound/timer.h b/include/sound/timer.h
index 5ece2bf..d42c083 100644
--- a/include/sound/timer.h
+++ b/include/sound/timer.h
@@ -129,7 +129,6 @@
 int snd_timer_global_new(char *id, int device, struct snd_timer **rtimer);
 int snd_timer_global_free(struct snd_timer *timer);
 int snd_timer_global_register(struct snd_timer *timer);
-int snd_timer_global_unregister(struct snd_timer *timer);
 
 int snd_timer_open(struct snd_timer_instance **ti, char *owner, struct snd_timer_id *tid, unsigned int slave_id);
 int snd_timer_close(struct snd_timer_instance *timeri);
diff --git a/include/sound/tlv.h b/include/sound/tlv.h
new file mode 100644
index 0000000..d93a96b
--- /dev/null
+++ b/include/sound/tlv.h
@@ -0,0 +1,60 @@
+#ifndef __SOUND_TLV_H
+#define __SOUND_TLV_H
+
+/*
+ *  Advanced Linux Sound Architecture - ALSA - Driver
+ *  Copyright (c) 2006 by Jaroslav Kysela <perex@suse.cz>
+ *
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+/*
+ * TLV structure is right behind the struct snd_ctl_tlv:
+ *   unsigned int type  	- see SNDRV_CTL_TLVT_*
+ *   unsigned int length
+ *   .... data aligned to sizeof(unsigned int), use
+ *        block_length = (length + (sizeof(unsigned int) - 1)) &
+ *                       ~(sizeof(unsigned int) - 1) ....
+ */
+
+#define SNDRV_CTL_TLVT_CONTAINER 0	/* one level down - group of TLVs */
+#define SNDRV_CTL_TLVT_DB_SCALE	1       /* dB scale */
+#define SNDRV_CTL_TLVT_DB_LINEAR 2	/* linear volume */
+#define SNDRV_CTL_TLVT_DB_RANGE 3	/* dB range container */
+
+#define TLV_DB_SCALE_ITEM(min, step, mute)			\
+	SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int),	\
+	(min), ((step) & 0xffff) | ((mute) ? 0x10000 : 0)
+#define DECLARE_TLV_DB_SCALE(name, min, step, mute) \
+	unsigned int name[] = { TLV_DB_SCALE_ITEM(min, step, mute) }
+
+/* linear volume between min_dB and max_dB (.01dB unit) */
+#define TLV_DB_LINEAR_ITEM(min_dB, max_dB)		    \
+	SNDRV_CTL_TLVT_DB_LINEAR, 2 * sizeof(unsigned int), \
+	(min_dB), (max_dB)
+#define DECLARE_TLV_DB_LINEAR(name, min_dB, max_dB)	\
+	unsigned int name[] = { TLV_DB_LINEAR_ITEM(min_dB, max_dB) }
+
+/* dB range container */
+/* Each item is: <min> <max> <TLV> */
+/* The below assumes that each item TLV is 4 words like DB_SCALE or LINEAR */
+#define TLV_DB_RANGE_HEAD(num)			\
+	SNDRV_CTL_TLVT_DB_RANGE, 6 * (num) * sizeof(unsigned int)
+
+#define TLV_DB_GAIN_MUTE	-9999999
+
+#endif /* __SOUND_TLV_H */
diff --git a/include/sound/vx_core.h b/include/sound/vx_core.h
index 9821a61..dbca141 100644
--- a/include/sound/vx_core.h
+++ b/include/sound/vx_core.h
@@ -128,6 +128,7 @@
 	unsigned int num_ins;
 	unsigned int num_outs;
 	unsigned int output_level_max;
+	unsigned int *output_level_db_scale;
 };
 
 /* hwdep id string */
diff --git a/include/video/Kbuild b/include/video/Kbuild
index 76a6073..a14f9c0 100644
--- a/include/video/Kbuild
+++ b/include/video/Kbuild
@@ -1 +1 @@
-unifdef-y := sisfb.h
+unifdef-y += sisfb.h
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 9336f2e..ac1f850 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -252,7 +252,7 @@
 	mask_ack_irq(desc, irq);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
-		goto out;
+		goto out_unlock;
 	desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
 	kstat_cpu(cpu).irqs[irq]++;
 
@@ -263,7 +263,7 @@
 	action = desc->action;
 	if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
 		desc->status |= IRQ_PENDING;
-		goto out;
+		goto out_unlock;
 	}
 
 	desc->status |= IRQ_INPROGRESS;
@@ -276,9 +276,9 @@
 
 	spin_lock(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
-out:
 	if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
 		desc->chip->unmask(irq);
+out_unlock:
 	spin_unlock(&desc->lock);
 }
 
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index e781876..2ed4040 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -75,7 +75,7 @@
 	/*
 	 * If new attributes are added, please revisit this allocation
 	 */
-	skb = nlmsg_new(size);
+	skb = nlmsg_new(size, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
diff --git a/lib/audit.c b/lib/audit.c
index 8c21625..3b1289f 100644
--- a/lib/audit.c
+++ b/lib/audit.c
@@ -28,8 +28,10 @@
 	switch(syscall) {
 	case __NR_open:
 		return 2;
+#ifdef __NR_openat
 	case __NR_openat:
 		return 3;
+#endif
 #ifdef __NR_socketcall
 	case __NR_socketcall:
 		return 4;
diff --git a/mm/mmap.c b/mm/mmap.c
index e66a0b5..d799d89 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -64,6 +64,13 @@
 	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
 };
 
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+	return protection_map[vm_flags &
+				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+}
+EXPORT_SYMBOL(vm_get_page_prot);
+
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e630188..77a0bc4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -803,6 +803,15 @@
 EXPORT_SYMBOL(test_set_page_writeback);
 
 /*
+ * Wakes up tasks that are being throttled due to writeback congestion
+ */
+void writeback_congestion_end(void)
+{
+	blk_congestion_end(WRITE);
+}
+EXPORT_SYMBOL(writeback_congestion_end);
+
+/*
  * Return true if any of the pages in the mapping are marged with the
  * passed tag.
  */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 54a4f53..3b5358a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2363,7 +2363,7 @@
 	return 0;
 }
 
-__initdata int hashdist = HASHDIST_DEFAULT;
+int hashdist = HASHDIST_DEFAULT;
 
 #ifdef CONFIG_NUMA
 static int __init set_hashdist(char *str)
diff --git a/net/Kconfig b/net/Kconfig
index 4959a4e..6528a935 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -249,6 +249,11 @@
 config WIRELESS_EXT
 	bool
 
+source "net/netlabel/Kconfig"
+
+config FIB_RULES
+	bool
+
 endif   # if NET
 endmenu # Networking
 
diff --git a/net/Makefile b/net/Makefile
index 065796f..ad4d14f 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -46,6 +46,7 @@
 obj-$(CONFIG_IP_SCTP)		+= sctp/
 obj-$(CONFIG_IEEE80211)		+= ieee80211/
 obj-$(CONFIG_TIPC)		+= tipc/
+obj-$(CONFIG_NETLABEL)		+= netlabel/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 5df4b9a..c0a4ae2 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -1,6 +1,5 @@
 /* ATM driver model support. */
 
-#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/kobject.h>
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 0070466..b87c2a8 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -98,11 +98,6 @@
 	0
 };
 
-#ifdef CONFIG_PROC_FS
-extern int mpc_proc_init(void);
-extern void mpc_proc_clean(void);
-#endif
-
 struct mpoa_client *mpcs = NULL; /* FIXME */
 static struct atm_mpoa_qos *qos_head = NULL;
 static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
@@ -1439,12 +1434,8 @@
 {
 	register_atm_ioctl(&atm_ioctl_ops);
 
-#ifdef CONFIG_PROC_FS
 	if (mpc_proc_init() != 0)
 		printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n");
-	else
-		printk(KERN_INFO "mpoa: /proc/mpoa initialized\n");
-#endif
 
 	printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n");
 
@@ -1457,9 +1448,7 @@
 	struct atm_mpoa_qos *qos, *nextqos;
 	struct lec_priv *priv;
 
-#ifdef CONFIG_PROC_FS
 	mpc_proc_clean();
-#endif
 
 	del_timer(&mpc_timer);
 	unregister_netdevice_notifier(&mpoa_notifier);
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index 863ddf6..3c7981a 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -50,4 +50,12 @@
 struct seq_file;
 void atm_mpoa_disp_qos(struct seq_file *m);
 
+#ifdef CONFIG_PROC_FS
+int mpc_proc_init(void);
+void mpc_proc_clean(void);
+#else
+#define mpc_proc_init() (0)
+#define mpc_proc_clean() do { } while(0)
+#endif
+
 #endif /* _MPC_H_ */
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 864fbbc..191b861 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -38,13 +38,10 @@
 	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
 		kfree_skb(skb);
 	else {
-#ifdef CONFIG_BRIDGE_NETFILTER
 		/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
 		if (nf_bridge_maybe_copy_header(skb))
 			kfree_skb(skb);
-		else
-#endif
-		{
+		else {
 			skb_push(skb, ETH_HLEN);
 
 			dev_queue_xmit(skb);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 05b3de8..ac181be 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -53,10 +53,10 @@
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *brnf_sysctl_header;
-static int brnf_call_iptables = 1;
-static int brnf_call_ip6tables = 1;
-static int brnf_call_arptables = 1;
-static int brnf_filter_vlan_tagged = 1;
+static int brnf_call_iptables __read_mostly = 1;
+static int brnf_call_ip6tables __read_mostly = 1;
+static int brnf_call_arptables __read_mostly = 1;
+static int brnf_filter_vlan_tagged __read_mostly = 1;
 #else
 #define brnf_filter_vlan_tagged 1
 #endif
@@ -127,14 +127,37 @@
 
 static inline void nf_bridge_save_header(struct sk_buff *skb)
 {
-        int header_size = 16;
+        int header_size = ETH_HLEN;
 
 	if (skb->protocol == htons(ETH_P_8021Q))
-		header_size = 18;
+		header_size += VLAN_HLEN;
 
 	memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
 }
 
+/*
+ * When forwarding bridge frames, a copy of the original header is saved
+ * before processing; this copies that saved header back in front of the data.
+ */
+int nf_bridge_copy_header(struct sk_buff *skb)
+{
+	int err;
+        int header_size = ETH_HLEN;
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		header_size += VLAN_HLEN;
+
+	err = skb_cow(skb, header_size);
+	if (err)
+		return err;
+
+	memcpy(skb->data - header_size, skb->nf_bridge->data, header_size);
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		__skb_push(skb, VLAN_HLEN);
+	return 0;
+}
+
 /* PF_BRIDGE/PRE_ROUTING *********************************************/
 /* Undo the changes made for ip6tables PREROUTING and continue the
  * bridge PRE_ROUTING hook. */
@@ -695,16 +718,6 @@
 	else
 		pf = PF_INET6;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	/* Sometimes we get packets with NULL ->dst here (for example,
-	 * running a dhcp client daemon triggers this). This should now
-	 * be fixed, but let's keep the check around. */
-	if (skb->dst == NULL) {
-		printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
-		return NF_ACCEPT;
-	}
-#endif
-
 	nf_bridge = skb->nf_bridge;
 	nf_bridge->physoutdev = skb->dev;
 	realindev = nf_bridge->physindev;
@@ -786,7 +799,7 @@
 	 * keep the check just to be sure... */
 	if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
 		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
-		       "bad mac.raw pointer.");
+		       "bad mac.raw pointer.\n");
 		goto print_error;
 	}
 #endif
@@ -804,7 +817,7 @@
 
 #ifdef CONFIG_NETFILTER_DEBUG
 	if (skb->dst == NULL) {
-		printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
+		printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
 		goto print_error;
 	}
 #endif
@@ -841,6 +854,7 @@
 	}
 	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
 	       skb->data);
+	dump_stack();
 	return NF_ACCEPT;
 #endif
 }
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 53086fb..8f66119 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -12,6 +12,7 @@
 
 #include <linux/kernel.h>
 #include <linux/rtnetlink.h>
+#include <net/netlink.h>
 #include "br_private.h"
 
 /*
@@ -76,26 +77,24 @@
 void br_ifinfo_notify(int event, struct net_bridge_port *port)
 {
 	struct sk_buff *skb;
-	int err = -ENOMEM;
+	int payload = sizeof(struct ifinfomsg) + 128;
+	int err = -ENOBUFS;
 
 	pr_debug("bridge notify event=%d\n", event);
-	skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128),
-			GFP_ATOMIC);
-	if (!skb)
-		goto err_out;
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
 
-	err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0);
+	err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+errout:
 	if (err < 0)
-		goto err_kfree;
-
-	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
-	return;
-
-err_kfree:
-	kfree_skb(skb);
-err_out:
-	netlink_set_err(rtnl, 0, RTNLGRP_LINK, err);
+		rtnl_set_sk_err(RTNLGRP_LINK, err);
 }
 
 /*
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3a13ed6..3df55b2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -24,6 +24,7 @@
 #include <linux/vmalloc.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <linux/smp.h>
 #include <linux/cpumask.h>
@@ -31,36 +32,9 @@
 /* needed for logical [in,out]-dev filtering */
 #include "../br_private.h"
 
-/* list_named_find */
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-#include <linux/netfilter_ipv4/listhelp.h>
-#include <linux/mutex.h>
-
-#if 0
-/* use this for remote debugging
- * Copyright (C) 1998 by Ori Pomerantz
- * Print the string to the appropriate tty, the one
- * the current task uses
- */
-static void print_string(char *str)
-{
-	struct tty_struct *my_tty;
-
-	/* The tty for the current task */
-	my_tty = current->signal->tty;
-	if (my_tty != NULL) {
-		my_tty->driver->write(my_tty, 0, str, strlen(str));
-		my_tty->driver->write(my_tty, 0, "\015\012", 2);
-	}
-}
-
-#define BUGPRINT(args) print_string(args);
-#else
 #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\
                                          "report to author: "format, ## args)
 /* #define BUGPRINT(format, args...) */
-#endif
 #define MEMPRINT(format, args...) printk("kernel msg: ebtables "\
                                          ": out of memory: "format, ## args)
 /* #define MEMPRINT(format, args...) */
@@ -299,18 +273,22 @@
 find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
    struct mutex *mutex)
 {
-	void *ret;
+	struct {
+		struct list_head list;
+		char name[EBT_FUNCTION_MAXNAMELEN];
+	} *e;
 
 	*error = mutex_lock_interruptible(mutex);
 	if (*error != 0)
 		return NULL;
 
-	ret = list_named_find(head, name);
-	if (!ret) {
-		*error = -ENOENT;
-		mutex_unlock(mutex);
+	list_for_each_entry(e, head, list) {
+		if (strcmp(e->name, name) == 0)
+			return e;
 	}
-	return ret;
+	*error = -ENOENT;
+	mutex_unlock(mutex);
+	return NULL;
 }
 
 #ifndef CONFIG_KMOD
@@ -1064,15 +1042,19 @@
 
 int ebt_register_target(struct ebt_target *target)
 {
+	struct ebt_target *t;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ebt_mutex);
 	if (ret != 0)
 		return ret;
-	if (!list_named_insert(&ebt_targets, target)) {
-		mutex_unlock(&ebt_mutex);
-		return -EEXIST;
+	list_for_each_entry(t, &ebt_targets, list) {
+		if (strcmp(t->name, target->name) == 0) {
+			mutex_unlock(&ebt_mutex);
+			return -EEXIST;
+		}
 	}
+	list_add(&target->list, &ebt_targets);
 	mutex_unlock(&ebt_mutex);
 
 	return 0;
@@ -1081,21 +1063,25 @@
 void ebt_unregister_target(struct ebt_target *target)
 {
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_targets, target);
+	list_del(&target->list);
 	mutex_unlock(&ebt_mutex);
 }
 
 int ebt_register_match(struct ebt_match *match)
 {
+	struct ebt_match *m;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ebt_mutex);
 	if (ret != 0)
 		return ret;
-	if (!list_named_insert(&ebt_matches, match)) {
-		mutex_unlock(&ebt_mutex);
-		return -EEXIST;
+	list_for_each_entry(m, &ebt_matches, list) {
+		if (strcmp(m->name, match->name) == 0) {
+			mutex_unlock(&ebt_mutex);
+			return -EEXIST;
+		}
 	}
+	list_add(&match->list, &ebt_matches);
 	mutex_unlock(&ebt_mutex);
 
 	return 0;
@@ -1104,21 +1090,25 @@
 void ebt_unregister_match(struct ebt_match *match)
 {
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_matches, match);
+	list_del(&match->list);
 	mutex_unlock(&ebt_mutex);
 }
 
 int ebt_register_watcher(struct ebt_watcher *watcher)
 {
+	struct ebt_watcher *w;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ebt_mutex);
 	if (ret != 0)
 		return ret;
-	if (!list_named_insert(&ebt_watchers, watcher)) {
-		mutex_unlock(&ebt_mutex);
-		return -EEXIST;
+	list_for_each_entry(w, &ebt_watchers, list) {
+		if (strcmp(w->name, watcher->name) == 0) {
+			mutex_unlock(&ebt_mutex);
+			return -EEXIST;
+		}
 	}
+	list_add(&watcher->list, &ebt_watchers);
 	mutex_unlock(&ebt_mutex);
 
 	return 0;
@@ -1127,13 +1117,14 @@
 void ebt_unregister_watcher(struct ebt_watcher *watcher)
 {
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_watchers, watcher);
+	list_del(&watcher->list);
 	mutex_unlock(&ebt_mutex);
 }
 
 int ebt_register_table(struct ebt_table *table)
 {
 	struct ebt_table_info *newinfo;
+	struct ebt_table *t;
 	int ret, i, countersize;
 
 	if (!table || !table->table ||!table->table->entries ||
@@ -1179,10 +1170,12 @@
 	if (ret != 0)
 		goto free_chainstack;
 
-	if (list_named_find(&ebt_tables, table->name)) {
-		ret = -EEXIST;
-		BUGPRINT("Table name already exists\n");
-		goto free_unlock;
+	list_for_each_entry(t, &ebt_tables, list) {
+		if (strcmp(t->name, table->name) == 0) {
+			ret = -EEXIST;
+			BUGPRINT("Table name already exists\n");
+			goto free_unlock;
+		}
 	}
 
 	/* Hold a reference count if the chains aren't empty */
@@ -1190,7 +1183,7 @@
 		ret = -ENOENT;
 		goto free_unlock;
 	}
-	list_prepend(&ebt_tables, table);
+	list_add(&table->list, &ebt_tables);
 	mutex_unlock(&ebt_mutex);
 	return 0;
 free_unlock:
@@ -1216,7 +1209,7 @@
 		return;
 	}
 	mutex_lock(&ebt_mutex);
-	LIST_DELETE(&ebt_tables, table);
+	list_del(&table->list);
 	mutex_unlock(&ebt_mutex);
 	vfree(table->private->entries);
 	if (table->private->chainstack) {
@@ -1486,7 +1479,7 @@
 	int ret;
 
 	mutex_lock(&ebt_mutex);
-	list_named_insert(&ebt_targets, &ebt_standard_target);
+	list_add(&ebt_standard_target.list, &ebt_targets);
 	mutex_unlock(&ebt_mutex);
 	if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0)
 		return ret;
diff --git a/net/core/Makefile b/net/core/Makefile
index 2645ba4..1195680 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -17,3 +17,4 @@
 obj-$(CONFIG_WIRELESS_EXT) += wireless.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
+obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index aecddcc..f558c61 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -417,7 +417,7 @@
 
 	sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
 	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
@@ -462,7 +462,7 @@
 			goto fault;
 		if ((unsigned short)csum_fold(csum))
 			goto csum_error;
-		if (unlikely(skb->ip_summed == CHECKSUM_HW))
+		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 		iov->iov_len -= chunk;
 		iov->iov_base += chunk;
diff --git a/net/core/dev.c b/net/core/dev.c
index d4a1ec3..14de297 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -640,6 +640,8 @@
 {
 	if (*name == '\0')
 		return 0;
+	if (strlen(name) >= IFNAMSIZ)
+		return 0;
 	if (!strcmp(name, ".") || !strcmp(name, ".."))
 		return 0;
 
@@ -1166,12 +1168,12 @@
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
  */
-int skb_checksum_help(struct sk_buff *skb, int inward)
+int skb_checksum_help(struct sk_buff *skb)
 {
 	unsigned int csum;
 	int ret = 0, offset = skb->h.raw - skb->data;
 
-	if (inward)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		goto out_set_summed;
 
 	if (unlikely(skb_shinfo(skb)->gso_size)) {
@@ -1223,7 +1225,7 @@
 	skb->mac_len = skb->nh.raw - skb->data;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		if (skb_header_cloned(skb) &&
 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 			return ERR_PTR(err);
@@ -1232,7 +1234,7 @@
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
-			if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
+			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 				err = ptype->gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
@@ -1444,11 +1446,11 @@
 	/* If packet is not checksummed and device does not support
 	 * checksumming for this protocol, complete checksumming here.
 	 */
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 	    (!(dev->features & NETIF_F_GEN_CSUM) &&
 	     (!(dev->features & NETIF_F_IP_CSUM) ||
 	      skb->protocol != htons(ETH_P_IP))))
-	      	if (skb_checksum_help(skb, 0))
+	      	if (skb_checksum_help(skb))
 	      		goto out_kfree_skb;
 
 gso:
@@ -3191,13 +3193,15 @@
 	struct net_device *dev;
 	int alloc_size;
 
+	BUG_ON(strlen(name) >= sizeof(dev->name));
+
 	/* ensure 32-byte alignment of both the device and private area */
 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
-		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
+		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
 		return NULL;
 	}
 
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index c57d887..b22648d 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -21,8 +21,7 @@
  *	2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h> 
-#include <linux/module.h> 
+#include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
new file mode 100644
index 0000000..a99d87d
--- /dev/null
+++ b/net/core/fib_rules.c
@@ -0,0 +1,421 @@
+/*
+ * net/core/fib_rules.c		Generic Routing Rules
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <net/fib_rules.h>
+
+static LIST_HEAD(rules_ops);
+static DEFINE_SPINLOCK(rules_mod_lock);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid);
+
+static struct fib_rules_ops *lookup_rules_ops(int family)
+{
+	struct fib_rules_ops *ops;
+	/* Find the ops registered for @family and pin its owner module. */
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &rules_ops, list) {
+		if (ops->family == family) {
+			if (!try_module_get(ops->owner))
+				ops = NULL;	/* owner could not be pinned: report as not found */
+			rcu_read_unlock();
+			return ops;	/* on success, release with rules_ops_put() */
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;	/* no ops registered for this family */
+}
+
+static void rules_ops_put(struct fib_rules_ops *ops)
+{
+	if (ops)	/* error paths call this with NULL when no ops was looked up */
+		module_put(ops->owner);	/* drops the ref taken in lookup_rules_ops() */
+}
+
+int fib_rules_register(struct fib_rules_ops *ops)
+{
+	int err = -EEXIST;
+	struct fib_rules_ops *o;
+	/* Register @ops: 0 on success, -EINVAL for bad ops, -EEXIST if the family is taken. */
+	if (ops->rule_size < sizeof(struct fib_rule))
+		return -EINVAL;	/* rules are allocated with rule_size and used as fib_rule */
+
+	if (ops->match == NULL || ops->configure == NULL ||
+	    ops->compare == NULL || ops->fill == NULL ||
+	    ops->action == NULL)
+		return -EINVAL;	/* these callbacks are invoked without NULL checks in this file */
+
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list)
+		if (ops->family == o->family)
+			goto errout;	/* err is still -EEXIST */
+
+	list_add_tail_rcu(&ops->list, &rules_ops);
+	err = 0;
+errout:
+	spin_unlock(&rules_mod_lock);
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_register);
+
+static void cleanup_ops(struct fib_rules_ops *ops)
+{
+	struct fib_rule *rule, *tmp;
+	/* Unlink every rule of @ops and drop one reference on each. */
+	list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {	/* _safe: entries are unlinked while walking */
+		list_del_rcu(&rule->list);
+		fib_rule_put(rule);	/* pairs with the fib_rule_get() done when the rule was inserted */
+	}
+}
+
+int fib_rules_unregister(struct fib_rules_ops *ops)
+{
+	int err = 0;
+	struct fib_rules_ops *o;
+	/* Unregister @ops and release its rules; -ENOENT if it was never registered. */
+	spin_lock(&rules_mod_lock);
+	list_for_each_entry(o, &rules_ops, list) {
+		if (o == ops) {
+			list_del_rcu(&o->list);
+			cleanup_ops(ops);
+			goto out;
+		}
+	}
+
+	err = -ENOENT;
+out:
+	spin_unlock(&rules_mod_lock);
+
+	synchronize_rcu();	/* reached on both paths; waits out RCU readers of rules_ops */
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_unregister);
+
+int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
+		     int flags, struct fib_lookup_arg *arg)
+{
+	struct fib_rule *rule;
+	int err;
+	/* Walk the rules in list order; the first action result other than -EAGAIN wins. */
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+		if (rule->ifindex && (rule->ifindex != fl->iif))	/* ifindex 0: rule not tied to a device */
+			continue;
+
+		if (!ops->match(rule, fl, flags))
+			continue;
+
+		err = ops->action(rule, fl, flags, arg);
+		if (err != -EAGAIN) {	/* -EAGAIN: try the next rule */
+			fib_rule_get(rule);	/* reference handed out via arg->rule; presumably dropped by the caller */
+			arg->rule = rule;
+			goto out;
+		}
+	}
+
+	err = -ENETUNREACH;	/* no rule produced a result */
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_lookup);
+
+int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule, *r, *last = NULL;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+	/* Netlink handler: build a rule from the request and insert it sorted by pref. */
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = -EAFNOSUPPORT;	/* negative errno, like every other error path here */
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+
+	rule = kzalloc(ops->rule_size, GFP_KERNEL);
+	if (rule == NULL) {
+		err = -ENOMEM;
+		goto errout;
+	}
+
+	if (tb[FRA_PRIORITY])
+		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
+
+	if (tb[FRA_IFNAME]) {
+		struct net_device *dev;
+
+		rule->ifindex = -1;	/* -1: named device not present; attach_rules() resolves it later */
+		nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
+		dev = __dev_get_by_name(rule->ifname);
+		if (dev)
+			rule->ifindex = dev->ifindex;
+	}
+
+	rule->action = frh->action;
+	rule->flags = frh->flags;
+	rule->table = frh_get_table(frh, tb);
+
+	if (!rule->pref && ops->default_pref)	/* default_pref is the one optional callback */
+		rule->pref = ops->default_pref();
+
+	err = ops->configure(rule, skb, nlh, frh, tb);
+	if (err < 0)
+		goto errout_free;
+	/* find the last existing rule whose pref is <= ours; NULL means insert at the head */
+	list_for_each_entry(r, ops->rules_list, list) {
+		if (r->pref > rule->pref)
+			break;
+		last = r;
+	}
+
+	fib_rule_get(rule);	/* reference held on behalf of the rules list */
+
+	if (last)
+		list_add_rcu(&rule->list, &last->list);
+	else
+		list_add_rcu(&rule->list, ops->rules_list);
+
+	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	rules_ops_put(ops);
+	return 0;
+
+errout_free:
+	kfree(rule);
+errout:
+	rules_ops_put(ops);	/* ops may still be NULL here; rules_ops_put() tolerates that */
+	return err;
+}
+
+int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+{
+	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rules_ops *ops = NULL;
+	struct fib_rule *rule;
+	struct nlattr *tb[FRA_MAX+1];
+	int err = -EINVAL;
+	/* Netlink handler: delete the first rule matching every selector in the request. */
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+		goto errout;
+
+	ops = lookup_rules_ops(frh->family);
+	if (ops == NULL) {
+		err = -EAFNOSUPPORT;	/* negative errno, like every other error path here */
+		goto errout;
+	}
+
+	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
+	if (err < 0)
+		goto errout;
+	/* a zero header field or an absent attribute acts as a wildcard for that selector */
+	list_for_each_entry(rule, ops->rules_list, list) {
+		if (frh->action && (frh->action != rule->action))
+			continue;
+
+		if (frh->table && (frh_get_table(frh, tb) != rule->table))
+			continue;
+
+		if (tb[FRA_PRIORITY] &&
+		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
+			continue;
+
+		if (tb[FRA_IFNAME] &&
+		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
+			continue;
+
+		if (!ops->compare(rule, frh, tb))
+			continue;
+
+		if (rule->flags & FIB_RULE_PERMANENT) {
+			err = -EPERM;	/* first match is permanent: stop, do not look further */
+			goto errout;
+		}
+
+		list_del_rcu(&rule->list);
+		synchronize_rcu();	/* let RCU readers finish before the reference is dropped */
+		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
+				   NETLINK_CB(skb).pid);
+		fib_rule_put(rule);
+		rules_ops_put(ops);
+		return 0;
+	}
+
+	err = -ENOENT;	/* nothing matched */
+errout:
+	rules_ops_put(ops);	/* ops may still be NULL here; rules_ops_put() tolerates that */
+	return err;
+}
+
+static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
+			    u32 pid, u32 seq, int type, int flags,
+			    struct fib_rules_ops *ops)
+{
+	struct nlmsghdr *nlh;
+	struct fib_rule_hdr *frh;
+	/* Append one netlink message describing @rule to @skb. */
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
+	if (nlh == NULL)
+		return -1;
+
+	frh = nlmsg_data(nlh);
+	frh->table = rule->table;
+	NLA_PUT_U32(skb, FRA_TABLE, rule->table);	/* table goes out in the header and as an attribute */
+	frh->res1 = 0;
+	frh->res2 = 0;
+	frh->action = rule->action;
+	frh->flags = rule->flags;
+
+	if (rule->ifname[0])	/* only rules created with FRA_IFNAME carry a name */
+		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
+
+	if (rule->pref)
+		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
+
+	if (ops->fill(rule, skb, nlh, frh) < 0)	/* family-specific part of the message */
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:	/* NOTE(review): the NLA_PUT_* macros presumably jump here too - confirm in net/netlink.h */
+	return nlmsg_cancel(skb, nlh);
+}
+
+int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+{
+	int idx = 0;
+	struct fib_rule *rule;
+	struct fib_rules_ops *ops;
+	/* Dump the rules of @family into @skb, resuming at index cb->args[0]. */
+	ops = lookup_rules_ops(family);
+	if (ops == NULL)
+		return -EAFNOSUPPORT;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(rule, ops->rules_list, list) {	/* RCU iterator to match rcu_read_lock() */
+		if (idx < cb->args[0])
+			goto skip;	/* already sent in an earlier pass */
+
+		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
+				     NLM_F_MULTI, ops) < 0)
+			break;	/* fill failed: stop without counting this rule so it is retried */
+skip:
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;	/* resume point for the next call */
+	rules_ops_put(ops);
+
+	return skb->len;
+}
+
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+static void notify_rule_change(int event, struct fib_rule *rule,
+			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+			       u32 pid)
+{
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+	/* Send an @event message describing @rule to the ops->nlgroup netlink group. */
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;	/* err is still -ENOBUFS */
+
+	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
+errout:	/* also reached on success; the check below filters that case out */
+	if (err < 0)
+		rtnl_set_sk_err(ops->nlgroup, err);	/* failure is reported on the group, not returned */
+}
+
+static void attach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+	/* Bind rules that name @dev but are still unresolved (ifindex == -1). */
+	list_for_each_entry(rule, rules, list) {
+		if (rule->ifindex == -1 &&
+		    strcmp(dev->name, rule->ifname) == 0)
+			rule->ifindex = dev->ifindex;
+	}
+}
+
+static void detach_rules(struct list_head *rules, struct net_device *dev)
+{
+	struct fib_rule *rule;
+	/* Mark rules bound to @dev as unresolved again; rule->ifname is left untouched. */
+	list_for_each_entry(rule, rules, list)
+		if (rule->ifindex == dev->ifindex)
+			rule->ifindex = -1;
+}
+
+
+static int fib_rules_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct fib_rules_ops *ops;
+	/* Netdevice notifier: bind or unbind rules by name as devices come and go. */
+	ASSERT_RTNL();
+	rcu_read_lock();
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		list_for_each_entry(ops, &rules_ops, list)	/* every registered family */
+			attach_rules(ops->rules_list, dev);
+		break;
+
+	case NETDEV_UNREGISTER:
+		list_for_each_entry(ops, &rules_ops, list)
+			detach_rules(ops->rules_list, dev);
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return NOTIFY_DONE;	/* same result for every event, handled or not */
+}
+
+static struct notifier_block fib_rules_notifier = {
+	.notifier_call = fib_rules_event,	/* registered in fib_rules_init() */
+};
+
+static int __init fib_rules_init(void)
+{
+	return register_netdevice_notifier(&fib_rules_notifier);	/* run as a subsys_initcall */
+}
+
+subsys_initcall(fib_rules_init);
diff --git a/net/core/filter.c b/net/core/filter.c
index 5b4486a..6732782 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -422,10 +422,10 @@
 	if (!err) {
 		struct sk_filter *old_fp;
 
-		spin_lock_bh(&sk->sk_lock.slock);
-		old_fp = sk->sk_filter;
-		sk->sk_filter = fp;
-		spin_unlock_bh(&sk->sk_lock.slock);
+		rcu_read_lock_bh();
+		old_fp = rcu_dereference(sk->sk_filter);
+		rcu_assign_pointer(sk->sk_filter, fp);
+		rcu_read_unlock_bh();
 		fp = old_fp;
 	}
 
diff --git a/net/core/flow.c b/net/core/flow.c
index 2191af5..f23e7e3 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -32,7 +32,6 @@
 	u8			dir;
 	struct flowi		key;
 	u32			genid;
-	u32			sk_sid;
 	void			*object;
 	atomic_t		*object_ref;
 };
@@ -165,7 +164,7 @@
 	return 0;
 }
 
-void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 			flow_resolve_t resolver)
 {
 	struct flow_cache_entry *fle, **head;
@@ -189,7 +188,6 @@
 	for (fle = *head; fle; fle = fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
-		    fle->sk_sid == sk_sid &&
 		    flow_key_compare(key, &fle->key) == 0) {
 			if (fle->genid == atomic_read(&flow_cache_genid)) {
 				void *ret = fle->object;
@@ -214,7 +212,6 @@
 			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
-			fle->sk_sid = sk_sid;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
 			flow_count(cpu)++;
@@ -226,7 +223,7 @@
 		void *obj;
 		atomic_t *obj_ref;
 
-		resolver(key, sk_sid, family, dir, &obj, &obj_ref);
+		resolver(key, family, dir, &obj, &obj_ref);
 
 		if (fle) {
 			fle->genid = atomic_read(&flow_cache_genid);
@@ -346,12 +343,8 @@
 
 	flow_cachep = kmem_cache_create("flow_cache",
 					sizeof(struct flow_cache_entry),
-					0, SLAB_HWCACHE_ALIGN,
+					0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					NULL, NULL);
-
-	if (!flow_cachep)
-		panic("NET: failed to allocate flow cache slab\n");
-
 	flow_hash_shift = 10;
 	flow_lwm = 2 * flow_hash_size;
 	flow_hwm = 4 * flow_hash_size;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index fe2113f..b6c69e1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -30,6 +30,7 @@
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/netevent.h>
+#include <net/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <linux/string.h>
@@ -888,7 +889,7 @@
 	return rc;
 }
 
-static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
 {
 	struct hh_cache *hh;
 	void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
@@ -1338,14 +1339,10 @@
 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
 
 	if (!tbl->kmem_cachep)
-		tbl->kmem_cachep = kmem_cache_create(tbl->id,
-						     tbl->entry_size,
-						     0, SLAB_HWCACHE_ALIGN,
-						     NULL, NULL);
-
-	if (!tbl->kmem_cachep)
-		panic("cannot create neighbour cache");
-
+		tbl->kmem_cachep =
+			kmem_cache_create(tbl->id, tbl->entry_size, 0,
+					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					  NULL, NULL);
 	tbl->stats = alloc_percpu(struct neigh_statistics);
 	if (!tbl->stats)
 		panic("cannot create neighbour cache statistics");
@@ -1440,48 +1437,62 @@
 
 int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
-	struct rtattr **nda = arg;
+	struct ndmsg *ndm;
+	struct nlattr *dst_attr;
 	struct neigh_table *tbl;
 	struct net_device *dev = NULL;
-	int err = -ENODEV;
+	int err = -EINVAL;
 
-	if (ndm->ndm_ifindex &&
-	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+	if (nlmsg_len(nlh) < sizeof(*ndm))
 		goto out;
 
+	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
+	if (dst_attr == NULL)
+		goto out;
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = dev_get_by_index(ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+	}
+
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-		struct rtattr *dst_attr = nda[NDA_DST - 1];
-		struct neighbour *n;
+		struct neighbour *neigh;
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
 		read_unlock(&neigh_tbl_lock);
 
-		err = -EINVAL;
-		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+		if (nla_len(dst_attr) < tbl->key_len)
 			goto out_dev_put;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
-			err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev);
+			err = pneigh_delete(tbl, nla_data(dst_attr), dev);
 			goto out_dev_put;
 		}
 
-		if (!dev)
-			goto out;
+		if (dev == NULL)
+			goto out_dev_put;
 
-		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-		if (n) {
-			err = neigh_update(n, NULL, NUD_FAILED, 
-					   NEIGH_UPDATE_F_OVERRIDE|
-					   NEIGH_UPDATE_F_ADMIN);
-			neigh_release(n);
+		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+		if (neigh == NULL) {
+			err = -ENOENT;
+			goto out_dev_put;
 		}
+
+		err = neigh_update(neigh, NULL, NUD_FAILED,
+				   NEIGH_UPDATE_F_OVERRIDE |
+				   NEIGH_UPDATE_F_ADMIN);
+		neigh_release(neigh);
 		goto out_dev_put;
 	}
 	read_unlock(&neigh_tbl_lock);
-	err = -EADDRNOTAVAIL;
+	err = -EAFNOSUPPORT;
+
 out_dev_put:
 	if (dev)
 		dev_put(dev);
@@ -1491,76 +1502,93 @@
 
 int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
-	struct rtattr **nda = arg;
+	struct ndmsg *ndm;
+	struct nlattr *tb[NDA_MAX+1];
 	struct neigh_table *tbl;
 	struct net_device *dev = NULL;
-	int err = -ENODEV;
+	int err;
 
-	if (ndm->ndm_ifindex &&
-	    (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+	if (err < 0)
 		goto out;
 
+	err = -EINVAL;
+	if (tb[NDA_DST] == NULL)
+		goto out;
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_ifindex) {
+		dev = dev_get_by_index(ndm->ndm_ifindex);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto out;
+		}
+
+		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+			goto out_dev_put;
+	}
+
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-		struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1];
-		struct rtattr *dst_attr = nda[NDA_DST - 1];
-		int override = 1;
-		struct neighbour *n;
+		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+		struct neighbour *neigh;
+		void *dst, *lladdr;
 
 		if (tbl->family != ndm->ndm_family)
 			continue;
 		read_unlock(&neigh_tbl_lock);
 
-		err = -EINVAL;
-		if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len)
+		if (nla_len(tb[NDA_DST]) < tbl->key_len)
 			goto out_dev_put;
+		dst = nla_data(tb[NDA_DST]);
+		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
+			struct pneigh_entry *pn;
+
 			err = -ENOBUFS;
-			if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1))
+			pn = pneigh_lookup(tbl, dst, dev, 1);
+			if (pn) {
+				pn->flags = ndm->ndm_flags;
 				err = 0;
+			}
 			goto out_dev_put;
 		}
 
-		err = -EINVAL;
-		if (!dev)
-			goto out;
-		if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len)
+		if (dev == NULL)
 			goto out_dev_put;
+
+		neigh = neigh_lookup(tbl, dst, dev);
+		if (neigh == NULL) {
+			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+				err = -ENOENT;
+				goto out_dev_put;
+			}
 	
-		n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev);
-		if (n) {
+			neigh = __neigh_lookup_errno(tbl, dst, dev);
+			if (IS_ERR(neigh)) {
+				err = PTR_ERR(neigh);
+				goto out_dev_put;
+			}
+		} else {
 			if (nlh->nlmsg_flags & NLM_F_EXCL) {
 				err = -EEXIST;
-				neigh_release(n);
+				neigh_release(neigh);
 				goto out_dev_put;
 			}
-			
-			override = nlh->nlmsg_flags & NLM_F_REPLACE;
-		} else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
-			err = -ENOENT;
-			goto out_dev_put;
-		} else {
-			n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev);
-			if (IS_ERR(n)) {
-				err = PTR_ERR(n);
-				goto out_dev_put;
-			}
+
+			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
+				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
 		}
 
-		err = neigh_update(n,
-				   lladdr_attr ? RTA_DATA(lladdr_attr) : NULL,
-				   ndm->ndm_state,
-				   (override ? NEIGH_UPDATE_F_OVERRIDE : 0) |
-				   NEIGH_UPDATE_F_ADMIN);
-
-		neigh_release(n);
+		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+		neigh_release(neigh);
 		goto out_dev_put;
 	}
 
 	read_unlock(&neigh_tbl_lock);
-	err = -EADDRNOTAVAIL;
+	err = -EAFNOSUPPORT;
+
 out_dev_put:
 	if (dev)
 		dev_put(dev);
@@ -1570,56 +1598,59 @@
 
 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 {
-	struct rtattr *nest = NULL;
-	
-	nest = RTA_NEST(skb, NDTA_PARMS);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NDTA_PARMS);
+	if (nest == NULL)
+		return -ENOBUFS;
 
 	if (parms->dev)
-		RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
 
-	RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
-	RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
-	RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
-	RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
-	RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
-	RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
-	RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
-	RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
 		      parms->base_reachable_time);
-	RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
-	RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
-	RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
-	RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
-	RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
-	RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
 
-	return RTA_NEST_END(skb, nest);
+	return nla_nest_end(skb, nest);
 
-rtattr_failure:
-	return RTA_NEST_CANCEL(skb, nest);
+nla_put_failure:
+	return nla_nest_cancel(skb, nest);
 }
 
-static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
-			      struct netlink_callback *cb)
+static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
+			      u32 pid, u32 seq, int type, int flags)
 {
 	struct nlmsghdr *nlh;
 	struct ndtmsg *ndtmsg;
 
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
-			       NLM_F_MULTI);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	ndtmsg = NLMSG_DATA(nlh);
+	ndtmsg = nlmsg_data(nlh);
 
 	read_lock_bh(&tbl->lock);
 	ndtmsg->ndtm_family = tbl->family;
 	ndtmsg->ndtm_pad1   = 0;
 	ndtmsg->ndtm_pad2   = 0;
 
-	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
-	RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
-	RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
-	RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
-	RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
 
 	{
 		unsigned long now = jiffies;
@@ -1638,7 +1669,7 @@
 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
 		};
 
-		RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
 	}
 
 	{
@@ -1663,55 +1694,50 @@
 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
 		}
 
-		RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
 	}
 
 	BUG_ON(tbl->parms.dev);
 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
 
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_END(skb, nlh);
+	return nlmsg_end(skb, nlh);
 
-rtattr_failure:
+nla_put_failure:
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_CANCEL(skb, nlh);
- 
-nlmsg_failure:
-	return -1;
+	return nlmsg_cancel(skb, nlh);
 }
 
-static int neightbl_fill_param_info(struct neigh_table *tbl,
+static int neightbl_fill_param_info(struct sk_buff *skb,
+				    struct neigh_table *tbl,
 				    struct neigh_parms *parms,
-				    struct sk_buff *skb,
-				    struct netlink_callback *cb)
+				    u32 pid, u32 seq, int type,
+				    unsigned int flags)
 {
 	struct ndtmsg *ndtmsg;
 	struct nlmsghdr *nlh;
 
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
-			       NLM_F_MULTI);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
 
-	ndtmsg = NLMSG_DATA(nlh);
+	ndtmsg = nlmsg_data(nlh);
 
 	read_lock_bh(&tbl->lock);
 	ndtmsg->ndtm_family = tbl->family;
 	ndtmsg->ndtm_pad1   = 0;
 	ndtmsg->ndtm_pad2   = 0;
-	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
 
-	if (neightbl_fill_parms(skb, parms) < 0)
-		goto rtattr_failure;
+	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
+	    neightbl_fill_parms(skb, parms) < 0)
+		goto errout;
 
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_END(skb, nlh);
-
-rtattr_failure:
+	return nlmsg_end(skb, nlh);
+errout:
 	read_unlock_bh(&tbl->lock);
-	return NLMSG_CANCEL(skb, nlh);
-
-nlmsg_failure:
-	return -1;
+	return nlmsg_cancel(skb, nlh);
 }
  
 static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
@@ -1727,28 +1753,61 @@
 	return NULL;
 }
 
+static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
+	[NDTA_NAME]		= { .type = NLA_STRING },
+	[NDTA_THRESH1]		= { .type = NLA_U32 },
+	[NDTA_THRESH2]		= { .type = NLA_U32 },
+	[NDTA_THRESH3]		= { .type = NLA_U32 },
+	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
+	[NDTA_PARMS]		= { .type = NLA_NESTED },
+};
+
+static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
+	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
+	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
+	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
+	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
+	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
+	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
+	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
+	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
+	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
+	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
+};
+
 int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct neigh_table *tbl;
-	struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
-	struct rtattr **tb = arg;
-	int err = -EINVAL;
+	struct ndtmsg *ndtmsg;
+	struct nlattr *tb[NDTA_MAX+1];
+	int err;
 
-	if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
-		return -EINVAL;
+	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+			  nl_neightbl_policy);
+	if (err < 0)
+		goto errout;
 
+	if (tb[NDTA_NAME] == NULL) {
+		err = -EINVAL;
+		goto errout;
+	}
+
+	ndtmsg = nlmsg_data(nlh);
 	read_lock(&neigh_tbl_lock);
 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
 			continue;
 
-		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
+		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
 			break;
 	}
 
 	if (tbl == NULL) {
 		err = -ENOENT;
-		goto errout;
+		goto errout_locked;
 	}
 
 	/* 
@@ -1757,165 +1816,178 @@
 	 */
 	write_lock_bh(&tbl->lock);
 
-	if (tb[NDTA_THRESH1 - 1])
-		tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
-
-	if (tb[NDTA_THRESH2 - 1])
-		tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
-
-	if (tb[NDTA_THRESH3 - 1])
-		tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
-
-	if (tb[NDTA_GC_INTERVAL - 1])
-		tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
-
-	if (tb[NDTA_PARMS - 1]) {
-		struct rtattr *tbp[NDTPA_MAX];
+	if (tb[NDTA_PARMS]) {
+		struct nlattr *tbp[NDTPA_MAX+1];
 		struct neigh_parms *p;
-		u32 ifindex = 0;
+		int i, ifindex = 0;
 
-		if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0)
-			goto rtattr_failure;
+		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
+				       nl_ntbl_parm_policy);
+		if (err < 0)
+			goto errout_tbl_lock;
 
-		if (tbp[NDTPA_IFINDEX - 1])
-			ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+		if (tbp[NDTPA_IFINDEX])
+			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
 
 		p = lookup_neigh_params(tbl, ifindex);
 		if (p == NULL) {
 			err = -ENOENT;
-			goto rtattr_failure;
+			goto errout_tbl_lock;
 		}
-	
-		if (tbp[NDTPA_QUEUE_LEN - 1])
-			p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
 
-		if (tbp[NDTPA_PROXY_QLEN - 1])
-			p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
+		for (i = 1; i <= NDTPA_MAX; i++) {
+			if (tbp[i] == NULL)
+				continue;
 
-		if (tbp[NDTPA_APP_PROBES - 1])
-			p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
-
-		if (tbp[NDTPA_UCAST_PROBES - 1])
-			p->ucast_probes =
-			   RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]);
-
-		if (tbp[NDTPA_MCAST_PROBES - 1])
-			p->mcast_probes =
-			   RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
-
-		if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
-			p->base_reachable_time =
-			   RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
-
-		if (tbp[NDTPA_GC_STALETIME - 1])
-			p->gc_staletime =
-			   RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
-
-		if (tbp[NDTPA_DELAY_PROBE_TIME - 1])
-			p->delay_probe_time =
-			   RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]);
-
-		if (tbp[NDTPA_RETRANS_TIME - 1])
-			p->retrans_time =
-			   RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
-
-		if (tbp[NDTPA_ANYCAST_DELAY - 1])
-			p->anycast_delay =
-			   RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
-
-		if (tbp[NDTPA_PROXY_DELAY - 1])
-			p->proxy_delay =
-			   RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
-
-		if (tbp[NDTPA_LOCKTIME - 1])
-			p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]);
+			switch (i) {
+			case NDTPA_QUEUE_LEN:
+				p->queue_len = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_PROXY_QLEN:
+				p->proxy_qlen = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_APP_PROBES:
+				p->app_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_UCAST_PROBES:
+				p->ucast_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_MCAST_PROBES:
+				p->mcast_probes = nla_get_u32(tbp[i]);
+				break;
+			case NDTPA_BASE_REACHABLE_TIME:
+				p->base_reachable_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_GC_STALETIME:
+				p->gc_staletime = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_DELAY_PROBE_TIME:
+				p->delay_probe_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_RETRANS_TIME:
+				p->retrans_time = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_ANYCAST_DELAY:
+				p->anycast_delay = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_PROXY_DELAY:
+				p->proxy_delay = nla_get_msecs(tbp[i]);
+				break;
+			case NDTPA_LOCKTIME:
+				p->locktime = nla_get_msecs(tbp[i]);
+				break;
+			}
+		}
 	}
 
+	if (tb[NDTA_THRESH1])
+		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
+
+	if (tb[NDTA_THRESH2])
+		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
+
+	if (tb[NDTA_THRESH3])
+		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
+
+	if (tb[NDTA_GC_INTERVAL])
+		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
+
 	err = 0;
 
-rtattr_failure:
+errout_tbl_lock:
 	write_unlock_bh(&tbl->lock);
-errout:
+errout_locked:
 	read_unlock(&neigh_tbl_lock);
+errout:
 	return err;
 }
 
 int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx, family;
-	int s_idx = cb->args[0];
+	int family, tidx, nidx = 0;
+	int tbl_skip = cb->args[0];
+	int neigh_skip = cb->args[1];
 	struct neigh_table *tbl;
 
-	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 
 	read_lock(&neigh_tbl_lock);
-	for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) {
+	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
 		struct neigh_parms *p;
 
-		if (idx < s_idx || (family && tbl->family != family))
+		if (tidx < tbl_skip || (family && tbl->family != family))
 			continue;
 
-		if (neightbl_fill_info(tbl, skb, cb) <= 0)
+		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
+				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
+				       NLM_F_MULTI) <= 0)
 			break;
 
-		for (++idx, p = tbl->parms.next; p; p = p->next, idx++) {
-			if (idx < s_idx)
+		for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
+			if (nidx < neigh_skip)
 				continue;
 
-			if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0)
+			if (neightbl_fill_param_info(skb, tbl, p,
+						     NETLINK_CB(cb->skb).pid,
+						     cb->nlh->nlmsg_seq,
+						     RTM_NEWNEIGHTBL,
+						     NLM_F_MULTI) <= 0)
 				goto out;
 		}
 
+		neigh_skip = 0;
 	}
 out:
 	read_unlock(&neigh_tbl_lock);
-	cb->args[0] = idx;
+	cb->args[0] = tidx;
+	cb->args[1] = nidx;
 
 	return skb->len;
 }
 
-static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-			   u32 pid, u32 seq, int event, unsigned int flags)
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
+			   u32 pid, u32 seq, int type, unsigned int flags)
 {
 	unsigned long now = jiffies;
-	unsigned char *b = skb->tail;
 	struct nda_cacheinfo ci;
-	int locked = 0;
-	u32 probes;
-	struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
-					 sizeof(struct ndmsg), flags);
-	struct ndmsg *ndm = NLMSG_DATA(nlh);
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
 
-	ndm->ndm_family	 = n->ops->family;
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family	 = neigh->ops->family;
 	ndm->ndm_pad1    = 0;
 	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags	 = n->flags;
-	ndm->ndm_type	 = n->type;
-	ndm->ndm_ifindex = n->dev->ifindex;
-	RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
-	read_lock_bh(&n->lock);
-	locked		 = 1;
-	ndm->ndm_state	 = n->nud_state;
-	if (n->nud_state & NUD_VALID)
-		RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
-	ci.ndm_used	 = now - n->used;
-	ci.ndm_confirmed = now - n->confirmed;
-	ci.ndm_updated	 = now - n->updated;
-	ci.ndm_refcnt	 = atomic_read(&n->refcnt) - 1;
-	probes = atomic_read(&n->probes);
-	read_unlock_bh(&n->lock);
-	locked		 = 0;
-	RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
-	RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes);
-	nlh->nlmsg_len	 = skb->tail - b;
-	return skb->len;
+	ndm->ndm_flags	 = neigh->flags;
+	ndm->ndm_type	 = neigh->type;
+	ndm->ndm_ifindex = neigh->dev->ifindex;
 
-nlmsg_failure:
-rtattr_failure:
-	if (locked)
-		read_unlock_bh(&n->lock);
-	skb_trim(skb, b - skb->data);
-	return -1;
+	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
+
+	read_lock_bh(&neigh->lock);
+	ndm->ndm_state	 = neigh->nud_state;
+	if ((neigh->nud_state & NUD_VALID) &&
+	    nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
+		read_unlock_bh(&neigh->lock);
+		goto nla_put_failure;
+	}
+
+	ci.ndm_used	 = now - neigh->used;
+	ci.ndm_confirmed = now - neigh->confirmed;
+	ci.ndm_updated	 = now - neigh->updated;
+	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
+	read_unlock_bh(&neigh->lock);
+
+	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
+	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 
@@ -1959,7 +2031,7 @@
 	int t, family, s_t;
 
 	read_lock(&neigh_tbl_lock);
-	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 	s_t = cb->args[0];
 
 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
@@ -2338,41 +2410,35 @@
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_ARPD
+static void __neigh_notify(struct neighbour *n, int type, int flags)
+{
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = neigh_fill_info(skb, n, 0, 0, type, flags);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_NEIGH, err);
+}
+
 void neigh_app_ns(struct neighbour *n)
 {
-	struct nlmsghdr  *nlh;
-	int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
-
-	if (!skb)
-		return;
-
-	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) {
-		kfree_skb(skb);
-		return;
-	}
-	nlh			   = (struct nlmsghdr *)skb->data;
-	nlh->nlmsg_flags	   = NLM_F_REQUEST;
-	NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
+	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
 }
 
 static void neigh_app_notify(struct neighbour *n)
 {
-	struct nlmsghdr *nlh;
-	int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-	struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
-
-	if (!skb)
-		return;
-
-	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) {
-		kfree_skb(skb);
-		return;
-	}
-	nlh			   = (struct nlmsghdr *)skb->data;
-	NETLINK_CB(skb).dst_group  = RTNLGRP_NEIGH;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC);
+	__neigh_notify(n, RTM_NEWNEIGH, 0);
 }
 
 #endif /* CONFIG_ARPD */
@@ -2386,7 +2452,7 @@
 	ctl_table		neigh_neigh_dir[2];
 	ctl_table		neigh_proto_dir[2];
 	ctl_table		neigh_root_dir[2];
-} neigh_sysctl_template = {
+} neigh_sysctl_template __read_mostly = {
 	.neigh_vars = {
 		{
 			.ctl_name	= NET_NEIGH_MCAST_SOLICIT,
@@ -2659,7 +2725,6 @@
 #endif	/* CONFIG_SYSCTL */
 
 EXPORT_SYMBOL(__neigh_event_send);
-EXPORT_SYMBOL(neigh_add);
 EXPORT_SYMBOL(neigh_changeaddr);
 EXPORT_SYMBOL(neigh_compat_output);
 EXPORT_SYMBOL(neigh_connected_output);
@@ -2679,11 +2744,8 @@
 EXPORT_SYMBOL(neigh_table_init);
 EXPORT_SYMBOL(neigh_table_init_no_netlink);
 EXPORT_SYMBOL(neigh_update);
-EXPORT_SYMBOL(neigh_update_hhs);
 EXPORT_SYMBOL(pneigh_enqueue);
 EXPORT_SYMBOL(pneigh_lookup);
-EXPORT_SYMBOL(neightbl_dump_info);
-EXPORT_SYMBOL(neightbl_set);
 
 #ifdef CONFIG_ARPD
 EXPORT_SYMBOL(neigh_app_ns);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 471da45..ead5920 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -110,7 +110,7 @@
 
 	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
 
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
 	    !(u16)csum_fold(csum_add(psum, skb->csum)))
 		return 0;
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6a7320b..72145d4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1786,7 +1786,7 @@
 			 * use ipv6_get_lladdr if/when it's get exported
 			 */
 
-			read_lock(&addrconf_lock);
+			rcu_read_lock();
 			if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) {
 				struct inet6_ifaddr *ifp;
 
@@ -1805,7 +1805,7 @@
 				}
 				read_unlock_bh(&idev->lock);
 			}
-			read_unlock(&addrconf_lock);
+			rcu_read_unlock();
 			if (err)
 				printk("pktgen: ERROR: IPv6 link address not availble.\n");
 		}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 30cc1ba..d8e25e0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/security.h>
 #include <linux/mutex.h>
+#include <linux/if_addr.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -49,6 +50,7 @@
 #include <net/udp.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
+#include <net/fib_rules.h>
 #include <net/netlink.h>
 #ifdef CONFIG_NET_WIRELESS_RTNETLINK
 #include <linux/wireless.h>
@@ -56,6 +58,7 @@
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
 static DEFINE_MUTEX(rtnl_mutex);
+static struct sock *rtnl;
 
 void rtnl_lock(void)
 {
@@ -93,8 +96,6 @@
 	return 0;
 }
 
-struct sock *rtnl;
-
 struct rtnetlink_link * rtnetlink_links[NPROTO];
 
 static const int rtm_min[RTM_NR_FAMILIES] =
@@ -102,8 +103,7 @@
 	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
 	[RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
 	[RTM_FAM(RTM_NEWROUTE)]     = NLMSG_LENGTH(sizeof(struct rtmsg)),
-	[RTM_FAM(RTM_NEWNEIGH)]     = NLMSG_LENGTH(sizeof(struct ndmsg)),
-	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct rtmsg)),
+	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
 	[RTM_FAM(RTM_NEWQDISC)]     = NLMSG_LENGTH(sizeof(struct tcmsg)),
 	[RTM_FAM(RTM_NEWTCLASS)]    = NLMSG_LENGTH(sizeof(struct tcmsg)),
 	[RTM_FAM(RTM_NEWTFILTER)]   = NLMSG_LENGTH(sizeof(struct tcmsg)),
@@ -111,7 +111,6 @@
 	[RTM_FAM(RTM_NEWPREFIX)]    = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
-	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NLMSG_LENGTH(sizeof(struct ndtmsg)),
 };
 
 static const int rta_max[RTM_NR_FAMILIES] =
@@ -119,13 +118,11 @@
 	[RTM_FAM(RTM_NEWLINK)]      = IFLA_MAX,
 	[RTM_FAM(RTM_NEWADDR)]      = IFA_MAX,
 	[RTM_FAM(RTM_NEWROUTE)]     = RTA_MAX,
-	[RTM_FAM(RTM_NEWNEIGH)]     = NDA_MAX,
-	[RTM_FAM(RTM_NEWRULE)]      = RTA_MAX,
+	[RTM_FAM(RTM_NEWRULE)]      = FRA_MAX,
 	[RTM_FAM(RTM_NEWQDISC)]     = TCA_MAX,
 	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
 	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
 	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
-	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NDTA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -168,24 +165,52 @@
 	return err;
 }
 
+int rtnl_unicast(struct sk_buff *skb, u32 pid)
+{
+	return nlmsg_unicast(rtnl, skb, pid);
+}
+
+int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+		struct nlmsghdr *nlh, gfp_t flags)
+{
+	int report = 0;
+
+	if (nlh)
+		report = nlmsg_report(nlh);
+
+	return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+}
+
+void rtnl_set_sk_err(u32 group, int error)
+{
+	netlink_set_err(rtnl, 0, group, error);
+}
+
 int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 {
-	struct rtattr *mx = (struct rtattr*)skb->tail;
-	int i;
+	struct nlattr *mx;
+	int i, valid = 0;
 
-	RTA_PUT(skb, RTA_METRICS, 0, NULL);
-	for (i=0; i<RTAX_MAX; i++) {
-		if (metrics[i])
-			RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
+	mx = nla_nest_start(skb, RTA_METRICS);
+	if (mx == NULL)
+		return -ENOBUFS;
+
+	for (i = 0; i < RTAX_MAX; i++) {
+		if (metrics[i]) {
+			valid++;
+			NLA_PUT_U32(skb, i+1, metrics[i]);
+		}
 	}
-	mx->rta_len = skb->tail - (u8*)mx;
-	if (mx->rta_len == RTA_LENGTH(0))
-		skb_trim(skb, (u8*)mx - skb->data);
-	return 0;
 
-rtattr_failure:
-	skb_trim(skb, (u8*)mx - skb->data);
-	return -1;
+	if (!valid) {
+		nla_nest_cancel(skb, mx);
+		return 0;
+	}
+
+	return nla_nest_end(skb, mx);
+
+nla_put_failure:
+	return nla_nest_cancel(skb, mx);
 }
 
 
@@ -216,41 +241,73 @@
 	}
 }
 
-static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
-				 int type, u32 pid, u32 seq, u32 change, 
-				 unsigned int flags)
+static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
+				 struct net_device_stats *b)
 {
-	struct ifinfomsg *r;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	a->rx_packets = b->rx_packets;
+	a->tx_packets = b->tx_packets;
+	a->rx_bytes = b->rx_bytes;
+	a->tx_bytes = b->tx_bytes;
+	a->rx_errors = b->rx_errors;
+	a->tx_errors = b->tx_errors;
+	a->rx_dropped = b->rx_dropped;
+	a->tx_dropped = b->tx_dropped;
 
-	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
-	r = NLMSG_DATA(nlh);
-	r->ifi_family = AF_UNSPEC;
-	r->__ifi_pad = 0;
-	r->ifi_type = dev->type;
-	r->ifi_index = dev->ifindex;
-	r->ifi_flags = dev_get_flags(dev);
-	r->ifi_change = change;
+	a->multicast = b->multicast;
+	a->collisions = b->collisions;
 
-	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+	a->rx_length_errors = b->rx_length_errors;
+	a->rx_over_errors = b->rx_over_errors;
+	a->rx_crc_errors = b->rx_crc_errors;
+	a->rx_frame_errors = b->rx_frame_errors;
+	a->rx_fifo_errors = b->rx_fifo_errors;
+	a->rx_missed_errors = b->rx_missed_errors;
 
-	if (1) {
-		u32 txqlen = dev->tx_queue_len;
-		RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen);
-	}
+	a->tx_aborted_errors = b->tx_aborted_errors;
+	a->tx_carrier_errors = b->tx_carrier_errors;
+	a->tx_fifo_errors = b->tx_fifo_errors;
+	a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+	a->tx_window_errors = b->tx_window_errors;
 
-	if (1) {
-		u32 weight = dev->weight;
-		RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight);
-	}
+	a->rx_compressed = b->rx_compressed;
+	a->tx_compressed = b->tx_compressed;
+}
 
-	if (1) {
-		u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN;
-		u8 link_mode = dev->link_mode;
-		RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate);
-		RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode);
-	}
+static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+			    void *iwbuf, int iwbuflen, int type, u32 pid,
+			    u32 seq, u32 change, unsigned int flags)
+{
+	struct ifinfomsg *ifm;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->__ifi_pad = 0;
+	ifm->ifi_type = dev->type;
+	ifm->ifi_index = dev->ifindex;
+	ifm->ifi_flags = dev_get_flags(dev);
+	ifm->ifi_change = change;
+
+	NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
+	NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
+	NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
+	NLA_PUT_U8(skb, IFLA_OPERSTATE,
+		   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
+	NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
+	NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+
+	if (dev->ifindex != dev->iflink)
+		NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
+
+	if (dev->master)
+		NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
+
+	if (dev->qdisc_sleeping)
+		NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
 
 	if (1) {
 		struct rtnl_link_ifmap map = {
@@ -261,58 +318,38 @@
 			.dma         = dev->dma,
 			.port        = dev->if_port,
 		};
-		RTA_PUT(skb, IFLA_MAP, sizeof(map), &map);
+		NLA_PUT(skb, IFLA_MAP, sizeof(map), &map);
 	}
 
 	if (dev->addr_len) {
-		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
-		RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
-	}
-
-	if (1) {
-		u32 mtu = dev->mtu;
-		RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
-	}
-
-	if (dev->ifindex != dev->iflink) {
-		u32 iflink = dev->iflink;
-		RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink);
-	}
-
-	if (dev->qdisc_sleeping)
-		RTA_PUT(skb, IFLA_QDISC,
-			strlen(dev->qdisc_sleeping->ops->id) + 1,
-			dev->qdisc_sleeping->ops->id);
-	
-	if (dev->master) {
-		u32 master = dev->master->ifindex;
-		RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master);
+		NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+		NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
 	}
 
 	if (dev->get_stats) {
-		unsigned long *stats = (unsigned long*)dev->get_stats(dev);
+		struct net_device_stats *stats = dev->get_stats(dev);
 		if (stats) {
-			struct rtattr  *a;
-			__u32	       *s;
-			int		i;
-			int		n = sizeof(struct rtnl_link_stats)/4;
+			struct nlattr *attr;
 
-			a = __RTA_PUT(skb, IFLA_STATS, n*4);
-			s = RTA_DATA(a);
-			for (i=0; i<n; i++)
-				s[i] = stats[i];
+			attr = nla_reserve(skb, IFLA_STATS,
+					   sizeof(struct rtnl_link_stats));
+			if (attr == NULL)
+				goto nla_put_failure;
+
+			copy_rtnl_link_stats(nla_data(attr), stats);
 		}
 	}
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	if (iwbuf)
+		NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
-static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
 	int s_idx = cb->args[0];
@@ -322,10 +359,9 @@
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
-					  NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq, 0,
-					  NLM_F_MULTI) <= 0)
+		if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK,
+				     NETLINK_CB(cb->skb).pid,
+				     cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
 			break;
 	}
 	read_unlock(&dev_base_lock);
@@ -334,52 +370,70 @@
 	return skb->len;
 }
 
-static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
+	[IFLA_IFNAME]		= { .type = NLA_STRING, .len = IFNAMSIZ-1 },
+	[IFLA_MAP]		= { .len = sizeof(struct rtnl_link_ifmap) },
+	[IFLA_MTU]		= { .type = NLA_U32 },
+	[IFLA_TXQLEN]		= { .type = NLA_U32 },
+	[IFLA_WEIGHT]		= { .type = NLA_U32 },
+	[IFLA_OPERSTATE]	= { .type = NLA_U8 },
+	[IFLA_LINKMODE]		= { .type = NLA_U8 },
+};
+
+static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct ifinfomsg  *ifm = NLMSG_DATA(nlh);
-	struct rtattr    **ida = arg;
+	struct ifinfomsg *ifm;
 	struct net_device *dev;
-	int err, send_addr_notify = 0;
+	int err, send_addr_notify = 0, modified = 0;
+	struct nlattr *tb[IFLA_MAX+1];
+	char ifname[IFNAMSIZ];
 
-	if (ifm->ifi_index >= 0)
-		dev = dev_get_by_index(ifm->ifi_index);
-	else if (ida[IFLA_IFNAME - 1]) {
-		char ifname[IFNAMSIZ];
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		goto errout;
 
-		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
-		                   IFNAMSIZ) >= IFNAMSIZ)
-			return -EINVAL;
-		dev = dev_get_by_name(ifname);
-	} else
-		return -EINVAL;
-
-	if (!dev)
-		return -ENODEV;
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+	else
+		ifname[0] = '\0';
 
 	err = -EINVAL;
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index >= 0)
+		dev = dev_get_by_index(ifm->ifi_index);
+	else if (tb[IFLA_IFNAME])
+		dev = dev_get_by_name(ifname);
+	else
+		goto errout;
 
-	if (ifm->ifi_flags)
-		dev_change_flags(dev, ifm->ifi_flags);
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
 
-	if (ida[IFLA_MAP - 1]) {
+	if (tb[IFLA_ADDRESS] &&
+	    nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+		goto errout_dev;
+
+	if (tb[IFLA_BROADCAST] &&
+	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+		goto errout_dev;
+
+	if (tb[IFLA_MAP]) {
 		struct rtnl_link_ifmap *u_map;
 		struct ifmap k_map;
 
 		if (!dev->set_config) {
 			err = -EOPNOTSUPP;
-			goto out;
+			goto errout_dev;
 		}
 
 		if (!netif_device_present(dev)) {
 			err = -ENODEV;
-			goto out;
+			goto errout_dev;
 		}
-		
-		if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map)))
-			goto out;
 
-		u_map = RTA_DATA(ida[IFLA_MAP - 1]);
-
+		u_map = nla_data(tb[IFLA_MAP]);
 		k_map.mem_start = (unsigned long) u_map->mem_start;
 		k_map.mem_end = (unsigned long) u_map->mem_end;
 		k_map.base_addr = (unsigned short) u_map->base_addr;
@@ -388,200 +442,175 @@
 		k_map.port = (unsigned char) u_map->port;
 
 		err = dev->set_config(dev, &k_map);
+		if (err < 0)
+			goto errout_dev;
 
-		if (err)
-			goto out;
+		modified = 1;
 	}
 
-	if (ida[IFLA_ADDRESS - 1]) {
+	if (tb[IFLA_ADDRESS]) {
 		struct sockaddr *sa;
 		int len;
 
 		if (!dev->set_mac_address) {
 			err = -EOPNOTSUPP;
-			goto out;
+			goto errout_dev;
 		}
+
 		if (!netif_device_present(dev)) {
 			err = -ENODEV;
-			goto out;
+			goto errout_dev;
 		}
-		if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
-			goto out;
 
 		len = sizeof(sa_family_t) + dev->addr_len;
 		sa = kmalloc(len, GFP_KERNEL);
 		if (!sa) {
 			err = -ENOMEM;
-			goto out;
+			goto errout_dev;
 		}
 		sa->sa_family = dev->type;
-		memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]),
+		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
 		       dev->addr_len);
 		err = dev->set_mac_address(dev, sa);
 		kfree(sa);
 		if (err)
-			goto out;
+			goto errout_dev;
 		send_addr_notify = 1;
+		modified = 1;
 	}
 
-	if (ida[IFLA_BROADCAST - 1]) {
-		if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len))
-			goto out;
-		memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]),
-		       dev->addr_len);
-		send_addr_notify = 1;
+	if (tb[IFLA_MTU]) {
+		err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+		if (err < 0)
+			goto errout_dev;
+		modified = 1;
 	}
 
-	if (ida[IFLA_MTU - 1]) {
-		if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-			goto out;
-		err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1])));
-
-		if (err)
-			goto out;
-
-	}
-
-	if (ida[IFLA_TXQLEN - 1]) {
-		if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-			goto out;
-
-		dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1]));
-	}
-
-	if (ida[IFLA_WEIGHT - 1]) {
-		if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32)))
-			goto out;
-
-		dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
-	}
-
-	if (ida[IFLA_OPERSTATE - 1]) {
-		if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
-			goto out;
-
-		set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1])));
-	}
-
-	if (ida[IFLA_LINKMODE - 1]) {
-		if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
-			goto out;
-
-		write_lock_bh(&dev_base_lock);
-		dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1]));
-		write_unlock_bh(&dev_base_lock);
-	}
-
-	if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
-		char ifname[IFNAMSIZ];
-
-		if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1],
-		                   IFNAMSIZ) >= IFNAMSIZ)
-			goto out;
+	/*
+	 * Interface selected by interface index but interface
+	 * name provided implies that a name change has been
+	 * requested.
+	 */
+	if (ifm->ifi_index >= 0 && ifname[0]) {
 		err = dev_change_name(dev, ifname);
-		if (err)
-			goto out;
+		if (err < 0)
+			goto errout_dev;
+		modified = 1;
 	}
 
 #ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (ida[IFLA_WIRELESS - 1]) {
-
+	if (tb[IFLA_WIRELESS]) {
 		/* Call Wireless Extensions.
 		 * Various stuff checked in there... */
-		err = wireless_rtnetlink_set(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len);
-		if (err)
-			goto out;
+		err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
+					     nla_len(tb[IFLA_WIRELESS]));
+		if (err < 0)
+			goto errout_dev;
 	}
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
+	if (tb[IFLA_BROADCAST]) {
+		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
+		send_addr_notify = 1;
+	}
+
+
+	if (ifm->ifi_flags)
+		dev_change_flags(dev, ifm->ifi_flags);
+
+	if (tb[IFLA_TXQLEN])
+		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+
+	if (tb[IFLA_WEIGHT])
+		dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
+
+	if (tb[IFLA_OPERSTATE])
+		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+
+	if (tb[IFLA_LINKMODE]) {
+		write_lock_bh(&dev_base_lock);
+		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+		write_unlock_bh(&dev_base_lock);
+	}
+
 	err = 0;
 
-out:
+errout_dev:
+	if (err < 0 && modified && net_ratelimit())
+		printk(KERN_WARNING "A link change request failed with "
+		       "some changes comitted already. Interface %s may "
+		       "have been left with an inconsistent configuration, "
+		       "please check.\n", dev->name);
+
 	if (send_addr_notify)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 
 	dev_put(dev);
+errout:
 	return err;
 }
 
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-static int do_getlink(struct sk_buff *in_skb, struct nlmsghdr* in_nlh, void *arg)
+static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct ifinfomsg  *ifm = NLMSG_DATA(in_nlh);
-	struct rtattr    **ida = arg;
-	struct net_device *dev;
-	struct ifinfomsg *r;
-	struct nlmsghdr  *nlh;
-	int err = -ENOBUFS;
-	struct sk_buff *skb;
-	unsigned char	 *b;
-	char *iw_buf = NULL;
+	struct ifinfomsg *ifm;
+	struct nlattr *tb[IFLA_MAX+1];
+	struct net_device *dev = NULL;
+	struct sk_buff *nskb;
+	char *iw_buf = NULL, *iw = NULL;
 	int iw_buf_len = 0;
+	int err, payload;
 
-	if (ifm->ifi_index >= 0)
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+	if (err < 0)
+		return err;	/* no device reference held yet, skip errout */
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_index >= 0) {
 		dev = dev_get_by_index(ifm->ifi_index);
-	else
+		if (dev == NULL)
+			return -ENODEV;
+	} else
 		return -EINVAL;
-	if (!dev)
-		return -ENODEV;
+
 
 #ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (ida[IFLA_WIRELESS - 1]) {
-
+	if (tb[IFLA_WIRELESS]) {
 		/* Call Wireless Extensions. We need to know the size before
 		 * we can alloc. Various stuff checked in there... */
-		err = wireless_rtnetlink_get(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len, &iw_buf, &iw_buf_len);
-		if (err)
-			goto out;
+		err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
+					     nla_len(tb[IFLA_WIRELESS]),
+					     &iw_buf, &iw_buf_len);
+		if (err < 0)
+			goto errout;
+
+		iw = iw_buf + IW_EV_POINT_OFF;	/* payload is at an offset in iw_buf */
 	}
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
-	/* Create a skb big enough to include all the data.
-	 * Some requests are way bigger than 4k... Jean II */
-	skb = alloc_skb((NLMSG_LENGTH(sizeof(*r))) + (RTA_SPACE(iw_buf_len)),
-			GFP_KERNEL);
-	if (!skb)
-		goto out;
-	b = skb->tail;
+	payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) +
+			      nla_total_size(iw_buf_len));
+	nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+	if (nskb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
 
-	/* Put in the message the usual good stuff */
-	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, in_nlh->nlmsg_seq,
-			RTM_NEWLINK, sizeof(*r));
-	r = NLMSG_DATA(nlh);
-	r->ifi_family = AF_UNSPEC;
-	r->__ifi_pad = 0;
-	r->ifi_type = dev->type;
-	r->ifi_index = dev->ifindex;
-	r->ifi_flags = dev->flags;
-	r->ifi_change = 0;
+	err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK,
+			       NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0);
+	if (err <= 0) {
+		kfree_skb(nskb);
+		goto errout;
+	}
 
-	/* Put the wireless payload if it exist */
-	if(iw_buf != NULL)
-		RTA_PUT(skb, IFLA_WIRELESS, iw_buf_len,
-			iw_buf + IW_EV_POINT_OFF);
-
-	nlh->nlmsg_len = skb->tail - b;
-
-	/* Needed ? */
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:
-	if(iw_buf != NULL)
-		kfree(iw_buf);
+	err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);	/* send the reply, not the request skb */
+errout:
+	kfree(iw_buf);
 	dev_put(dev);
+
 	return err;
-
-rtattr_failure:
-nlmsg_failure:
-	kfree_skb(skb);
-	goto out;
 }
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
-static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
 	int s_idx = cb->family;
@@ -608,20 +637,22 @@
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifinfomsg) +
-			       sizeof(struct rtnl_link_ifmap) +
-			       sizeof(struct rtnl_link_stats) + 128);
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb)
-		return;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
 
-	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) {
+	err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_LINK, err);
 }
 
 /* Protected by RTNL sempahore.  */
@@ -746,18 +777,19 @@
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
-	[RTM_GETLINK     - RTM_BASE] = {
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-					 .doit   = do_getlink,
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-					 .dumpit = rtnetlink_dump_ifinfo },
-	[RTM_SETLINK     - RTM_BASE] = { .doit   = do_setlink		 },
-	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
-	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
+	[RTM_GETLINK     - RTM_BASE] = { .doit   = rtnl_getlink,
+					 .dumpit = rtnl_dump_ifinfo	 },
+	[RTM_SETLINK     - RTM_BASE] = { .doit   = rtnl_setlink		 },
+	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
+	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
 	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete		 },
 	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info	 },
-	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	 },
+#ifdef CONFIG_FIB_RULES
+	[RTM_NEWRULE     - RTM_BASE] = { .doit   = fib_nl_newrule	 },
+	[RTM_DELRULE     - RTM_BASE] = { .doit   = fib_nl_delrule	 },
+#endif
+	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info	 },
 	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set		 },
 };
@@ -817,7 +849,9 @@
 EXPORT_SYMBOL(rtattr_parse);
 EXPORT_SYMBOL(rtnetlink_links);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
-EXPORT_SYMBOL(rtnl);
 EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
 EXPORT_SYMBOL(rtnl_unlock);
+EXPORT_SYMBOL(rtnl_unicast);
+EXPORT_SYMBOL(rtnl_notify);
+EXPORT_SYMBOL(rtnl_set_sk_err);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c54f366..c448c7f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1397,7 +1397,7 @@
 	unsigned int csum;
 	long csstart;
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		csstart = skb->h.raw - skb->data;
 	else
 		csstart = skb_headlen(skb);
@@ -1411,7 +1411,7 @@
 		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
 					      skb->len - csstart, 0);
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		long csstuff = csstart + skb->csum;
 
 		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
@@ -1898,10 +1898,10 @@
  *	@len: length of data pulled
  *
  *	This function performs an skb_pull on the packet and updates
- *	update the CHECKSUM_HW checksum.  It should be used on receive
- *	path processing instead of skb_pull unless you know that the
- *	checksum difference is zero (e.g., a valid IP header) or you
- *	are setting ip_summed to CHECKSUM_NONE.
+ *	the CHECKSUM_COMPLETE checksum.  It should be used on
+ *	receive path processing instead of skb_pull unless you know
+ *	that the checksum difference is zero (e.g., a valid IP header)
+ *	or you are setting ip_summed to CHECKSUM_NONE.
  */
 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 {
@@ -1994,7 +1994,7 @@
 		frag = skb_shinfo(nskb)->frags;
 		k = 0;
 
-		nskb->ip_summed = CHECKSUM_HW;
+		nskb->ip_summed = CHECKSUM_PARTIAL;
 		nskb->csum = skb->csum;
 		memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
 
@@ -2046,19 +2046,14 @@
 	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
 					      sizeof(struct sk_buff),
 					      0,
-					      SLAB_HWCACHE_ALIGN,
+					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					      NULL, NULL);
-	if (!skbuff_head_cache)
-		panic("cannot create skbuff cache");
-
 	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
 						(2*sizeof(struct sk_buff)) +
 						sizeof(atomic_t),
 						0,
-						SLAB_HWCACHE_ALIGN,
+						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL, NULL);
-	if (!skbuff_fclone_cache)
-		panic("cannot create skbuff cache");
 }
 
 EXPORT_SYMBOL(___pskb_trim);
diff --git a/net/core/sock.c b/net/core/sock.c
index 51fcfbc..b77e155 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -187,13 +187,13 @@
 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 
 /* Run time adjustable parameters. */
-__u32 sysctl_wmem_max = SK_WMEM_MAX;
-__u32 sysctl_rmem_max = SK_RMEM_MAX;
-__u32 sysctl_wmem_default = SK_WMEM_MAX;
-__u32 sysctl_rmem_default = SK_RMEM_MAX;
+__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
 /* Maximal space eaten by iovec or ancilliary data plus some space */
-int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
@@ -247,11 +247,7 @@
 		goto out;
 	}
 
-	/* It would be deadlock, if sock_queue_rcv_skb is used
-	   with socket lock! We assume that users of this
-	   function are lock free.
-	*/
-	err = sk_filter(sk, skb, 1);
+	err = sk_filter(sk, skb);
 	if (err)
 		goto out;
 
@@ -278,7 +274,7 @@
 {
 	int rc = NET_RX_SUCCESS;
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
 	skb->dev = NULL;
@@ -606,15 +602,15 @@
 			break;
 
 		case SO_DETACH_FILTER:
-			spin_lock_bh(&sk->sk_lock.slock);
-			filter = sk->sk_filter;
+			rcu_read_lock_bh();
+			filter = rcu_dereference(sk->sk_filter);
                         if (filter) {
-				sk->sk_filter = NULL;
-				spin_unlock_bh(&sk->sk_lock.slock);
+				rcu_assign_pointer(sk->sk_filter, NULL);
 				sk_filter_release(sk, filter);
+				rcu_read_unlock_bh();
 				break;
 			}
-			spin_unlock_bh(&sk->sk_lock.slock);
+			rcu_read_unlock_bh();
 			ret = -ENONET;
 			break;
 
@@ -884,10 +880,10 @@
 	if (sk->sk_destruct)
 		sk->sk_destruct(sk);
 
-	filter = sk->sk_filter;
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		sk_filter_release(sk, filter);
-		sk->sk_filter = NULL;
+		rcu_assign_pointer(sk->sk_filter, NULL);
 	}
 
 	sock_disable_timestamp(sk);
@@ -911,7 +907,7 @@
 	if (newsk != NULL) {
 		struct sk_filter *filter;
 
-		memcpy(newsk, sk, sk->sk_prot->obj_size);
+		sock_copy(newsk, sk);
 
 		/* SANITY */
 		sk_node_init(&newsk->sk_node);
diff --git a/net/core/utils.c b/net/core/utils.c
index e31c90e..2682490 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -4,6 +4,7 @@
  *	Authors:
  *	net_random Alan Cox
  *	net_ratelimit Andy Kleen
+ *	in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project
  *
  *	Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  *
@@ -191,3 +192,215 @@
 }
 
 EXPORT_SYMBOL(in_aton);
+
+#define IN6PTON_XDIGIT		0x00010000
+#define IN6PTON_DIGIT		0x00020000
+#define IN6PTON_COLON_MASK	0x00700000
+#define IN6PTON_COLON_1		0x00100000	/* single : requested */
+#define IN6PTON_COLON_2		0x00200000	/* second : requested */
+#define IN6PTON_COLON_1_2	0x00400000	/* :: requested */
+#define IN6PTON_DOT		0x00800000	/* . */
+#define IN6PTON_DELIM		0x10000000
+#define IN6PTON_NULL		0x20000000	/* first/tail */
+#define IN6PTON_UNKNOWN		0x40000000
+
+static inline int digit2bin(char c, char delim)
+{
+	/* The delimiter test comes first, so delim may itself be '.' or a digit. */
+	if (c == '\0' || c == delim)
+		return IN6PTON_DELIM;
+	if ('0' <= c && c <= '9')
+		return IN6PTON_DIGIT | (c - '0');	/* class flag plus value 0..9 */
+
+	return c == '.' ? IN6PTON_DOT : IN6PTON_UNKNOWN;
+}
+
+static inline int xdigit2bin(char c, char delim)
+{
+	/* The delimiter test comes first, so delim may be ':', '.' or a hex digit. */
+	if (c == '\0' || c == delim)
+		return IN6PTON_DELIM;
+	if ('0' <= c && c <= '9')	/* decimal digits are flagged as hex digits too */
+		return IN6PTON_XDIGIT | IN6PTON_DIGIT | (c - '0');
+	if ('a' <= c && c <= 'f')
+		return IN6PTON_XDIGIT | (10 + c - 'a');
+	if ('A' <= c && c <= 'F')
+		return IN6PTON_XDIGIT | (10 + c - 'A');
+	if (c == ':')
+		return IN6PTON_COLON_MASK;	/* overlaps every IN6PTON_COLON_* bit */
+
+	return c == '.' ? IN6PTON_DOT : IN6PTON_UNKNOWN;
+}
+
+int in4_pton(const char *src, int srclen,	/* srclen < 0: use strlen(src) */
+	     u8 *dst,				/* 4 bytes, written only on success */
+	     char delim, const char **end)	/* *end, if non-NULL: where parsing stopped */
+{	/* Parse dotted-decimal IPv4 text; returns 1 on success, 0 on failure. */
+	const char *s;
+	u8 *d;
+	u8 dbuf[4];
+	int ret = 0;
+	int i;
+	int w = -1;	/* value of the current octet; -1 while it has no digit yet */
+
+	if (srclen < 0)
+		srclen = strlen(src);
+	s = src;
+	d = dbuf;
+	i = 0;
+	while(1) {
+		int c;
+		c = xdigit2bin(srclen > 0 ? *s : '\0', delim);	/* past srclen reads as '\0' */
+		if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) {
+			goto out;
+		}
+		if (c & (IN6PTON_DOT | IN6PTON_DELIM)) {
+			if (w < 0)	/* empty octet, e.g. "1..2" or a trailing '.' */
+				goto out;
+			*d++ = (u8)w;	/* w is within 0..255 here */
+			w = -1;
+			i++;
+			if (c & IN6PTON_DELIM) {
+				if (i != 4)	/* need exactly four octets */
+					goto out;
+				break;
+			}
+			goto cont;
+		}
+		w = (w < 0 ? 0 : w * 10) + (c & 0xff);	/* digit value only: class flags kept out so w cannot overflow */
+		if (w > 255) {
+			goto out;
+		}
+cont:
+		if (i >= 4)	/* a '.' after the fourth octet */
+			goto out;
+		s++;
+		srclen--;
+	}
+	ret = 1;
+	memcpy(dst, dbuf, sizeof(dbuf));
+out:
+	if (end)
+		*end = s;
+	return ret;
+}
+
+EXPORT_SYMBOL(in4_pton);
+
+int in6_pton(const char *src, int srclen,	/* srclen < 0: use strlen(src) */
+	     u8 *dst,				/* 16 bytes, written only on success */
+	     char delim, const char **end)	/* *end, if non-NULL: where parsing stopped */
+{	/* Parse IPv6 text (optionally with "::" and a dotted IPv4 tail); returns 1 on success, 0 on failure. */
+	const char *s, *tok = NULL;	/* tok: first character after the most recent ':' */
+	u8 *d, *dc = NULL;		/* d: write cursor in dbuf; dc: cursor value when "::" was seen */
+	u8 dbuf[16];
+	int ret = 0;
+	int i;
+	int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL;	/* classes accepted for the next character */
+	int w = 0;			/* 16-bit word being accumulated */
+
+	memset(dbuf, 0, sizeof(dbuf));
+
+	s = src;
+	d = dbuf;
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	while (1) {
+		int c;
+
+		c = xdigit2bin(srclen > 0 ? *s : '\0', delim);	/* past srclen reads as '\0' */
+		if (!(c & state))	/* character class not acceptable in this state */
+			goto out;
+		if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) {
+			/* process one 16-bit word */
+			if (!(state & IN6PTON_NULL)) {	/* nothing is stored while state carries IN6PTON_NULL */
+				*d++ = (w >> 8) & 0xff;
+				*d++ = w & 0xff;
+			}
+			w = 0;
+			if (c & IN6PTON_DELIM) {
+				/* We've processed last word */
+				break;
+			}
+			/*
+			 * COLON_1 => XDIGIT
+			 * COLON_2 => XDIGIT|DELIM
+			 * COLON_1_2 => COLON_2
+			 */
+			switch (state & IN6PTON_COLON_MASK) {
+			case IN6PTON_COLON_2:
+				dc = d;	/* second ':' of "::": remember where the gap sits in dbuf */
+				state = IN6PTON_XDIGIT | IN6PTON_DELIM;
+				if (dc - dbuf >= sizeof(dbuf))
+					state |= IN6PTON_NULL;
+				break;
+			case IN6PTON_COLON_1|IN6PTON_COLON_1_2:
+				state = IN6PTON_XDIGIT | IN6PTON_COLON_2;
+				break;
+			case IN6PTON_COLON_1:
+				state = IN6PTON_XDIGIT;
+				break;
+			case IN6PTON_COLON_1_2:
+				state = IN6PTON_COLON_2;
+				break;
+			default:
+				state = 0;	/* no class bit left: the next character always fails */
+			}
+			tok = s + 1;	/* start of the next word, kept for the dotted-IPv4 re-parse below */
+			goto cont;
+		}
+
+		if (c & IN6PTON_DOT) {	/* hand the word starting at tok to in4_pton(), output at d; w is discarded */
+			ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s);	/* NOTE(review): s - tok is only meaningful when tok != NULL; presumably always the case here, confirm */
+			if (ret > 0) {
+				d += 4;
+				break;
+			}
+			goto out;
+		}
+
+		w = (w << 4) | (0xff & c);	/* low byte of c is the hex digit value */
+		state = IN6PTON_COLON_1 | IN6PTON_DELIM;
+		if (!(w & 0xf000)) {	/* top nibble still clear: one more hex digit is allowed */
+			state |= IN6PTON_XDIGIT;
+		}
+		if (!dc && d + 2 < dbuf + sizeof(dbuf)) {	/* no "::" yet and not the last word: "::" allowed, end of input not */
+			state |= IN6PTON_COLON_1_2;
+			state &= ~IN6PTON_DELIM;
+		}
+		if (d + 2 >= dbuf + sizeof(dbuf)) {	/* this word fills dbuf: no further ':' */
+			state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2);
+		}
+cont:
+		if ((dc && d + 4 < dbuf + sizeof(dbuf)) ||
+		    d + 4 == dbuf + sizeof(dbuf)) {	/* four bytes must still fit for an IPv4 tail */
+			state |= IN6PTON_DOT;
+		}
+		if (d >= dbuf + sizeof(dbuf)) {	/* dbuf is full: no more digits or colons */
+			state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK);
+		}
+		s++;
+		srclen--;
+	}
+
+	i = 15; d--;	/* i: last index of dst; d: last byte written to dbuf */
+
+	if (dc) {
+		while(d >= dc)	/* bytes written after "::" go to the tail of dst */
+			dst[i--] = *d--;
+		while(i >= dc - dbuf)	/* zero-fill the gap */
+			dst[i--] = 0;
+		while(i >= 0)	/* bytes written before "::" keep their position */
+			dst[i--] = *d--;
+	} else
+		memcpy(dst, dbuf, sizeof(dbuf));
+
+	ret = 1;
+out:
+	if (end)
+		*end = s;
+	return ret;
+}
+
+EXPORT_SYMBOL(in6_pton);
diff --git a/net/core/wireless.c b/net/core/wireless.c
index de0bde4..3168fca 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -72,7 +72,6 @@
 
 /***************************** INCLUDES *****************************/
 
-#include <linux/config.h>		/* Not needed ??? */
 #include <linux/module.h>
 #include <linux/types.h>		/* off_t */
 #include <linux/netdevice.h>		/* struct ifreq, dev_get_by_name() */
@@ -86,6 +85,7 @@
 
 #include <linux/wireless.h>		/* Pretty obvious */
 #include <net/iw_handler.h>		/* New driver API */
+#include <net/netlink.h>
 
 #include <asm/uaccess.h>		/* copy_to_user() */
 
@@ -1850,7 +1850,7 @@
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&wireless_nlevent_queue)))
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
+		rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 }
 
 static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 8c211c5..4d176d3 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -142,14 +142,13 @@
 	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
 
 	if (av != NULL) {
-		av->dccpav_buf_head	=
-			av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
+		av->dccpav_buf_head	= DCCP_MAX_ACKVEC_LEN - 1;
 		av->dccpav_buf_ackno	= DCCP_MAX_SEQNO + 1;
 		av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
 		av->dccpav_ack_ptr	= 0;
 		av->dccpav_time.tv_sec	= 0;
 		av->dccpav_time.tv_usec	= 0;
-		av->dccpav_sent_len	= av->dccpav_vec_len = 0;
+		av->dccpav_vec_len	= 0;
 		INIT_LIST_HEAD(&av->dccpav_records);
 	}
 
@@ -353,11 +352,13 @@
 {
 	struct dccp_ackvec_record *next;
 
-	av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1;
-	if (av->dccpav_buf_tail == 0)
-		av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
-
-	av->dccpav_vec_len -= avr->dccpavr_sent_len;
+	/* sort out vector length */
+	if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr)
+		av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head;
+	else
+		av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1
+				     - av->dccpav_buf_head
+				     + avr->dccpavr_ack_ptr;
 
 	/* free records */
 	list_for_each_entry_safe_from(avr, next, &av->dccpav_records,
@@ -434,8 +435,7 @@
 		break;
 found:
 		if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) {
-			const u8 state = (*vector &
-					  DCCP_ACKVEC_STATE_MASK) >> 6;
+			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
 			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
 #ifdef CONFIG_IP_DCCP_DEBUG
 				struct dccp_sock *dp = dccp_sk(sk);
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 0adf4b5..2424eff 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -54,9 +54,7 @@
 	struct list_head dccpav_records;
 	struct timeval	dccpav_time;
 	u8		dccpav_buf_head;
-	u8		dccpav_buf_tail;
 	u8		dccpav_ack_ptr;
-	u8		dccpav_sent_len;
 	u8		dccpav_vec_len;
 	u8		dccpav_buf_nonce;
 	u8		dccpav_ack_nonce;
@@ -107,7 +105,7 @@
 
 static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
 {
-	return av->dccpav_sent_len != av->dccpav_vec_len;
+	return av->dccpav_vec_len;
 }
 #else /* CONFIG_IP_DCCP_ACKVEC */
 static inline int dccp_ackvec_init(void)
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index ca00191..32752f7 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -30,6 +30,14 @@
 
 	  If in doubt, say M.
 
+config IP_DCCP_CCID2_DEBUG
+	  bool "CCID2 debug"
+	  depends on IP_DCCP_CCID2
+	  ---help---
+	    Enable CCID2 debug messages.
+
+	    If in doubt, say N.
+
 config IP_DCCP_CCID3
 	tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
 	depends on IP_DCCP
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e961562..457dd3d 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -27,7 +27,6 @@
  *
  * BUGS:
  * - sequence number wrapping
- * - jiffies wrapping
  */
 
 #include "../ccid.h"
@@ -36,8 +35,7 @@
 
 static int ccid2_debug;
 
-#undef CCID2_DEBUG
-#ifdef CCID2_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 #define ccid2_pr_debug(format, a...) \
         do { if (ccid2_debug) \
                 printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
@@ -46,9 +44,7 @@
 #define ccid2_pr_debug(format, a...)
 #endif
 
-static const int ccid2_seq_len = 128;
-
-#ifdef CCID2_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
 {
 	int len = 0;
@@ -71,8 +67,8 @@
 
 			/* packets are sent sequentially */
 			BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
-			BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent);
-			BUG_ON(len > ccid2_seq_len);
+			BUG_ON(time_before(seqp->ccid2s_sent,
+					   prev->ccid2s_sent));
 
 			seqp = prev;
 		}
@@ -84,16 +80,57 @@
 	do {
 		seqp = seqp->ccid2s_prev;
 		len++;
-		BUG_ON(len > ccid2_seq_len);
 	} while (seqp != hctx->ccid2hctx_seqh);
 
-	BUG_ON(len != ccid2_seq_len);
 	ccid2_pr_debug("total len=%d\n", len);
+	BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
 }
 #else
 #define ccid2_hc_tx_check_sanity(hctx) do {} while (0)
 #endif
 
+static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num,
+				 gfp_t gfp)	/* returns 0, -EINVAL or -ENOMEM */
+{	/* Add one buffer of num sequence entries to the TX history ring. */
+	struct ccid2_seq *seqp;
+	int i;
+
+	/* need >= 1 entry (seqp[num - 1] below) and a free slot for the pointer */
+	if (num < 1)
+		return -EINVAL;
+	if (hctx->ccid2hctx_seqbufc >= ARRAY_SIZE(hctx->ccid2hctx_seqbuf))
+		return -ENOMEM;
+
+	/* allocate buffer (kcalloc checks num * size) and initialize linked list */
+	seqp = kcalloc(num, sizeof(*seqp), gfp);
+	if (seqp == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < (num - 1); i++) {
+		seqp[i].ccid2s_next = &seqp[i + 1];
+		seqp[i + 1].ccid2s_prev = &seqp[i];
+	}
+	seqp[num - 1].ccid2s_next = seqp;
+	seqp->ccid2s_prev = &seqp[num - 1];
+
+	/* This is the first allocation.  Initialize the head and tail.  */
+	if (hctx->ccid2hctx_seqbufc == 0)
+		hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp;
+	else {
+		/* insert the new entries between the current head and tail */
+		hctx->ccid2hctx_seqh->ccid2s_next = seqp;
+		seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
+		hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1];
+		seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt;
+	}
+
+	/* store the original pointer to the buffer so we can free it */
+	hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp;
+	hctx->ccid2hctx_seqbufc++;
+
+	return 0;
+}
+
 static int ccid2_hc_tx_send_packet(struct sock *sk,
 				   struct sk_buff *skb, int len)
 {
@@ -122,7 +159,7 @@
 		}
 	}
 
-	return 100; /* XXX */
+	return 1; /* XXX CCID should dequeue when ready instead of polling */
 }
 
 static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
@@ -150,10 +187,8 @@
 	dp->dccps_l_ack_ratio = val;
 }
 
-static void ccid2_change_cwnd(struct sock *sk, int val)
+static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val)
 {
-	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
-
 	if (val == 0)
 		val = 1;
 
@@ -164,6 +199,17 @@
 	hctx->ccid2hctx_cwnd = val;
 }
 
+static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
+{
+	hctx->ccid2hctx_srtt = val;	/* store first, then trace the new value */
+	ccid2_pr_debug("change SRTT to %ld\n", val);
+}
+
+static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
+{	/* Assignment helper for the ccid2hctx_pipe counter. */
+	hctx->ccid2hctx_pipe = val;	/* NOTE(review): val is long, but ccid2hctx_pipe is declared int in ccid2.h */
+}
+
 static void ccid2_start_rto_timer(struct sock *sk);
 
 static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -193,11 +239,11 @@
 	ccid2_start_rto_timer(sk);
 
 	/* adjust pipe, cwnd etc */
-	hctx->ccid2hctx_pipe = 0;
+	ccid2_change_pipe(hctx, 0);
 	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
 	if (hctx->ccid2hctx_ssthresh < 2)
 		hctx->ccid2hctx_ssthresh = 2;
-	ccid2_change_cwnd(sk, 1);
+	ccid2_change_cwnd(hctx, 1);
 
 	/* clear state about stuff we sent */
 	hctx->ccid2hctx_seqt	= hctx->ccid2hctx_seqh;
@@ -232,13 +278,14 @@
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+	struct ccid2_seq *next;
 	u64 seq;
 
 	ccid2_hc_tx_check_sanity(hctx);
 
 	BUG_ON(!hctx->ccid2hctx_sendwait);
 	hctx->ccid2hctx_sendwait = 0;
-	hctx->ccid2hctx_pipe++;
+	ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1);
 	BUG_ON(hctx->ccid2hctx_pipe < 0);
 
 	/* There is an issue.  What if another packet is sent between
@@ -251,16 +298,24 @@
 	hctx->ccid2hctx_seqh->ccid2s_seq   = seq;
 	hctx->ccid2hctx_seqh->ccid2s_acked = 0;
 	hctx->ccid2hctx_seqh->ccid2s_sent  = jiffies;
-	hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next;
+
+	next = hctx->ccid2hctx_seqh->ccid2s_next;
+	/* check if we need to alloc more space */
+	if (next == hctx->ccid2hctx_seqt) {
+		int rc;
+
+		ccid2_pr_debug("allocating more space in history\n");
+		rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_KERNEL);
+		BUG_ON(rc); /* XXX what do we do? */
+
+		next = hctx->ccid2hctx_seqh->ccid2s_next;
+		BUG_ON(next == hctx->ccid2hctx_seqt);
+	}
+	hctx->ccid2hctx_seqh = next;
 
 	ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
 		       hctx->ccid2hctx_pipe);
 
-	if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) {
-		/* XXX allocate more space */
-		WARN_ON(1);
-	}
-
 	hctx->ccid2hctx_sent++;
 
 	/* Ack Ratio.  Need to maintain a concept of how many windows we sent */
@@ -295,7 +350,7 @@
 	if (!timer_pending(&hctx->ccid2hctx_rtotimer))
 		ccid2_start_rto_timer(sk);
 
-#ifdef CCID2_DEBUG
+#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 	ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
 	ccid2_pr_debug("Sent: seq=%llu\n", seq);
 	do {
@@ -398,7 +453,7 @@
 			/* increase every 2 acks */
 			hctx->ccid2hctx_ssacks++;
 			if (hctx->ccid2hctx_ssacks == 2) {
-				ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
+				ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1);
 				hctx->ccid2hctx_ssacks = 0;
 				*maxincr = *maxincr - 1;
 			}
@@ -411,26 +466,28 @@
 		hctx->ccid2hctx_acks++;
 
 		if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
-			ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
+			ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1);
 			hctx->ccid2hctx_acks = 0;
 		}
 	}
 
 	/* update RTO */
 	if (hctx->ccid2hctx_srtt == -1 ||
-	    (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) {
-		unsigned long r = jiffies - seqp->ccid2s_sent;
+	    time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
+		unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
 		int s;
 
 		/* first measurement */
 		if (hctx->ccid2hctx_srtt == -1) {
 			ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
 			       	       r, jiffies, seqp->ccid2s_seq);
-			hctx->ccid2hctx_srtt = r;
+			ccid2_change_srtt(hctx, r);
 			hctx->ccid2hctx_rttvar = r >> 1;
 		} else {
 			/* RTTVAR */
 			long tmp = hctx->ccid2hctx_srtt - r;
+			long srtt;
+
 			if (tmp < 0)
 				tmp *= -1;
 
@@ -440,10 +497,12 @@
 			hctx->ccid2hctx_rttvar += tmp;
 
 			/* SRTT */
-			hctx->ccid2hctx_srtt *= 7;
-			hctx->ccid2hctx_srtt >>= 3;
+			srtt = hctx->ccid2hctx_srtt;
+			srtt *= 7;
+			srtt >>= 3;
 			tmp = r >> 3;
-			hctx->ccid2hctx_srtt += tmp;
+			srtt += tmp;
+			ccid2_change_srtt(hctx, srtt);
 		}
 		s = hctx->ccid2hctx_rttvar << 2;
 		/* clock granularity is 1 when based on jiffies */
@@ -479,13 +538,29 @@
 {
 	struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-	hctx->ccid2hctx_pipe--;
+	ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1);
 	BUG_ON(hctx->ccid2hctx_pipe < 0);
 
 	if (hctx->ccid2hctx_pipe == 0)
 		ccid2_hc_tx_kill_rto_timer(sk);
 }
 
+static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx,
+				   struct ccid2_seq *seqp)
+{
+	/* Sent before the last reaction: same congestion event, react only once. */
+	if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
+		ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
+		return;
+	}
+	hctx->ccid2hctx_last_cong = jiffies;
+
+	/* Request half the window; ssthresh follows the resulting cwnd, floor 2. */
+	ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1);
+	hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
+	if (hctx->ccid2hctx_ssthresh < 2)
+		hctx->ccid2hctx_ssthresh = 2;
+}
+
 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
@@ -496,7 +571,6 @@
 	unsigned char veclen;
 	int offset = 0;
 	int done = 0;
-	int loss = 0;
 	unsigned int maxincr = 0;
 
 	ccid2_hc_tx_check_sanity(hctx);
@@ -582,15 +656,16 @@
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = (*vector &
-						  DCCP_ACKVEC_STATE_MASK) >> 6;
+				const u8 state = *vector &
+						 DCCP_ACKVEC_STATE_MASK;
 
 				/* new packet received or marked */
 				if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
 				    !seqp->ccid2s_acked) {
 				    	if (state ==
 					    DCCP_ACKVEC_STATE_ECN_MARKED) {
-						loss = 1;
+					    	ccid2_congestion_event(hctx,
+								       seqp);
 					} else
 						ccid2_new_ack(sk, seqp,
 							      &maxincr);
@@ -642,7 +717,13 @@
 		/* check for lost packets */
 		while (1) {
 			if (!seqp->ccid2s_acked) {
-				loss = 1;
+				ccid2_pr_debug("Packet lost: %llu\n",
+					       seqp->ccid2s_seq);
+				/* XXX need to traverse from tail -> head in
+				 * order to detect multiple congestion events in
+				 * one ack vector.
+				 */
+				ccid2_congestion_event(hctx, seqp);
 				ccid2_hc_tx_dec_pipe(sk);
 			}
 			if (seqp == hctx->ccid2hctx_seqt)
@@ -661,53 +742,33 @@
 		hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
 	}
 
-	if (loss) {
-		/* XXX do bit shifts guarantee a 0 as the new bit? */
-		ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1);
-		hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
-		if (hctx->ccid2hctx_ssthresh < 2)
-			hctx->ccid2hctx_ssthresh = 2;
-	}
-
 	ccid2_hc_tx_check_sanity(hctx);
 }
 
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
         struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
-	int seqcount = ccid2_seq_len;
-	int i;
 
-	/* XXX init variables with proper values */
-	hctx->ccid2hctx_cwnd	  = 1;
-	hctx->ccid2hctx_ssthresh  = 10;
+	ccid2_change_cwnd(hctx, 1);
+	/* Initialize ssthresh to infinity.  This means that we will exit the
+	 * initial slow-start after the first packet loss.  This is what we
+	 * want.
+	 */
+	hctx->ccid2hctx_ssthresh  = ~0;
 	hctx->ccid2hctx_numdupack = 3;
+	hctx->ccid2hctx_seqbufc   = 0;
 
 	/* XXX init ~ to window size... */
-	hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) *
-					 seqcount, gfp_any());
-	if (hctx->ccid2hctx_seqbuf == NULL)
+	if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0)
 		return -ENOMEM;
 
-	for (i = 0; i < (seqcount - 1); i++) {
-		hctx->ccid2hctx_seqbuf[i].ccid2s_next =
-					&hctx->ccid2hctx_seqbuf[i + 1];
-		hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev =
-					&hctx->ccid2hctx_seqbuf[i];
-	}
-	hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next =
-					hctx->ccid2hctx_seqbuf;
-	hctx->ccid2hctx_seqbuf->ccid2s_prev =
-					&hctx->ccid2hctx_seqbuf[seqcount - 1];
-
-	hctx->ccid2hctx_seqh	 = hctx->ccid2hctx_seqbuf;
-	hctx->ccid2hctx_seqt	 = hctx->ccid2hctx_seqh;
 	hctx->ccid2hctx_sent	 = 0;
 	hctx->ccid2hctx_rto	 = 3 * HZ;
-	hctx->ccid2hctx_srtt	 = -1;
+	ccid2_change_srtt(hctx, -1);
 	hctx->ccid2hctx_rttvar	 = -1;
 	hctx->ccid2hctx_lastrtt  = 0;
 	hctx->ccid2hctx_rpdupack = -1;
+	hctx->ccid2hctx_last_cong = jiffies;
 
 	hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
 	hctx->ccid2hctx_rtotimer.data	  = (unsigned long)sk;
@@ -720,10 +781,13 @@
 static void ccid2_hc_tx_exit(struct sock *sk)
 {
         struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+	int i;
 
 	ccid2_hc_tx_kill_rto_timer(sk);
-	kfree(hctx->ccid2hctx_seqbuf);
-	hctx->ccid2hctx_seqbuf = NULL;
+
+	for (i = 0; i < hctx->ccid2hctx_seqbufc; i++)
+		kfree(hctx->ccid2hctx_seqbuf[i]);
+	hctx->ccid2hctx_seqbufc = 0;
 }
 
 static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 451a874..5b2ef4a 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -35,6 +35,9 @@
 	struct ccid2_seq	*ccid2s_next;
 };
 
+#define CCID2_SEQBUF_LEN 256
+#define CCID2_SEQBUF_MAX 128
+
 /** struct ccid2_hc_tx_sock - CCID2 TX half connection
  *
  * @ccid2hctx_ssacks - ACKs recv in slow start
@@ -50,10 +53,11 @@
 	int			ccid2hctx_cwnd;
 	int			ccid2hctx_ssacks;
 	int			ccid2hctx_acks;
-	int			ccid2hctx_ssthresh;
+	unsigned int		ccid2hctx_ssthresh;
 	int			ccid2hctx_pipe;
 	int			ccid2hctx_numdupack;
-	struct ccid2_seq	*ccid2hctx_seqbuf;
+	struct ccid2_seq	*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
+	int			ccid2hctx_seqbufc;
 	struct ccid2_seq	*ccid2hctx_seqh;
 	struct ccid2_seq	*ccid2hctx_seqt;
 	long			ccid2hctx_rto;
@@ -67,6 +71,7 @@
 	u64			ccid2hctx_rpseq;
 	int			ccid2hctx_rpdupack;
 	int			ccid2hctx_sendwait;
+	unsigned long		ccid2hctx_last_cong;
 };
 
 struct ccid2_hc_rx_sock {
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 090bc39..195aa95 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -900,7 +900,7 @@
 static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	struct dccp_li_hist_entry *next, *head;
+	struct dccp_li_hist_entry *head;
 	u64 seq_temp;
 
 	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
@@ -908,15 +908,15 @@
 		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
 			return;
 
-		next = (struct dccp_li_hist_entry *)
-		   hcrx->ccid3hcrx_li_hist.next;
-		next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+		   struct dccp_li_hist_entry, dccplih_node);
+		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
 	} else {
 		struct dccp_li_hist_entry *entry;
 		struct list_head *tail;
 
-		head = (struct dccp_li_hist_entry *)
-		   hcrx->ccid3hcrx_li_hist.next;
+		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+		   struct dccp_li_hist_entry, dccplih_node);
 		/* FIXME win count check removed as was wrong */
 		/* should make this check with receive history */
 		/* and compare there as per section 10.2 of RFC4342 */
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a5c5475..0a21be4 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -130,7 +130,7 @@
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
-extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo);
+extern void dccp_write_xmit(struct sock *sk, int block);
 extern void dccp_write_space(struct sock *sk);
 
 extern void dccp_init_xmit_timers(struct sock *sk);
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index b44c4550..cee553d 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -27,5 +27,10 @@
 extern int  dccp_feat_init(struct dccp_minisock *dmsk);
 
 extern int  dccp_feat_default_sequence_window;
+extern int  dccp_feat_default_rx_ccid;
+extern int  dccp_feat_default_tx_ccid;
+extern int  dccp_feat_default_ack_ratio;
+extern int  dccp_feat_default_send_ack_vector;
+extern int  dccp_feat_default_send_ndp_count;
 
 #endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 7f56f7e..9a1a76a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -501,6 +501,9 @@
 
 	dccp_openreq_init(req, &dp, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
@@ -605,10 +608,10 @@
 	if (req != NULL)
 		return dccp_check_req(sk, skb, req, prev);
 
-	nsk = __inet_lookup_established(&dccp_hashinfo,
-					iph->saddr, dh->dccph_sport,
-					iph->daddr, ntohs(dh->dccph_dport),
-					inet_iif(skb));
+	nsk = inet_lookup_established(&dccp_hashinfo,
+				      iph->saddr, dh->dccph_sport,
+				      iph->daddr, dh->dccph_dport,
+				      inet_iif(skb));
 	if (nsk != NULL) {
 		if (nsk->sk_state != DCCP_TIME_WAIT) {
 			bh_lock_sock(nsk);
@@ -678,6 +681,7 @@
 			   	     }
 			  };
 
+	security_skb_classify_flow(skb, &fl);
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
@@ -921,7 +925,7 @@
 	 * 	Look up flow ID in table and get corresponding socket */
 	sk = __inet_lookup(&dccp_hashinfo,
 			   skb->nh.iph->saddr, dh->dccph_sport,
-			   skb->nh.iph->daddr, ntohs(dh->dccph_dport),
+			   skb->nh.iph->daddr, dh->dccph_dport,
 			   inet_iif(skb));
 
 	/* 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 610c722..7a47399 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -201,6 +201,7 @@
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_dport = usin->sin6_port;
 	fl.fl_ip_sport = inet->sport;
+	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt != NULL && np->opt->srcrt != NULL) {
 		const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -230,7 +231,7 @@
 	ipv6_addr_copy(&np->saddr, saddr);
 	inet->rcv_saddr = LOOPBACK4_IPV6;
 
-	__ip6_dst_store(sk, dst, NULL);
+	__ip6_dst_store(sk, dst, NULL, NULL);
 
 	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt != NULL)
@@ -322,6 +323,7 @@
 			fl.oif = sk->sk_bound_dev_if;
 			fl.fl_ip_dport = inet->dport;
 			fl.fl_ip_sport = inet->sport;
+			security_sk_classify_flow(sk, &fl);
 
 			err = ip6_dst_lookup(sk, &dst, &fl);
 			if (err) {
@@ -422,6 +424,7 @@
 	fl.oif = ireq6->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
+	security_req_classify_flow(req, &fl);
 
 	if (dst == NULL) {
 		opt = np->opt;
@@ -566,6 +569,7 @@
 	fl.oif = inet6_iif(rxskb);
 	fl.fl_ip_dport = dh->dccph_dport;
 	fl.fl_ip_sport = dh->dccph_sport;
+	security_skb_classify_flow(rxskb, &fl);
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
 	if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
@@ -622,6 +626,7 @@
 	fl.oif = inet6_iif(rxskb);
 	fl.fl_ip_dport = dh->dccph_dport;
 	fl.fl_ip_sport = dh->dccph_sport;
+	security_req_classify_flow(req, &fl);
 
 	if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
 		if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
@@ -704,6 +709,9 @@
 
 	dccp_openreq_init(req, &dp, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq6 = inet6_rsk(req);
 	ireq = inet_rsk(req);
 	ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
@@ -842,6 +850,7 @@
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
+		security_sk_classify_flow(sk, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
 			goto out;
@@ -863,7 +872,7 @@
 	 * comment in that function for the gory details. -acme
 	 */
 
-	__ip6_dst_store(newsk, dst, NULL);
+	__ip6_dst_store(newsk, dst, NULL, NULL);
 	newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
 						      NETIF_F_TSO);
 	newdp6 = (struct dccp6_sock *)newsk;
@@ -961,7 +970,7 @@
 	if (skb->protocol == htons(ETH_P_IP))
 		return dccp_v4_do_rcv(sk, skb);
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard;
 
 	/*
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 58669be..7102e3a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -198,7 +198,7 @@
 	while (1) {
 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
-		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		if (sk->sk_err)
 			goto do_error;
 		if (!*timeo)
 			goto do_nonblock;
@@ -234,37 +234,72 @@
 	goto out;
 }
 
-int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
+static void dccp_write_xmit_timer(unsigned long data) {
+	struct sock *sk = (struct sock *)data;
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk))
+		sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
+	else
+		dccp_write_xmit(sk, 0);
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+void dccp_write_xmit(struct sock *sk, int block)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct sk_buff *skb;
+	long timeo = 30000; 	/* Budget for dccp_wait_for_ccid(); if a packet
+				   takes longer than this we have other issues */
+
+	while ((skb = skb_peek(&sk->sk_write_queue))) {
+		int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
 					 skb->len);
 
-	if (err > 0)
-		err = dccp_wait_for_ccid(sk, skb, timeo);
+		if (err > 0) {
+			if (!block) {
+				sk_reset_timer(sk, &dp->dccps_xmit_timer,
+						msecs_to_jiffies(err)+jiffies);
+				break;
+			} else
+				err = dccp_wait_for_ccid(sk, skb, &timeo);
+			if (err) {
+				printk(KERN_CRIT "%s:err at dccp_wait_for_ccid"
+						 " %d\n", __FUNCTION__, err);
+				dump_stack();
+			}
+		}
 
-	if (err == 0) {
-		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-		const int len = skb->len;
+		skb_dequeue(&sk->sk_write_queue);
+		if (err == 0) {
+			struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+			const int len = skb->len;
 
-		if (sk->sk_state == DCCP_PARTOPEN) {
-			/* See 8.1.5.  Handshake Completion */
-			inet_csk_schedule_ack(sk);
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+			if (sk->sk_state == DCCP_PARTOPEN) {
+				/* See 8.1.5.  Handshake Completion */
+				inet_csk_schedule_ack(sk);
+				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  inet_csk(sk)->icsk_rto,
 						  DCCP_RTO_MAX);
-			dcb->dccpd_type = DCCP_PKT_DATAACK;
-		} else if (dccp_ack_pending(sk))
-			dcb->dccpd_type = DCCP_PKT_DATAACK;
-		else
-			dcb->dccpd_type = DCCP_PKT_DATA;
+				dcb->dccpd_type = DCCP_PKT_DATAACK;
+			} else if (dccp_ack_pending(sk))
+				dcb->dccpd_type = DCCP_PKT_DATAACK;
+			else
+				dcb->dccpd_type = DCCP_PKT_DATA;
 
-		err = dccp_transmit_skb(sk, skb);
-		ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
-	} else
-		kfree_skb(skb);
-
-	return err;
+			err = dccp_transmit_skb(sk, skb);
+			ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+			if (err) {
+				printk(KERN_CRIT "%s:err from "
+					         "ccid_hc_tx_packet_sent %d\n",
+					         __FUNCTION__, err);
+				dump_stack();
+			}
+		} else
+			kfree_skb(skb);
+	}
 }
 
 int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -426,6 +461,9 @@
 	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));
 
 	icsk->icsk_retransmits = 0;
+	init_timer(&dp->dccps_xmit_timer);
+	dp->dccps_xmit_timer.data = (unsigned long)sk;
+	dp->dccps_xmit_timer.function = dccp_write_xmit_timer;
 }
 
 int dccp_connect(struct sock *sk)
@@ -560,8 +598,10 @@
 					DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
 
 	if (active) {
+		dccp_write_xmit(sk, 1);
 		dccp_skb_entail(sk, skb);
 		dccp_transmit_skb(sk, skb_clone(skb, prio));
+		/* FIXME do we need a retransmit timer here? */
 	} else
 		dccp_transmit_skb(sk, skb);
 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 6f14bb5..962df0e 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -662,17 +662,8 @@
 	if (rc != 0)
 		goto out_discard;
 
-	rc = dccp_write_xmit(sk, skb, &timeo);
-	/*
-	 * XXX we don't use sk_write_queue, so just discard the packet.
-	 *     Current plan however is to _use_ sk_write_queue with
-	 *     an algorith similar to tcp_sendmsg, where the main difference
-	 *     is that in DCCP we have to respect packet boundaries, so
-	 *     no coalescing of skbs.
-	 *
-	 *     This bug was _quickly_ found & fixed by just looking at an OSTRA
-	 *     generated callgraph 8) -acme
-	 */
+	skb_queue_tail(&sk->sk_write_queue, skb);
+	dccp_write_xmit(sk,0);
 out_release:
 	release_sock(sk);
 	return rc ? : len;
@@ -846,6 +837,7 @@
 
 void dccp_close(struct sock *sk, long timeout)
 {
+	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
 	int state;
 
@@ -862,6 +854,8 @@
 		goto adjudge_to_death;
 	}
 
+	sk_stop_timer(sk, &dp->dccps_xmit_timer);
+
 	/*
 	 * We need to flush the recv. buffs.  We do this only on the
 	 * descriptor close, not protocol-sourced closes, because the
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index c1ba945..38bc157 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -11,18 +11,12 @@
 
 #include <linux/mm.h>
 #include <linux/sysctl.h>
+#include "feat.h"
 
 #ifndef CONFIG_SYSCTL
 #error This file should not be compiled without CONFIG_SYSCTL defined
 #endif
 
-extern int dccp_feat_default_sequence_window;
-extern int dccp_feat_default_rx_ccid;
-extern int dccp_feat_default_tx_ccid;
-extern int dccp_feat_default_ack_ratio;
-extern int dccp_feat_default_send_ack_vector;
-extern int dccp_feat_default_send_ndp_count;
-
 static struct ctl_table dccp_default_table[] = {
 	{
 		.ctl_name	= NET_DCCP_DEFAULT_SEQ_WINDOW,
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 92f2ec4..36e72cb 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -27,6 +27,7 @@
 config DECNET_ROUTER
 	bool "DECnet: router support (EXPERIMENTAL)"
 	depends on DECNET && EXPERIMENTAL
+	select FIB_RULES
 	---help---
 	  Add support for turning your DECnet Endnode into a level 1 or 2
 	  router.  This is an experimental, but functional option.  If you
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 5486247..70e0273 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -130,6 +130,7 @@
 #include <linux/poll.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_nsp.h>
 #include <net/dn_dev.h>
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 476455f..01861fe 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -34,6 +34,7 @@
 #include <linux/seq_file.h>
 #include <linux/timer.h>
 #include <linux/string.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/skbuff.h>
@@ -45,6 +46,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_dev.h>
 #include <net/dn_route.h>
@@ -744,20 +746,23 @@
 static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+	int payload = sizeof(struct ifaddrmsg) + 128;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS);
-		return;
-	}
-	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+	skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
 }
 
 static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1417,8 +1422,6 @@
 	[RTM_DELROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_delroute,	},
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
 				      .dumpit	= dn_fib_dump,		},
-	[RTM_NEWRULE  - RTM_BASE] = { .doit	= dn_fib_rtm_newrule,	},
-	[RTM_DELRULE  - RTM_BASE] = { .doit	= dn_fib_rtm_delrule,	},
 	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= dn_fib_dump_rules,	},
 #else
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index fa20e2e..1cf0101 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -34,6 +34,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_route.h>
 #include <net/dn_fib.h>
@@ -54,11 +55,9 @@
 
 #define endfor_nexthops(fi) }
 
-extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
 static DEFINE_SPINLOCK(dn_fib_multipath_lock);
 static struct dn_fib_info *dn_fib_info_list;
-static DEFINE_RWLOCK(dn_fib_info_lock);
+static DEFINE_SPINLOCK(dn_fib_info_lock);
 
 static struct
 {
@@ -79,6 +78,9 @@
 	[RTN_XRESOLVE] =    { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
 };
 
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force);
+static int dn_fib_sync_up(struct net_device *dev);
+
 void dn_fib_free_info(struct dn_fib_info *fi)
 {
 	if (fi->fib_dead == 0) {
@@ -96,7 +98,7 @@
 
 void dn_fib_release_info(struct dn_fib_info *fi)
 {
-	write_lock(&dn_fib_info_lock);
+	spin_lock(&dn_fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		if (fi->fib_next)
 			fi->fib_next->fib_prev = fi->fib_prev;
@@ -107,7 +109,7 @@
 		fi->fib_dead = 1;
 		dn_fib_info_put(fi);
 	}
-	write_unlock(&dn_fib_info_lock);
+	spin_unlock(&dn_fib_info_lock);
 }
 
 static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
@@ -378,13 +380,13 @@
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock(&dn_fib_info_lock);
+	spin_lock(&dn_fib_info_lock);
 	fi->fib_next = dn_fib_info_list;
 	fi->fib_prev = NULL;
 	if (dn_fib_info_list)
 		dn_fib_info_list->fib_prev = fi;
 	dn_fib_info_list = fi;
-	write_unlock(&dn_fib_info_lock);
+	spin_unlock(&dn_fib_info_lock);
 	return fi;
 
 err_inval:
@@ -490,7 +492,8 @@
 		if (attr) {
 			if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2)
 				return -EINVAL;
-			if (i != RTA_MULTIPATH && i != RTA_METRICS)
+			if (i != RTA_MULTIPATH && i != RTA_METRICS &&
+			    i != RTA_TABLE)
 				rta[i-1] = (struct rtattr *)RTA_DATA(attr);
 		}
 	}
@@ -507,7 +510,7 @@
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = dn_fib_get_table(r->rtm_table, 0);
+	tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0);
 	if (tb)
 		return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
 
@@ -523,46 +526,13 @@
 	if (dn_fib_check_attr(r, rta))
 		return -EINVAL;
 
-	tb = dn_fib_get_table(r->rtm_table, 1);
+	tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1);
 	if (tb) 
 		return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb));
 
 	return -ENOBUFS;
 }
 
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	int t;
-	int s_t;
-	struct dn_fib_table *tb;
-
-	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
-		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
-			return dn_cache_dump(skb, cb);
-
-	s_t = cb->args[0];
-	if (s_t == 0)
-		s_t = cb->args[0] = RT_MIN_TABLE;
-
-	for(t = s_t; t <= RT_TABLE_MAX; t++) {
-		if (t < s_t)
-			continue;
-		if (t > s_t)
-			memset(&cb->args[1], 0,
-			       sizeof(cb->args) - sizeof(cb->args[0]));
-		tb = dn_fib_get_table(t, 0);
-		if (tb == NULL)
-			continue;
-		if (tb->dump(tb, skb, cb) < 0)
-			break;
-	}
-
-	cb->args[0] = t;
-
-	return skb->len;
-}
-
 static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
 {
 	struct dn_fib_table *tb;
@@ -682,7 +652,7 @@
 	return NOTIFY_DONE;
 }
 
-int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
+static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
 {
         int ret = 0;
         int scope = RT_SCOPE_NOWHERE;
@@ -726,7 +696,7 @@
 }
 
 
-int dn_fib_sync_up(struct net_device *dev)
+static int dn_fib_sync_up(struct net_device *dev)
 {
         int ret = 0;
 
@@ -760,22 +730,6 @@
         return ret;
 }
 
-void dn_fib_flush(void)
-{
-        int flushed = 0;
-        struct dn_fib_table *tb;
-        int id;
-
-        for(id = RT_TABLE_MAX; id > 0; id--) {
-                if ((tb = dn_fib_get_table(id, 0)) == NULL)
-                        continue;
-                flushed += tb->flush(tb);
-        }
-
-        if (flushed)
-                dn_rt_cache_flush(-1);
-}
-
 static struct notifier_block dn_fib_dnaddr_notifier = {
 	.notifier_call = dn_fib_dnaddr_event,
 };
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 86f7f3b..72ecc6e 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -586,7 +586,7 @@
         	goto out;
         }
 
-	err = sk_filter(sk, skb, 0);
+	err = sk_filter(sk, skb);
 	if (err)
 		goto out;
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 743e9fc..dd0761e 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -80,6 +80,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_dev.h>
 #include <net/dn_nsp.h>
@@ -1284,7 +1285,7 @@
 		dev_hold(out_dev);
 
 		if (res.r)
-			src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags);
+			src_map = fl.fld_src; /* no NAT support for now */
 
 		gateway = DN_FIB_RES_GW(res);
 		if (res.type == RTN_NAT) {
@@ -1485,6 +1486,7 @@
 	r->rtm_src_len = 0;
 	r->rtm_tos = 0;
 	r->rtm_table = RT_TABLE_MAIN;
+	RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type = rt->rt_type;
 	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
 	r->rtm_scope = RT_SCOPE_UNIVERSE;
@@ -1609,9 +1611,7 @@
 		goto out_free;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-
-	return err;
+	return rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 
 out_free:
 	kfree_skb(skb);
@@ -1781,14 +1781,9 @@
 {
 	int i, goal, order;
 
-	dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache",
-						   sizeof(struct dn_route),
-						   0, SLAB_HWCACHE_ALIGN,
-						   NULL, NULL);
-
-	if (!dn_dst_ops.kmem_cachep)
-		panic("DECnet: Failed to allocate dn_dst_cache\n");
-
+	dn_dst_ops.kmem_cachep =
+		kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 	init_timer(&dn_route_timer);
 	dn_route_timer.function = dn_dst_check_expire;
 	dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 6986be7..3e0c882 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -11,259 +11,213 @@
  *
  *
  * Changes:
+ *              Steve Whitehouse <steve@chygwyn.com>
+ *              Updated for Thomas Graf's generic rules
  *
  */
-#include <linux/string.h>
 #include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
 #include <linux/init.h>
-#include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
 #include <linux/netdevice.h>
-#include <linux/timer.h>
 #include <linux/spinlock.h>
-#include <linux/in_route.h>
 #include <linux/list.h>
 #include <linux/rcupdate.h>
-#include <asm/atomic.h>
-#include <asm/uaccess.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
 
+static struct fib_rules_ops dn_fib_rules_ops;
+
 struct dn_fib_rule
 {
-	struct hlist_node	r_hlist;
-	atomic_t		r_clntref;
-	u32			r_preference;
-	unsigned char		r_table;
-	unsigned char		r_action;
-	unsigned char		r_dst_len;
-	unsigned char		r_src_len;
-	__le16			r_src;
-	__le16			r_srcmask;
-	__le16			r_dst;
-	__le16			r_dstmask;
-	__le16			r_srcmap;
-	u8			r_flags;
+	struct fib_rule		common;
+	unsigned char		dst_len;
+	unsigned char		src_len;
+	__le16			src;
+	__le16			srcmask;
+	__le16			dst;
+	__le16			dstmask;
+	__le16			srcmap;
+	u8			flags;
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	u32			r_fwmark;
+	u32			fwmark;
+	u32			fwmask;
 #endif
-	int			r_ifindex;
-	char			r_ifname[IFNAMSIZ];
-	int			r_dead;
-	struct rcu_head		rcu;
 };
 
 static struct dn_fib_rule default_rule = {
-	.r_clntref =		ATOMIC_INIT(2),
-	.r_preference =		0x7fff,
-	.r_table =		RT_TABLE_MAIN,
-	.r_action =		RTN_UNICAST
+	.common = {
+		.refcnt =		ATOMIC_INIT(2),
+		.pref =			0x7fff,
+		.table =		RT_TABLE_MAIN,
+		.action =		FR_ACT_TO_TBL,
+	},
 };
 
-static struct hlist_head dn_fib_rules;
+static LIST_HEAD(dn_fib_rules);
 
-int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+
+int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct dn_fib_rule *r;
-	struct hlist_node *node;
-	int err = -ESRCH;
+	struct fib_lookup_arg arg = {
+		.result = res,
+	};
+	int err;
 
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) &&
-			rtm->rtm_src_len == r->r_src_len &&
-			rtm->rtm_dst_len == r->r_dst_len &&
-			(!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) &&
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
-			(!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
-			(!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
-			(!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
-			(!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
-			(!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
-
-			err = -EPERM;
-			if (r == &default_rule)
-				break;
-
-			hlist_del_rcu(&r->r_hlist);
-			r->r_dead = 1;
-			dn_fib_rule_put(r);
-			err = 0;
-			break;
-		}
-	}
+	err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg);
+	res->r = arg.rule;
 
 	return err;
 }
 
-static inline void dn_fib_rule_put_rcu(struct rcu_head *head)
+static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
+			      int flags, struct fib_lookup_arg *arg)
 {
-	struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu);
-	kfree(r);
-}
+	int err = -EAGAIN;
+	struct dn_fib_table *tbl;
 
-void dn_fib_rule_put(struct dn_fib_rule *r)
-{
-	if (atomic_dec_and_test(&r->r_clntref)) {
-		if (r->r_dead)
-			call_rcu(&r->rcu, dn_fib_rule_put_rcu);
-		else
-			printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n");
-	}
-}
+	switch(rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
 
+	case FR_ACT_UNREACHABLE:
+		err = -ENETUNREACH;
+		goto errout;
 
-int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
-{
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct dn_fib_rule *r, *new_r, *last = NULL;
-	struct hlist_node *node = NULL;
-	unsigned char table_id;
+	case FR_ACT_PROHIBIT:
+		err = -EACCES;
+		goto errout;
 
-	if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16)
-		return -EINVAL;
-
-	if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
-		return -EINVAL;
-
-	if (rtm->rtm_type == RTN_NAT)
-		return -EINVAL;
-
-	table_id = rtm->rtm_table;
-	if (table_id == RT_TABLE_UNSPEC) {
-		struct dn_fib_table *tb;
-		if (rtm->rtm_type == RTN_UNICAST) {
-			if ((tb = dn_fib_empty_table()) == NULL)
-				return -ENOBUFS;
-			table_id = tb->n;
-		}
+	case FR_ACT_BLACKHOLE:
+	default:
+		err = -EINVAL;
+		goto errout;
 	}
 
-	new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
-	if (!new_r)
-		return -ENOMEM;
+	tbl = dn_fib_get_table(rule->table, 0);
+	if (tbl == NULL)
+		goto errout;
 
-	if (rta[RTA_SRC-1])
-		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2);
-	if (rta[RTA_DST-1])
-		memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2);
-	if (rta[RTA_GATEWAY-1])
-		memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2);
-	new_r->r_src_len = rtm->rtm_src_len;
-	new_r->r_dst_len = rtm->rtm_dst_len;
-	new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len);
-	new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len);
+	err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result);
+	if (err > 0)
+		err = -EAGAIN;
+errout:
+	return err;
+}
+
+static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .type = NLA_U16 },
+	[FRA_DST]	= { .type = NLA_U16 },
+	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_FWMASK]	= { .type = NLA_U32 },
+	[FRA_TABLE]     = { .type = NLA_U32 },
+};
+
+static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+	__le16 daddr = fl->fld_dst;
+	__le16 saddr = fl->fld_src;
+
+	if (((saddr ^ r->src) & r->srcmask) ||
+	    ((daddr ^ r->dst) & r->dstmask))
+		return 0;
+
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (rta[RTA_PROTOINFO-1])
-		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
+	if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask)
+		return 0;
 #endif
-	new_r->r_action = rtm->rtm_type;
-	new_r->r_flags = rtm->rtm_flags;
-	if (rta[RTA_PRIORITY-1])
-		memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
-	new_r->r_table = table_id;
-	if (rta[RTA_IIF-1]) {
-		struct net_device *dev;
-		rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
-		new_r->r_ifindex = -1;
-		dev = dev_get_by_name(new_r->r_ifname);
-		if (dev) {
-			new_r->r_ifindex = dev->ifindex;
-			dev_put(dev);
-		}
-	}
 
-	r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist);
-	if (!new_r->r_preference) {
-		if (r && r->r_hlist.next != NULL) {
-			r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist);
-			if (r->r_preference)
-				new_r->r_preference = r->r_preference - 1;
-		}
-	}
-
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_preference > new_r->r_preference)
-			break;
-		last = r;
-	}
-	atomic_inc(&new_r->r_clntref);
-
-	if (last)
-		hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist);
-	else
-		hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist);
-	return 0;
+	return 1;
 }
 
-
-int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res)
+static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+				 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+				 struct nlattr **tb)
 {
-	struct dn_fib_rule *r, *policy;
-	struct dn_fib_table *tb;
-	__le16 saddr = flp->fld_src;
-	__le16 daddr = flp->fld_dst;
-	struct hlist_node *node;
-	int err;
+	int err = -EINVAL;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
 
-	rcu_read_lock();
+	if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos)
+		goto  errout;
 
-	hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) {
-		if (((saddr^r->r_src) & r->r_srcmask) ||
-		    ((daddr^r->r_dst) & r->r_dstmask) ||
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
-		    (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) ||
-#endif
-		    (r->r_ifindex && r->r_ifindex != flp->iif))
-			continue;
+	if (rule->table == RT_TABLE_UNSPEC) {
+		if (rule->action == FR_ACT_TO_TBL) {
+			struct dn_fib_table *table;
 
-		switch(r->r_action) {
-			case RTN_UNICAST:
-			case RTN_NAT:
-				policy = r;
-				break;
-			case RTN_UNREACHABLE:
-				rcu_read_unlock();
-				return -ENETUNREACH;
-			default:
-			case RTN_BLACKHOLE:
-				rcu_read_unlock();
-				return -EINVAL;
-			case RTN_PROHIBIT:
-				rcu_read_unlock();
-				return -EACCES;
-		}
+			table = dn_fib_empty_table();
+			if (table == NULL) {
+				err = -ENOBUFS;
+				goto errout;
+			}
 
-		if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL)
-			continue;
-		err = tb->lookup(tb, flp, res);
-		if (err == 0) {
-			res->r = policy;
-			if (policy)
-				atomic_inc(&policy->r_clntref);
-			rcu_read_unlock();
-			return 0;
-		}
-		if (err < 0 && err != -EAGAIN) {
-			rcu_read_unlock();
-			return err;
+			rule->table = table->n;
 		}
 	}
 
-	rcu_read_unlock();
-	return -ESRCH;
+	if (tb[FRA_SRC])
+		r->src = nla_get_u16(tb[FRA_SRC]);
+
+	if (tb[FRA_DST])
+		r->dst = nla_get_u16(tb[FRA_DST]);
+
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (tb[FRA_FWMARK]) {
+		r->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+		if (r->fwmark)
+			/* compatibility: if the mark value is non-zero all bits
+			 * are compared unless a mask is explicitly specified.
+			 */
+			r->fwmask = 0xFFFFFFFF;
+	}
+
+	if (tb[FRA_FWMASK])
+		r->fwmask = nla_get_u32(tb[FRA_FWMASK]);
+#endif
+
+	r->src_len = frh->src_len;
+	r->srcmask = dnet_make_mask(r->src_len);
+	r->dst_len = frh->dst_len;
+	r->dstmask = dnet_make_mask(r->dst_len);
+	err = 0;
+errout:
+	return err;
+}
+
+static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
+{
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
+
+	if (frh->src_len && (r->src_len != frh->src_len))
+		return 0;
+
+	if (frh->dst_len && (r->dst_len != frh->dst_len))
+		return 0;
+
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+		return 0;
+
+	if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+		return 0;
+#endif
+
+	if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC])))
+		return 0;
+
+	if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST])))
+		return 0;
+
+	return 1;
 }
 
 unsigned dnet_addr_type(__le16 addr)
@@ -271,7 +225,7 @@
 	struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
 	struct dn_fib_res res;
 	unsigned ret = RTN_UNICAST;
-	struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL];
+	struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
 
 	res.r = NULL;
 
@@ -284,142 +238,79 @@
 	return ret;
 }
 
-__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags)
+static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			    struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
 {
-	struct dn_fib_rule *r = res->r;
+	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
 
-	if (r->r_action == RTN_NAT) {
-		int addrtype = dnet_addr_type(r->r_srcmap);
+	frh->family = AF_DECnet;
+	frh->dst_len = r->dst_len;
+	frh->src_len = r->src_len;
+	frh->tos = 0;
 
-		if (addrtype == RTN_NAT) {
-			saddr = (saddr&~r->r_srcmask)|r->r_srcmap;
-			*flags |= RTCF_SNAT;
-		} else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) {
-			saddr = r->r_srcmap;
-			*flags |= RTCF_MASQ;
+#ifdef CONFIG_DECNET_ROUTE_FWMARK
+	if (r->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark);
+	if (r->fwmask || r->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask);
+#endif
+	if (r->dst_len)
+		NLA_PUT_U16(skb, FRA_DST, r->dst);
+	if (r->src_len)
+		NLA_PUT_U16(skb, FRA_SRC, r->src);
+
+	return 0;
+
+nla_put_failure:
+	return -ENOBUFS;
+}
+
+static u32 dn_fib_rule_default_pref(void)
+{
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&dn_fib_rules)) {
+		pos = dn_fib_rules.next;
+		if (pos->next != &dn_fib_rules) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
 		}
 	}
-	return saddr;
-}
 
-static void dn_fib_rules_detach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct dn_fib_rule *r;
-
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_ifindex == dev->ifindex)
-			r->r_ifindex = -1;
-	}
-}
-
-static void dn_fib_rules_attach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct dn_fib_rule *r;
-
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
-			r->r_ifindex = dev->ifindex;
-	}
-}
-
-static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	struct net_device *dev = ptr;
-
-	switch(event) {
-		case NETDEV_UNREGISTER:
-			dn_fib_rules_detach(dev);
-			dn_fib_sync_down(0, dev, 1);
-		case NETDEV_REGISTER:
-			dn_fib_rules_attach(dev);
-			dn_fib_sync_up(dev);
-	}
-
-	return NOTIFY_DONE;
-}
-
-
-static struct notifier_block dn_fib_rules_notifier = {
-	.notifier_call =	dn_fib_rules_event,
-};
-
-static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
-			    struct netlink_callback *cb, unsigned int flags)
-{
-	struct rtmsg *rtm;
-	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
-
-
-	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
-	rtm->rtm_family = AF_DECnet;
-	rtm->rtm_dst_len = r->r_dst_len;
-	rtm->rtm_src_len = r->r_src_len;
-	rtm->rtm_tos = 0;
-#ifdef CONFIG_DECNET_ROUTE_FWMARK
-	if (r->r_fwmark)
-		RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
-#endif
-	rtm->rtm_table = r->r_table;
-	rtm->rtm_protocol = 0;
-	rtm->rtm_scope = 0;
-	rtm->rtm_type = r->r_action;
-	rtm->rtm_flags = r->r_flags;
-
-	if (r->r_dst_len)
-		RTA_PUT(skb, RTA_DST, 2, &r->r_dst);
-	if (r->r_src_len)
-		RTA_PUT(skb, RTA_SRC, 2, &r->r_src);
-	if (r->r_ifname[0])
-		RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
-	if (r->r_preference)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
-	if (r->r_srcmap)
-		RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return 0;
 }
 
 int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int idx = 0;
-	int s_idx = cb->args[0];
-	struct dn_fib_rule *r;
-	struct hlist_node *node;
-
-	rcu_read_lock();
-	hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
-		if (idx < s_idx)
-			goto next;
-		if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
-			break;
-next:
-		idx++;
-	}
-	rcu_read_unlock();
-	cb->args[0] = idx;
-
-	return skb->len;
+	return fib_rules_dump(skb, cb, AF_DECnet);
 }
 
+static struct fib_rules_ops dn_fib_rules_ops = {
+	.family		= AF_DECnet,
+	.rule_size	= sizeof(struct dn_fib_rule),
+	.action		= dn_fib_rule_action,
+	.match		= dn_fib_rule_match,
+	.configure	= dn_fib_rule_configure,
+	.compare	= dn_fib_rule_compare,
+	.fill		= dn_fib_rule_fill,
+	.default_pref	= dn_fib_rule_default_pref,
+	.nlgroup	= RTNLGRP_DECnet_RULE,
+	.policy		= dn_fib_rule_policy,
+	.rules_list	= &dn_fib_rules,
+	.owner		= THIS_MODULE,
+};
+
 void __init dn_fib_rules_init(void)
 {
-	INIT_HLIST_HEAD(&dn_fib_rules);
-	hlist_add_head(&default_rule.r_hlist, &dn_fib_rules);
-	register_netdevice_notifier(&dn_fib_rules_notifier);
+	list_add_tail(&default_rule.common.list, &dn_fib_rules);
+	fib_rules_register(&dn_fib_rules_ops);
 }
 
 void __exit dn_fib_rules_cleanup(void)
 {
-	unregister_netdevice_notifier(&dn_fib_rules_notifier);
+	fib_rules_unregister(&dn_fib_rules_ops);
 }
 
 
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index e926c95..317904b 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -30,6 +30,7 @@
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/fib_rules.h>
 #include <net/dn.h>
 #include <net/dn_route.h>
 #include <net/dn_fib.h>
@@ -74,9 +75,9 @@
 for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
 
 #define RT_TABLE_MIN 1
-
+#define DN_FIB_TABLE_HASHSZ 256
+static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
 static DEFINE_RWLOCK(dn_fib_tables_lock);
-struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1];
 
 static kmem_cache_t *dn_hash_kmem __read_mostly;
 static int dn_fib_hash_zombies;
@@ -263,7 +264,7 @@
 }
 
 static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-                        u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
+                        u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
                         struct dn_fib_info *fi, unsigned int flags)
 {
         struct rtmsg *rtm;
@@ -277,6 +278,7 @@
         rtm->rtm_src_len = 0;
         rtm->rtm_tos = 0;
         rtm->rtm_table = tb_id;
+	RTA_PUT_U32(skb, RTA_TABLE, tb_id);
         rtm->rtm_flags = fi->fib_flags;
         rtm->rtm_scope = scope;
 	rtm->rtm_type  = type;
@@ -326,29 +328,29 @@
 }
 
 
-static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
+static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
                         struct nlmsghdr *nlh, struct netlink_skb_parms *req)
 {
         struct sk_buff *skb;
         u32 pid = req ? req->pid : 0;
-        int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256);
+	int err = -ENOBUFS;
 
-        skb = alloc_skb(size, GFP_KERNEL);
-        if (!skb)
-                return;
+        skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+        if (skb == NULL)
+		goto errout;
 
-        if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, 
-                                f->fn_type, f->fn_scope, &f->fn_key, z, 
-                                DN_FIB_INFO(f), 0) < 0) {
+        err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
+			       f->fn_type, f->fn_scope, &f->fn_key, z,
+			       DN_FIB_INFO(f), 0);
+	if (err < 0) {
                 kfree_skb(skb);
-                return;
+		goto errout;
         }
-        NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE;
-        if (nlh->nlmsg_flags & NLM_F_ECHO)
-                atomic_inc(&skb->users);
-        netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL);
-        if (nlh->nlmsg_flags & NLM_F_ECHO)
-                netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+	err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err);
 }
 
 static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, 
@@ -359,7 +361,7 @@
 {
 	int i, s_i;
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	for(i = 0; f; i++, f = f->fn_next) {
 		if (i < s_i)
 			continue;
@@ -372,11 +374,11 @@
 				(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
 				f->fn_scope, &f->fn_key, dz->dz_order, 
 				f->fn_info, NLM_F_MULTI) < 0) {
-			cb->args[3] = i;
+			cb->args[4] = i;
 			return -1;
 		}
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -387,20 +389,20 @@
 {
 	int h, s_h;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 	for(h = 0; h < dz->dz_divisor; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0]));
 		if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
 			continue;
 		if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -411,26 +413,63 @@
 	struct dn_zone *dz;
 	struct dn_hash *table = (struct dn_hash *)tb->data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 	read_lock(&dn_fib_tables_lock);
 	for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
 		if (m < s_m)
 			continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
 		if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			read_unlock(&dn_fib_tables_lock);
 			return -1;
 		}
 	}
 	read_unlock(&dn_fib_tables_lock);
-	cb->args[1] = m;
+	cb->args[2] = m;
 
         return skb->len;
 }
 
+int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
+	struct dn_fib_table *tb;
+	struct hlist_node *node;
+	int dumped = 0;
+
+	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+			return dn_cache_dump(skb, cb);
+
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+	/* s_e only applies to the resumed bucket; reset it for later ones */
+	for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_e = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) {
+			if (e < s_e)
+				goto next;
+			if (dumped)
+				memset(&cb->args[2], 0, sizeof(cb->args) -
+				                 2 * sizeof(cb->args[0]));
+			if (tb->dump(tb, skb, cb) < 0)
+				goto out;
+			dumped = 1;
+next:
+			e++;
+		}
+	}
+out:
+	cb->args[1] = e;
+	cb->args[0] = h;
+
+	return skb->len;
+}
+
 static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req)
 {
 	struct dn_hash *table = (struct dn_hash *)tb->data;
@@ -739,9 +778,11 @@
 }
 
 
-struct dn_fib_table *dn_fib_get_table(int n, int create)
+struct dn_fib_table *dn_fib_get_table(u32 n, int create)
 {
         struct dn_fib_table *t;
+	struct hlist_node *node;
+	unsigned int h;
 
         if (n < RT_TABLE_MIN)
                 return NULL;
@@ -749,8 +790,15 @@
         if (n > RT_TABLE_MAX)
                 return NULL;
 
-        if (dn_fib_tables[n]) 
-                return dn_fib_tables[n];
+	h = n & (DN_FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) {
+		if (t->n == n) {
+			rcu_read_unlock();
+			return t;
+		}
+	}
+	rcu_read_unlock();
 
         if (!create)
                 return NULL;
@@ -771,33 +819,37 @@
         t->flush  = dn_fib_table_flush;
         t->dump = dn_fib_table_dump;
 	memset(t->data, 0, sizeof(struct dn_hash));
-        dn_fib_tables[n] = t;
+	hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
 
         return t;
 }
 
-static void dn_fib_del_tree(int n)
-{
-	struct dn_fib_table *t;
-
-	write_lock(&dn_fib_tables_lock);
-	t = dn_fib_tables[n];
-	dn_fib_tables[n] = NULL;
-	write_unlock(&dn_fib_tables_lock);
-
-	kfree(t);
-}
-
 struct dn_fib_table *dn_fib_empty_table(void)
 {
-        int id;
+        u32 id;
 
         for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
-                if (dn_fib_tables[id] == NULL)
+		if (dn_fib_get_table(id, 0) == NULL)
                         return dn_fib_get_table(id, 1);
         return NULL;
 }
 
+void dn_fib_flush(void)
+{
+        int flushed = 0;
+        struct dn_fib_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
+
+	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist)
+	                flushed += tb->flush(tb);
+        }
+
+        if (flushed)
+                dn_rt_cache_flush(-1);
+}
+
 void __init dn_fib_table_init(void)
 {
 	dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
@@ -808,10 +860,17 @@
 
 void __exit dn_fib_table_cleanup(void)
 {
-	int i;
+	struct dn_fib_table *t;
+	struct hlist_node *node, *next;
+	unsigned int h;
 
-	for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i)
-		dn_fib_del_tree(i);
-
-	return;
+	write_lock(&dn_fib_tables_lock);
+	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h],
+		                          hlist) {
+			hlist_del(&t->hlist);
+			kfree(t);
+		}
+	}
+	write_unlock(&dn_fib_tables_lock);
 }
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 387c71c..4386393 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -64,81 +64,79 @@
 
 __setup("ether=", netdev_boot_setup);
 
-/*
- *	 Create the Ethernet MAC header for an arbitrary protocol layer 
+/**
+ * eth_header - create the Ethernet header
+ * @skb:	buffer to alter
+ * @dev:	source device
+ * @type:	Ethernet type field
+ * @daddr: destination address (NULL means leave destination address)
+ * @saddr: source address (NULL means use device source address)
+ * @len:   packet length (<= skb->len)
  *
- *	saddr=NULL	means use device source address
- *	daddr=NULL	means leave destination address (eg unresolved arp)
+ *
+ * Set the protocol type. For a packet of type ETH_P_802_3 we put the length
+ * in here instead. It is up to the 802.2 layer to carry protocol information.
  */
-
 int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
-	   void *daddr, void *saddr, unsigned len)
+	       void *daddr, void *saddr, unsigned len)
 {
-	struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN);
+	struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
 
-	/* 
-	 *	Set the protocol type. For a packet of type ETH_P_802_3 we put the length
-	 *	in here instead. It is up to the 802.2 layer to carry protocol information.
-	 */
-	
-	if(type!=ETH_P_802_3) 
+	if (type != ETH_P_802_3)
 		eth->h_proto = htons(type);
 	else
 		eth->h_proto = htons(len);
 
 	/*
-	 *	Set the source hardware address. 
+	 *      Set the source hardware address.
 	 */
-	 
-	if(!saddr)
-		saddr = dev->dev_addr;
-	memcpy(eth->h_source,saddr,dev->addr_len);
 
-	if(daddr)
-	{
-		memcpy(eth->h_dest,daddr,dev->addr_len);
+	if (!saddr)
+		saddr = dev->dev_addr;
+	memcpy(eth->h_source, saddr, dev->addr_len);
+
+	if (daddr) {
+		memcpy(eth->h_dest, daddr, dev->addr_len);
 		return ETH_HLEN;
 	}
-	
+
 	/*
-	 *	Anyway, the loopback-device should never use this function... 
+	 *      Anyway, the loopback-device should never use this function...
 	 */
 
-	if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) 
-	{
+	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) {
 		memset(eth->h_dest, 0, dev->addr_len);
 		return ETH_HLEN;
 	}
-	
+
 	return -ETH_HLEN;
 }
 
-
-/*
- *	Rebuild the Ethernet MAC header. This is called after an ARP
- *	(or in future other address resolution) has completed on this
- *	sk_buff. We now let ARP fill in the other fields.
+/**
+ * eth_rebuild_header - rebuild the Ethernet MAC header.
+ * @skb: socket buffer to update
  *
- *	This routine CANNOT use cached dst->neigh!
- *	Really, it is used only when dst->neigh is wrong.
+ * This is called after ARP or IPv6 ndisc has completed resolution on this
+ * sk_buff. We now let the protocol (ARP) fill in the other fields.
+ *
+ * This routine CANNOT use cached dst->neigh!
+ * Really, it is used only when dst->neigh is wrong.
  */
-
 int eth_rebuild_header(struct sk_buff *skb)
 {
 	struct ethhdr *eth = (struct ethhdr *)skb->data;
 	struct net_device *dev = skb->dev;
 
-	switch (eth->h_proto)
-	{
+	switch (eth->h_proto) {
 #ifdef CONFIG_INET
 	case __constant_htons(ETH_P_IP):
- 		return arp_find(eth->h_dest, skb);
-#endif	
+		return arp_find(eth->h_dest, skb);
+#endif
 	default:
 		printk(KERN_DEBUG
-		       "%s: unable to resolve type %X addresses.\n", 
+		       "%s: unable to resolve type %X addresses.\n",
 		       dev->name, (int)eth->h_proto);
-		
+
 		memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
 		break;
 	}
@@ -146,62 +144,70 @@
 	return 0;
 }
 
-
-/*
- *	Determine the packet's protocol ID. The rule here is that we 
- *	assume 802.3 if the type field is short enough to be a length.
- *	This is normal practice and works for any 'now in use' protocol.
+/**
+ * eth_type_trans - determine the packet's protocol ID.
+ * @skb: received socket data
+ * @dev: receiving network device
+ *
+ * The rule here is that we
+ * assume 802.3 if the type field is short enough to be a length.
+ * This is normal practice and works for any 'now in use' protocol.
  */
- 
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ethhdr *eth;
 	unsigned char *rawp;
-	
+
 	skb->mac.raw = skb->data;
-	skb_pull(skb,ETH_HLEN);
+	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
-	
+
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		if (!compare_ether_addr(eth->h_dest, dev->broadcast))
 			skb->pkt_type = PACKET_BROADCAST;
 		else
 			skb->pkt_type = PACKET_MULTICAST;
 	}
-	
+
 	/*
-	 *	This ALLMULTI check should be redundant by 1.4
-	 *	so don't forget to remove it.
+	 *      This ALLMULTI check should be redundant by 1.4
+	 *      so don't forget to remove it.
 	 *
-	 *	Seems, you forgot to remove it. All silly devices
-	 *	seems to set IFF_PROMISC.
+	 *      Seems, you forgot to remove it. All silly devices
+	 *      seems to set IFF_PROMISC.
 	 */
-	 
-	else if(1 /*dev->flags&IFF_PROMISC*/) {
+
+	else if (1 /*dev->flags&IFF_PROMISC */ ) {
 		if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr)))
 			skb->pkt_type = PACKET_OTHERHOST;
 	}
-	
+
 	if (ntohs(eth->h_proto) >= 1536)
 		return eth->h_proto;
-		
+
 	rawp = skb->data;
-	
+
 	/*
-	 *	This is a magic hack to spot IPX packets. Older Novell breaks
-	 *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
-	 *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
-	 *	won't work for fault tolerant netware but does for the rest.
+	 *      This is a magic hack to spot IPX packets. Older Novell breaks
+	 *      the protocol design and runs IPX over 802.3 without an 802.2 LLC
+	 *      layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+	 *      won't work for fault tolerant netware but does for the rest.
 	 */
 	if (*(unsigned short *)rawp == 0xFFFF)
 		return htons(ETH_P_802_3);
-		
+
 	/*
-	 *	Real 802.2 LLC
+	 *      Real 802.2 LLC
 	 */
 	return htons(ETH_P_802_2);
 }
+EXPORT_SYMBOL(eth_type_trans);
 
+/**
+ * eth_header_parse - extract hardware address from packet
+ * @skb: packet to extract header from
+ * @haddr: destination buffer
+ */
 static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
 	struct ethhdr *eth = eth_hdr(skb);
@@ -209,14 +215,20 @@
 	return ETH_ALEN;
 }
 
+/**
+ * eth_header_cache - fill cache entry from neighbour
+ * @neigh: source neighbour
+ * @hh: destination cache entry
+ * Create an Ethernet header template from the neighbour.
+ */
 int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 {
 	unsigned short type = hh->hh_type;
 	struct ethhdr *eth;
 	struct net_device *dev = neigh->dev;
 
-	eth = (struct ethhdr*)
-		(((u8*)hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
+	eth = (struct ethhdr *)
+	    (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
 
 	if (type == __constant_htons(ETH_P_802_3))
 		return -1;
@@ -228,27 +240,47 @@
 	return 0;
 }
 
-/*
+/**
+ * eth_header_cache_update - update cache entry
+ * @hh: destination cache entry
+ * @dev: network device
+ * @haddr: new hardware address
+ *
  * Called by Address Resolution module to notify changes in address.
  */
-
-void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
+void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev,
+			     unsigned char *haddr)
 {
-	memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
+	memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)),
 	       haddr, dev->addr_len);
 }
 
-EXPORT_SYMBOL(eth_type_trans);
-
+/**
+ * eth_mac_addr - set new Ethernet hardware address
+ * @dev: network device
+ * @p: socket address
+ * Change hardware address of device.
+ *
+ * This doesn't change hardware matching, so needs to be overridden
+ * for most real devices.
+ */
 static int eth_mac_addr(struct net_device *dev, void *p)
 {
-	struct sockaddr *addr=p;
+	struct sockaddr *addr = p;
 	if (netif_running(dev))
 		return -EBUSY;
-	memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
 	return 0;
 }
 
+/**
+ * eth_change_mtu - set new MTU size
+ * @dev: network device
+ * @new_mtu: new Maximum Transfer Unit
+ *
+ * Allow changing MTU size. Needs to be overridden for devices
+ * supporting jumbo frames.
+ */
 static int eth_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if (new_mtu < 68 || new_mtu > ETH_DATA_LEN)
@@ -257,8 +289,10 @@
 	return 0;
 }
 
-/*
- * Fill in the fields of the device structure with ethernet-generic values.
+/**
+ * ether_setup - setup Ethernet network device
+ * @dev: network device
+ * Fill in the fields of the device structure with Ethernet-generic values.
  */
 void ether_setup(struct net_device *dev)
 {
@@ -277,21 +311,21 @@
 	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */	
 	dev->flags		= IFF_BROADCAST|IFF_MULTICAST;
 	
-	memset(dev->broadcast,0xFF, ETH_ALEN);
+	memset(dev->broadcast, 0xFF, ETH_ALEN);
 
 }
 EXPORT_SYMBOL(ether_setup);
 
 /**
- * alloc_etherdev - Allocates and sets up an ethernet device
+ * alloc_etherdev - Allocates and sets up an Ethernet device
  * @sizeof_priv: Size of additional driver-private structure to be allocated
- *	for this ethernet device
+ *	for this Ethernet device
  *
- * Fill in the fields of the device structure with ethernet-generic
+ * Fill in the fields of the device structure with Ethernet-generic
  * values. Basically does everything except registering the device.
  *
  * Constructs a new net device, complete with a private data area of
- * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * size (sizeof_priv).  A 32-byte (not bit) alignment is enforced for
  * this private data area.
  */
 
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index ed90a8a..fdfe770 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -9,6 +9,7 @@
  * more details.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -48,7 +49,7 @@
 
 	int key_idx;
 
-	struct crypto_tfm *tfm;
+	struct crypto_cipher *tfm;
 
 	/* scratch buffers for virt_to_page() (crypto API) */
 	u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN],
@@ -56,20 +57,10 @@
 	u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN];
 };
 
-static void ieee80211_ccmp_aes_encrypt(struct crypto_tfm *tfm,
-				       const u8 pt[16], u8 ct[16])
+static inline void ieee80211_ccmp_aes_encrypt(struct crypto_cipher *tfm,
+					      const u8 pt[16], u8 ct[16])
 {
-	struct scatterlist src, dst;
-
-	src.page = virt_to_page(pt);
-	src.offset = offset_in_page(pt);
-	src.length = AES_BLOCK_LEN;
-
-	dst.page = virt_to_page(ct);
-	dst.offset = offset_in_page(ct);
-	dst.length = AES_BLOCK_LEN;
-
-	crypto_cipher_encrypt(tfm, &dst, &src, AES_BLOCK_LEN);
+	crypto_cipher_encrypt_one(tfm, ct, pt);
 }
 
 static void *ieee80211_ccmp_init(int key_idx)
@@ -81,10 +72,11 @@
 		goto fail;
 	priv->key_idx = key_idx;
 
-	priv->tfm = crypto_alloc_tfm("aes", 0);
-	if (priv->tfm == NULL) {
+	priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tfm)) {
 		printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate "
 		       "crypto API aes\n");
+		priv->tfm = NULL;
 		goto fail;
 	}
 
@@ -93,7 +85,7 @@
       fail:
 	if (priv) {
 		if (priv->tfm)
-			crypto_free_tfm(priv->tfm);
+			crypto_free_cipher(priv->tfm);
 		kfree(priv);
 	}
 
@@ -104,7 +96,7 @@
 {
 	struct ieee80211_ccmp_data *_priv = priv;
 	if (_priv && _priv->tfm)
-		crypto_free_tfm(_priv->tfm);
+		crypto_free_cipher(_priv->tfm);
 	kfree(priv);
 }
 
@@ -115,7 +107,7 @@
 		b[i] ^= a[i];
 }
 
-static void ccmp_init_blocks(struct crypto_tfm *tfm,
+static void ccmp_init_blocks(struct crypto_cipher *tfm,
 			     struct ieee80211_hdr_4addr *hdr,
 			     u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0)
 {
@@ -377,7 +369,7 @@
 {
 	struct ieee80211_ccmp_data *data = priv;
 	int keyidx;
-	struct crypto_tfm *tfm = data->tfm;
+	struct crypto_cipher *tfm = data->tfm;
 
 	keyidx = data->key_idx;
 	memset(data, 0, sizeof(*data));
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 34dba0b..407a174 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -9,6 +9,7 @@
  * more details.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -52,8 +53,8 @@
 
 	int key_idx;
 
-	struct crypto_tfm *tfm_arc4;
-	struct crypto_tfm *tfm_michael;
+	struct crypto_blkcipher *tfm_arc4;
+	struct crypto_hash *tfm_michael;
 
 	/* scratch buffers for virt_to_page() (crypto API) */
 	u8 rx_hdr[16], tx_hdr[16];
@@ -85,17 +86,21 @@
 
 	priv->key_idx = key_idx;
 
-	priv->tfm_arc4 = crypto_alloc_tfm("arc4", 0);
-	if (priv->tfm_arc4 == NULL) {
+	priv->tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
+						CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tfm_arc4)) {
 		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
 		       "crypto API arc4\n");
+		priv->tfm_arc4 = NULL;
 		goto fail;
 	}
 
-	priv->tfm_michael = crypto_alloc_tfm("michael_mic", 0);
-	if (priv->tfm_michael == NULL) {
+	priv->tfm_michael = crypto_alloc_hash("michael_mic", 0,
+					      CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tfm_michael)) {
 		printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate "
 		       "crypto API michael_mic\n");
+		priv->tfm_michael = NULL;
 		goto fail;
 	}
 
@@ -104,9 +109,9 @@
       fail:
 	if (priv) {
 		if (priv->tfm_michael)
-			crypto_free_tfm(priv->tfm_michael);
+			crypto_free_hash(priv->tfm_michael);
 		if (priv->tfm_arc4)
-			crypto_free_tfm(priv->tfm_arc4);
+			crypto_free_blkcipher(priv->tfm_arc4);
 		kfree(priv);
 	}
 
@@ -117,9 +122,9 @@
 {
 	struct ieee80211_tkip_data *_priv = priv;
 	if (_priv && _priv->tfm_michael)
-		crypto_free_tfm(_priv->tfm_michael);
+		crypto_free_hash(_priv->tfm_michael);
 	if (_priv && _priv->tfm_arc4)
-		crypto_free_tfm(_priv->tfm_arc4);
+		crypto_free_blkcipher(_priv->tfm_arc4);
 	kfree(priv);
 }
 
@@ -318,6 +323,7 @@
 static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct ieee80211_tkip_data *tkey = priv;
+	struct blkcipher_desc desc = { .tfm = tkey->tfm_arc4 };
 	int len;
 	u8 rc4key[16], *pos, *icv;
 	u32 crc;
@@ -351,18 +357,17 @@
 	icv[2] = crc >> 16;
 	icv[3] = crc >> 24;
 
-	crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16);
+	crypto_blkcipher_setkey(tkey->tfm_arc4, rc4key, 16);
 	sg.page = virt_to_page(pos);
 	sg.offset = offset_in_page(pos);
 	sg.length = len + 4;
-	crypto_cipher_encrypt(tkey->tfm_arc4, &sg, &sg, len + 4);
-
-	return 0;
+	return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
 }
 
 static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct ieee80211_tkip_data *tkey = priv;
+	struct blkcipher_desc desc = { .tfm = tkey->tfm_arc4 };
 	u8 rc4key[16];
 	u8 keyidx, *pos;
 	u32 iv32;
@@ -434,11 +439,18 @@
 
 	plen = skb->len - hdr_len - 12;
 
-	crypto_cipher_setkey(tkey->tfm_arc4, rc4key, 16);
+	crypto_blkcipher_setkey(tkey->tfm_arc4, rc4key, 16);
 	sg.page = virt_to_page(pos);
 	sg.offset = offset_in_page(pos);
 	sg.length = plen + 4;
-	crypto_cipher_decrypt(tkey->tfm_arc4, &sg, &sg, plen + 4);
+	if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) {
+		if (net_ratelimit()) {
+			printk(KERN_DEBUG ": TKIP: failed to decrypt "
+			       "received packet from " MAC_FMT "\n",
+			       MAC_ARG(hdr->addr2));
+		}
+		return -7;
+	}
 
 	crc = ~crc32_le(~0, pos, plen);
 	icv[0] = crc;
@@ -475,6 +487,7 @@
 static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr,
 		       u8 * data, size_t data_len, u8 * mic)
 {
+	struct hash_desc desc;
 	struct scatterlist sg[2];
 
 	if (tkey->tfm_michael == NULL) {
@@ -489,12 +502,12 @@
 	sg[1].offset = offset_in_page(data);
 	sg[1].length = data_len;
 
-	crypto_digest_init(tkey->tfm_michael);
-	crypto_digest_setkey(tkey->tfm_michael, key, 8);
-	crypto_digest_update(tkey->tfm_michael, sg, 2);
-	crypto_digest_final(tkey->tfm_michael, mic);
+	if (crypto_hash_setkey(tkey->tfm_michael, key, 8))
+		return -1;
 
-	return 0;
+	desc.tfm = tkey->tfm_michael;
+	desc.flags = 0;
+	return crypto_hash_digest(&desc, sg, data_len + 16, mic);
 }
 
 static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
@@ -618,8 +631,8 @@
 {
 	struct ieee80211_tkip_data *tkey = priv;
 	int keyidx;
-	struct crypto_tfm *tfm = tkey->tfm_michael;
-	struct crypto_tfm *tfm2 = tkey->tfm_arc4;
+	struct crypto_hash *tfm = tkey->tfm_michael;
+	struct crypto_blkcipher *tfm2 = tkey->tfm_arc4;
 
 	keyidx = tkey->key_idx;
 	memset(tkey, 0, sizeof(*tkey));
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 0ebf235..3d46d3e 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -9,6 +9,7 @@
  * more details.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -32,7 +33,7 @@
 	u8 key[WEP_KEY_LEN + 1];
 	u8 key_len;
 	u8 key_idx;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
 };
 
 static void *prism2_wep_init(int keyidx)
@@ -44,10 +45,11 @@
 		goto fail;
 	priv->key_idx = keyidx;
 
-	priv->tfm = crypto_alloc_tfm("arc4", 0);
-	if (priv->tfm == NULL) {
+	priv->tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(priv->tfm)) {
 		printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate "
 		       "crypto API arc4\n");
+		priv->tfm = NULL;
 		goto fail;
 	}
 
@@ -59,7 +61,7 @@
       fail:
 	if (priv) {
 		if (priv->tfm)
-			crypto_free_tfm(priv->tfm);
+			crypto_free_blkcipher(priv->tfm);
 		kfree(priv);
 	}
 	return NULL;
@@ -69,7 +71,7 @@
 {
 	struct prism2_wep_data *_priv = priv;
 	if (_priv && _priv->tfm)
-		crypto_free_tfm(_priv->tfm);
+		crypto_free_blkcipher(_priv->tfm);
 	kfree(priv);
 }
 
@@ -120,6 +122,7 @@
 static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct prism2_wep_data *wep = priv;
+	struct blkcipher_desc desc = { .tfm = wep->tfm };
 	u32 crc, klen, len;
 	u8 *pos, *icv;
 	struct scatterlist sg;
@@ -151,13 +154,11 @@
 	icv[2] = crc >> 16;
 	icv[3] = crc >> 24;
 
-	crypto_cipher_setkey(wep->tfm, key, klen);
+	crypto_blkcipher_setkey(wep->tfm, key, klen);
 	sg.page = virt_to_page(pos);
 	sg.offset = offset_in_page(pos);
 	sg.length = len + 4;
-	crypto_cipher_encrypt(wep->tfm, &sg, &sg, len + 4);
-
-	return 0;
+	return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4);
 }
 
 /* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
@@ -170,6 +171,7 @@
 static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct prism2_wep_data *wep = priv;
+	struct blkcipher_desc desc = { .tfm = wep->tfm };
 	u32 crc, klen, plen;
 	u8 key[WEP_KEY_LEN + 3];
 	u8 keyidx, *pos, icv[4];
@@ -194,11 +196,12 @@
 	/* Apply RC4 to data and compute CRC32 over decrypted data */
 	plen = skb->len - hdr_len - 8;
 
-	crypto_cipher_setkey(wep->tfm, key, klen);
+	crypto_blkcipher_setkey(wep->tfm, key, klen);
 	sg.page = virt_to_page(pos);
 	sg.offset = offset_in_page(pos);
 	sg.length = plen + 4;
-	crypto_cipher_decrypt(wep->tfm, &sg, &sg, plen + 4);
+	if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4))
+		return -7;
 
 	crc = ~crc32_le(~0, pos, plen);
 	icv[0] = crc;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8514106..1650b64 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -88,6 +88,7 @@
 config IP_MULTIPLE_TABLES
 	bool "IP: policy routing"
 	depends on IP_ADVANCED_ROUTER
+	select FIB_RULES
 	---help---
 	  Normally, a router decides what to do with a received packet based
 	  solely on the packet's final destination address. If you say Y here,
@@ -386,6 +387,7 @@
 	select CRYPTO
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
+	select CRYPTO_CBC
 	select CRYPTO_SHA1
 	select CRYPTO_DES
 	---help---
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4878fc5..f66049e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -47,6 +47,7 @@
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
 		      xfrm4_output.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c84a320..fdd89e3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -67,7 +67,6 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -392,7 +391,7 @@
 }
 
 /* It is off by default, see below. */
-int sysctl_ip_nonlocal_bind;
+int sysctl_ip_nonlocal_bind __read_mostly;
 
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
@@ -988,7 +987,7 @@
  *      Shall we try to damage output packets if routing dev changes?
  */
 
-int sysctl_ip_dynaddr;
+int sysctl_ip_dynaddr __read_mostly;
 
 static int inet_sk_reselect_saddr(struct sock *sk)
 {
@@ -1074,6 +1073,7 @@
 		},
 	};
 						
+	security_sk_classify_flow(sk, &fl);
 	err = ip_route_output_flow(&rt, &fl, sk, 0);
 }
 	if (!err)
@@ -1254,10 +1254,7 @@
 	struct list_head *r;
 	int rc = -EINVAL;
 
-	if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) {
-		printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
-		goto out;
-	}
+	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
 
 	rc = proto_register(&tcp_prot, 1);
 	if (rc)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 1366bc6..9954297 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,3 +1,4 @@
+#include <linux/err.h>
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -34,7 +35,7 @@
 		switch (*optptr) {
 		case IPOPT_SEC:
 		case 0x85:	/* Some "Extended Security" crap. */
-		case 0x86:	/* Another "Commercial Security" crap. */
+		case IPOPT_CIPSO:
 		case IPOPT_RA:
 		case 0x80|21:	/* RFC1770 */
 			break;
@@ -97,7 +98,10 @@
 	ah->spi = x->id.spi;
 	ah->seq_no = htonl(++x->replay.oseq);
 	xfrm_aevent_doreplay(x);
-	ahp->icv(ahp, skb, ah->auth_data);
+	err = ah_mac_digest(ahp, skb, ah->auth_data);
+	if (err)
+		goto error;
+	memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
 
 	top_iph->tos = iph->tos;
 	top_iph->ttl = iph->ttl;
@@ -119,6 +123,7 @@
 {
 	int ah_hlen;
 	int ihl;
+	int err = -EINVAL;
 	struct iphdr *iph;
 	struct ip_auth_hdr *ah;
 	struct ah_data *ahp;
@@ -166,8 +171,11 @@
 		
 		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
 		skb_push(skb, ihl);
-		ahp->icv(ahp, skb, ah->auth_data);
-		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+		err = ah_mac_digest(ahp, skb, ah->auth_data);
+		if (err)
+			goto out;
+		err = -EINVAL;
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
 			x->stats.integrity_failed++;
 			goto out;
 		}
@@ -179,7 +187,7 @@
 	return 0;
 
 out:
-	return -EINVAL;
+	return err;
 }
 
 static void ah4_err(struct sk_buff *skb, u32 info)
@@ -204,6 +212,7 @@
 {
 	struct ah_data *ahp = NULL;
 	struct xfrm_algo_desc *aalg_desc;
+	struct crypto_hash *tfm;
 
 	if (!x->aalg)
 		goto error;
@@ -221,24 +230,27 @@
 
 	ahp->key = x->aalg->alg_key;
 	ahp->key_len = (x->aalg->alg_key_len+7)/8;
-	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-	if (!ahp->tfm)
+	tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
 		goto error;
-	ahp->icv = ah_hmac_digest;
+
+	ahp->tfm = tfm;
+	if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len))
+		goto error;
 	
 	/*
 	 * Lookup the algorithm description maintained by xfrm_algo,
 	 * verify crypto transform properties, and store information
 	 * we need for AH processing.  This lookup cannot fail here
-	 * after a successful crypto_alloc_tfm().
+	 * after a successful crypto_alloc_hash().
 	 */
 	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 	BUG_ON(!aalg_desc);
 
 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-	    crypto_tfm_alg_digestsize(ahp->tfm)) {
+	    crypto_hash_digestsize(tfm)) {
 		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
-		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+		       x->aalg->alg_name, crypto_hash_digestsize(tfm),
 		       aalg_desc->uinfo.auth.icv_fullbits/8);
 		goto error;
 	}
@@ -253,7 +265,7 @@
 		goto error;
 	
 	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len);
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 	x->data = ahp;
 
@@ -262,7 +274,7 @@
 error:
 	if (ahp) {
 		kfree(ahp->work_icv);
-		crypto_free_tfm(ahp->tfm);
+		crypto_free_hash(ahp->tfm);
 		kfree(ahp);
 	}
 	return -EINVAL;
@@ -277,7 +289,7 @@
 
 	kfree(ahp->work_icv);
 	ahp->work_icv = NULL;
-	crypto_free_tfm(ahp->tfm);
+	crypto_free_hash(ahp->tfm);
 	ahp->tfm = NULL;
 	kfree(ahp);
 }
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
new file mode 100644
index 0000000..80a2a09
--- /dev/null
+++ b/net/ipv4/cipso_ipv4.c
@@ -0,0 +1,1607 @@
+/*
+ * CIPSO - Commercial IP Security Option
+ *
+ * This is an implementation of the CIPSO 2.2 protocol as specified in
+ * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
+ * FIPS-188, copies of both documents can be found in the Documentation
+ * directory.  While CIPSO never became a full IETF RFC standard many vendors
+ * have chosen to adopt the protocol and over the years it has become a
+ * de-facto standard for labeled networking.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/jhash.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+struct cipso_v4_domhsh_entry {
+	char *domain;		/* kstrdup()'d name or NULL */
+	u32 valid;		/* non-zero while the entry is in use */
+	struct list_head list;	/* link in a DOI's dom_list */
+	struct rcu_head rcu;	/* defers the free past RCU readers */
+};
+
+/* List of available DOI definitions */
+/* XXX - Updates should be minimal so having a single lock for the
+ * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
+ * okay. */
+/* XXX - This currently assumes a minimal number of different DOIs in use,
+ * if in practice there are a lot of different DOIs this list should
+ * probably be turned into a hash table or something similar so we
+ * can do quick lookups. */
+static DEFINE_SPINLOCK(cipso_v4_doi_list_lock);
+static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list);
+
+/* Label mapping cache */
+int cipso_v4_cache_enabled = 1;		/* zero bypasses the cache */
+int cipso_v4_cache_bucketsize = 10;	/* max entries per bucket */
+#define CIPSO_V4_CACHE_BUCKETBITS     7
+#define CIPSO_V4_CACHE_BUCKETS        (1 << CIPSO_V4_CACHE_BUCKETBITS)
+#define CIPSO_V4_CACHE_REORDERLIMIT   10
+struct cipso_v4_map_cache_bkt {
+	spinlock_t lock;	/* guards size and list */
+	u32 size;		/* number of entries on list */
+	struct list_head list;	/* the cached entries */
+};
+struct cipso_v4_map_cache_entry {
+	u32 hash;		/* cipso_v4_map_cache_hash() of key */
+	unsigned char *key;	/* private copy of the CIPSO option */
+	size_t key_len;		/* length of key in bytes */
+
+	struct netlbl_lsm_cache lsm_data;	/* LSM data and its free hook */
+
+	u32 activity;		/* hit counter used to reorder the bucket */
+	struct list_head list;	/* link in the bucket's list */
+};
+static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;
+
+/* Restricted bitmap (tag #1) flags */
+int cipso_v4_rbm_optfmt = 0;
+int cipso_v4_rbm_strictvalid = 1;
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit
+ * @bitmap: the bitmap, bit 0 is the most significant bit of byte 0
+ * @bitmap_len: length in bits
+ * @offset: starting offset in bits
+ * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit
+ *
+ * Description:
+ * Starting at @offset, walk the bitmap from left to right until either the
+ * desired bit is found or we reach the end.  Returns the bit offset, or -1
+ * if not found; no byte at or beyond @bitmap_len bits is ever read.
+ */
+static int cipso_v4_bitmap_walk(const unsigned char *bitmap,
+				u32 bitmap_len,
+				u32 offset,
+				u8 state)
+{
+	u32 bit_spot;
+	u32 byte_offset;
+	unsigned char bitmask;
+	unsigned char byte;
+
+	/* never touch bitmap[] unless the bit is inside the bitmap */
+	if (offset >= bitmap_len)
+		return -1;
+	byte_offset = offset / 8;
+	byte = bitmap[byte_offset];
+	bitmask = 0x80 >> (offset % 8);
+
+	for (bit_spot = offset; bit_spot < bitmap_len; ) {
+		if ((state && (byte & bitmask) == bitmask) ||
+		    (state == 0 && (byte & bitmask) == 0))
+			return bit_spot;
+
+		bitmask >>= 1;
+		if (++bit_spot < bitmap_len && bitmask == 0) {
+			byte = bitmap[++byte_offset];
+			bitmask = 0x80;
+		}
+	}
+
+	return -1;
+}
+
+/**
+ * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap
+ * @bitmap: the bitmap
+ * @bit: the bit
+ * @state: if non-zero, set the bit (1) else clear the bit (0)
+ *
+ * Description:
+ * Set or clear a single bit in the bitmap; bits are numbered from the most
+ * significant bit of byte 0.  The caller must ensure @bit is in range.
+ */
+static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
+				   u32 bit,
+				   u8 state)
+{
+	u32 byte_spot;
+	u8 bitmask;
+
+	/* unsigned integer division truncates, giving the byte index */
+	byte_spot = bit / 8;
+	bitmask = 0x80 >> (bit % 8);
+	if (state)
+		bitmap[byte_spot] |= bitmask;
+	else
+		bitmap[byte_spot] &= ~bitmask;
+}
+
+/**
+ * cipso_v4_doi_domhsh_free - Frees a domain list entry
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that a domain list entry, together with its domain string,
+ * can be released safely.
+ *
+ */
+static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
+{
+	struct cipso_v4_domhsh_entry *ptr;
+
+	ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
+	kfree(ptr->domain);
+	kfree(ptr);
+}
+
+/**
+ * cipso_v4_cache_entry_free - Frees a cache entry
+ * @entry: the entry to free
+ *
+ * Description:
+ * Releases the LSM data through its free hook, if one is set, then frees the
+ * key and the entry itself.  It does not unlink the entry from a bucket list.
+ */
+static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry)
+{
+	if (entry->lsm_data.free)
+		entry->lsm_data.free(entry->lsm_data.data);
+	kfree(entry->key);
+	kfree(entry);
+}
+
+/**
+ * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache
+ * @key: the hash key
+ * @key_len: the length of the key in bytes
+ *
+ * Description:
+ * Hashes @key with jhash() and a zero initial value.  Returns a 32-bit hash
+ * value.
+ */
+static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len)
+{
+	return jhash(key, key_len, 0);
+}
+
+/*
+ * Label Mapping Cache Functions
+ */
+
+/**
+ * cipso_v4_cache_init - Initialize the CIPSO cache
+ *
+ * Description:
+ * Allocates the CIPSO label mapping cache and initializes every bucket as
+ * empty; this function should be called before any of the other cache
+ * functions defined in this file.  Returns zero on success, -ENOMEM on error.
+ *
+ */
+static int cipso_v4_cache_init(void)
+{
+	u32 iter;
+
+	cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS,
+				 sizeof(struct cipso_v4_map_cache_bkt),
+				 GFP_KERNEL);
+	if (cipso_v4_cache == NULL)
+		return -ENOMEM;
+
+	for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
+		spin_lock_init(&cipso_v4_cache[iter].lock);
+		cipso_v4_cache[iter].size = 0;
+		INIT_LIST_HEAD(&cipso_v4_cache[iter].list);
+	}
+
+	return 0;
+}
+
+/**
+ * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache
+ *
+ * Description:
+ * Invalidates and frees any entries in the CIPSO cache, one bucket at a time
+ * under that bucket's lock.  This function cannot fail and returns nothing.
+ *
+ */
+void cipso_v4_cache_invalidate(void)
+{
+	struct cipso_v4_map_cache_entry *entry, *tmp_entry;
+	u32 iter;
+
+	for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) {
+		spin_lock(&cipso_v4_cache[iter].lock);
+		list_for_each_entry_safe(entry,
+					 tmp_entry,
+					 &cipso_v4_cache[iter].list, list) {
+			list_del(&entry->list);
+			cipso_v4_cache_entry_free(entry);
+		}
+		cipso_v4_cache[iter].size = 0;
+		spin_unlock(&cipso_v4_cache[iter].lock);
+	}
+
+	return;
+}
+
+/**
+ * cipso_v4_cache_check - Check the CIPSO cache for a label mapping
+ * @key: the buffer to check
+ * @key_len: buffer length in bytes
+ * @secattr: the security attribute struct to use
+ *
+ * Description:
+ * This function checks the cache to see if a label mapping already exists for
+ * the given key.  If there is a match then the cache is adjusted and the
+ * @secattr struct is populated with the correct LSM security attributes.  The
+ * cache is adjusted in the following manner if the entry is not already the
+ * first in the cache bucket:
+ *
+ *  1. The cache entry's activity counter is incremented
+ *  2. The previous (higher ranking) entry's activity counter is decremented
+ *  3. If the difference between the two activity counters is greater than
+ *     CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped
+ *
+ * Returns zero on success, -ENOENT for a cache miss, and other negative values
+ * on error.
+ *
+ */
+static int cipso_v4_cache_check(const unsigned char *key,
+				u32 key_len,
+				struct netlbl_lsm_secattr *secattr)
+{
+	u32 bkt;
+	struct cipso_v4_map_cache_entry *entry;
+	struct cipso_v4_map_cache_entry *prev_entry = NULL;
+	u32 hash;
+
+	if (!cipso_v4_cache_enabled)
+		return -ENOENT;
+	/* BUCKETS is a power of two, so BUCKETS - 1 masks every bucket */
+	hash = cipso_v4_map_cache_hash(key, key_len);
+	bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1);
+	spin_lock(&cipso_v4_cache[bkt].lock);
+	list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
+		if (entry->hash == hash &&
+		    entry->key_len == key_len &&
+		    memcmp(entry->key, key, key_len) == 0) {
+			entry->activity += 1;
+			secattr->cache.free = entry->lsm_data.free;
+			secattr->cache.data = entry->lsm_data.data;
+			if (prev_entry == NULL) {
+				spin_unlock(&cipso_v4_cache[bkt].lock);
+				return 0;
+			}
+
+			if (prev_entry->activity > 0)
+				prev_entry->activity -= 1;
+			if (entry->activity > prev_entry->activity &&
+			    entry->activity - prev_entry->activity >
+			    CIPSO_V4_CACHE_REORDERLIMIT) {
+				__list_del(entry->list.prev, entry->list.next);
+				__list_add(&entry->list,
+					   prev_entry->list.prev,
+					   &prev_entry->list);
+			}
+
+			spin_unlock(&cipso_v4_cache[bkt].lock);
+			return 0;
+		}
+		prev_entry = entry;
+	}
+	spin_unlock(&cipso_v4_cache[bkt].lock);
+
+	return -ENOENT;
+}
+
+/**
+ * cipso_v4_cache_add - Add an entry to the CIPSO cache
+ * @skb: the packet
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Add a new entry into the CIPSO label mapping cache.  Add the new entry to
+ * head of the cache bucket's list, if the cache bucket is out of room remove
+ * the last entry in the list first.  It is important to note that there is
+ * currently no checking for duplicate keys.  Returns zero on success,
+ * negative values on failure.
+ *
+ */
+int cipso_v4_cache_add(const struct sk_buff *skb,
+		       const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	u32 bkt;
+	struct cipso_v4_map_cache_entry *entry = NULL;
+	struct cipso_v4_map_cache_entry *old_entry = NULL;
+	unsigned char *cipso_ptr;
+	u32 cipso_ptr_len;
+
+	if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+		return 0;
+
+	cipso_ptr = CIPSO_V4_OPTPTR(skb);
+	cipso_ptr_len = cipso_ptr[1];
+
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL)
+		return -ENOMEM;
+	entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC);
+	if (entry->key == NULL) {
+		ret_val = -ENOMEM;
+		goto cache_add_failure;
+	}
+	memcpy(entry->key, cipso_ptr, cipso_ptr_len);
+	entry->key_len = cipso_ptr_len;
+	entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
+	entry->lsm_data.free = secattr->cache.free;
+	entry->lsm_data.data = secattr->cache.data;
+	/* BUCKETS is a power of two, so BUCKETS - 1 masks every bucket */
+	bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
+	spin_lock(&cipso_v4_cache[bkt].lock);
+	if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+		list_add(&entry->list, &cipso_v4_cache[bkt].list);
+		cipso_v4_cache[bkt].size += 1;
+	} else {
+		old_entry = list_entry(cipso_v4_cache[bkt].list.prev,
+				       struct cipso_v4_map_cache_entry, list);
+		list_del(&old_entry->list);
+		list_add(&entry->list, &cipso_v4_cache[bkt].list);
+		cipso_v4_cache_entry_free(old_entry);
+	}
+	spin_unlock(&cipso_v4_cache[bkt].lock);
+
+	return 0;
+
+cache_add_failure:
+	if (entry)
+		cipso_v4_cache_entry_free(entry);
+	return ret_val;
+}
+
+/*
+ * DOI List Functions
+ */
+
+/**
+ * cipso_v4_doi_search - Searches for a DOI definition
+ * @doi: the DOI to search for
+ *
+ * Description:
+ * Search the DOI definition list for a valid definition whose DOI value
+ * matches @doi.  The caller is responsible for calling rcu_read_[un]lock().
+ * Returns a pointer to the DOI definition on success and NULL on failure.
+ */
+static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
+{
+	struct cipso_v4_doi *iter;
+
+	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+		if (iter->doi == doi && iter->valid)
+			return iter;
+	return NULL;
+}
+
+/**
+ * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine
+ * @doi_def: the DOI structure
+ *
+ * Description:
+ * The caller defines a new DOI for use by the CIPSO engine and calls this
+ * function to add it to the list of acceptable domains.  The caller must
+ * ensure that the mapping table specified in @doi_def->map meets all of the
+ * requirements of the mapping type (see cipso_ipv4.h for details).  Returns
+ * zero on success, -EINVAL if @doi_def is unusable and -EEXIST if the DOI is
+ * already defined.
+ */
+int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
+		return -EINVAL;
+
+	doi_def->valid = 1;
+	INIT_RCU_HEAD(&doi_def->rcu);
+	INIT_LIST_HEAD(&doi_def->dom_list);
+
+	rcu_read_lock();
+	if (cipso_v4_doi_search(doi_def->doi) != NULL)
+		goto doi_add_failure_rlock;
+	spin_lock(&cipso_v4_doi_list_lock);	/* re-check under the lock */
+	if (cipso_v4_doi_search(doi_def->doi) != NULL)
+		goto doi_add_failure_slock;
+	list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+	rcu_read_unlock();
+
+	return 0;
+
+doi_add_failure_slock:
+	spin_unlock(&cipso_v4_doi_list_lock);
+doi_add_failure_rlock:
+	rcu_read_unlock();
+	return -EEXIST;
+}
+
+/**
+ * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
+ * @doi: the DOI value
+ * @callback: the DOI cleanup/free callback
+ *
+ * Description:
+ * Removes a DOI definition from the CIPSO engine, @callback is called to
+ * free any memory.  The NetLabel routines will be called to release their own
+ * LSM domain mappings as well as our own domain list.  Returns zero on
+ * success and -ENOENT if no valid definition matches @doi.
+ *
+ */
+int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head))
+{
+	struct cipso_v4_doi *doi_def;
+	struct cipso_v4_domhsh_entry *dom_iter;
+
+	rcu_read_lock();
+	if (cipso_v4_doi_search(doi) != NULL) {
+		spin_lock(&cipso_v4_doi_list_lock);
+		doi_def = cipso_v4_doi_search(doi);
+		if (doi_def == NULL) {
+			spin_unlock(&cipso_v4_doi_list_lock);
+			rcu_read_unlock();
+			return -ENOENT;
+		}
+		doi_def->valid = 0;
+		list_del_rcu(&doi_def->list);
+		spin_unlock(&cipso_v4_doi_list_lock);
+		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
+			if (dom_iter->valid)
+				netlbl_domhsh_remove(dom_iter->domain);
+		cipso_v4_cache_invalidate();
+		rcu_read_unlock();
+
+		call_rcu(&doi_def->rcu, callback);
+		return 0;
+	}
+	rcu_read_unlock();
+
+	return -ENOENT;
+}
+
+/**
+ * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * @doi: the DOI value
+ *
+ * Description:
+ * Searches for a valid DOI definition and if one is found it is returned to
+ * the caller.  Otherwise NULL is returned.  The caller must ensure that
+ * rcu_read_lock() is held while accessing the returned definition.
+ * This is the non-static wrapper around cipso_v4_doi_search().
+ */
+struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
+{
+	return cipso_v4_doi_search(doi);
+}
+
+/**
+ * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff
+ * @headroom: the amount of headroom to allocate for the sk_buff
+ *
+ * Description:
+ * Dump a list of all the configured DOI values into a sk_buff.  The returned
+ * sk_buff has room at the front of the sk_buff for @headroom bytes.  See
+ * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format.  This
+ * function may fail if another process is changing the DOI list at the same
+ * time.  Returns a pointer to a sk_buff on success, NULL on error.
+ * On failure any partially built sk_buff is released with kfree_skb().
+ */
+struct sk_buff *cipso_v4_doi_dump_all(size_t headroom)
+{
+	struct sk_buff *skb = NULL;
+	struct cipso_v4_doi *iter;
+	u32 doi_cnt = 0;
+	ssize_t buf_len;
+
+	buf_len = NETLBL_LEN_U32;
+	rcu_read_lock();
+	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+		if (iter->valid) {
+			doi_cnt += 1;
+			buf_len += 2 * NETLBL_LEN_U32;
+		}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto doi_dump_all_failure;
+
+	if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0)
+		goto doi_dump_all_failure;
+	buf_len -= NETLBL_LEN_U32;
+	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
+		if (iter->valid) {
+			if (buf_len < 2 * NETLBL_LEN_U32)
+				goto doi_dump_all_failure;
+			if (nla_put_u32(skb, NLA_U32, iter->doi) != 0)
+				goto doi_dump_all_failure;
+			if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
+				goto doi_dump_all_failure;
+			buf_len -= 2 * NETLBL_LEN_U32;
+		}
+	rcu_read_unlock();
+
+	return skb;
+
+doi_dump_all_failure:
+	rcu_read_unlock();
+	kfree_skb(skb);		/* an sk_buff must not be handed to kfree() */
+	return NULL;
+}
+
+/**
+ * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff
+ * @doi: the DOI value
+ * @headroom: the amount of headroom to allocate for the sk_buff
+ *
+ * Description:
+ * Lookup the DOI definition matching @doi and dump its contents into a
+ * sk_buff.  The returned sk_buff has room at the front of the sk_buff for
+ * @headroom bytes.  See net/netlabel/netlabel_cipso_v4.h for the LIST message
+ * format.  This function may fail if another process is changing the DOI list
+ * at the same time.  Returns a pointer to a sk_buff on success, NULL on error.
+ * On failure any partially built sk_buff is released with kfree_skb().
+ */
+struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom)
+{
+	struct sk_buff *skb = NULL;
+	struct cipso_v4_doi *iter;
+	u32 tag_cnt = 0;
+	u32 lvl_cnt = 0;
+	u32 cat_cnt = 0;
+	ssize_t buf_len;
+	ssize_t tmp;
+
+	rcu_read_lock();
+	iter = cipso_v4_doi_getdef(doi);
+	if (iter == NULL)
+		goto doi_dump_failure;
+	buf_len = NETLBL_LEN_U32;
+	switch (iter->type) {
+	case CIPSO_V4_MAP_PASS:
+		buf_len += NETLBL_LEN_U32;
+		while(tag_cnt < CIPSO_V4_TAG_MAXCNT &&
+		      iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
+			tag_cnt += 1;
+			buf_len += NETLBL_LEN_U8;
+		}
+		break;
+	case CIPSO_V4_MAP_STD:
+		buf_len += 3 * NETLBL_LEN_U32;
+		while (tag_cnt < CIPSO_V4_TAG_MAXCNT &&
+		       iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) {
+			tag_cnt += 1;
+			buf_len += NETLBL_LEN_U8;
+		}
+		for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
+			if (iter->map.std->lvl.local[tmp] !=
+			    CIPSO_V4_INV_LVL) {
+				lvl_cnt += 1;
+				buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8;
+			}
+		for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
+			if (iter->map.std->cat.local[tmp] !=
+			    CIPSO_V4_INV_CAT) {
+				cat_cnt += 1;
+				buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16;
+			}
+		break;
+	}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto doi_dump_failure;
+
+	if (nla_put_u32(skb, NLA_U32, iter->type) != 0)
+		goto doi_dump_failure;
+	buf_len -= NETLBL_LEN_U32;
+	if (iter != cipso_v4_doi_getdef(doi))
+		goto doi_dump_failure;
+	switch (iter->type) {
+	case CIPSO_V4_MAP_PASS:
+		if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
+			goto doi_dump_failure;
+		buf_len -= NETLBL_LEN_U32;
+		for (tmp = 0;
+		     tmp < CIPSO_V4_TAG_MAXCNT &&
+			     iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
+		     tmp++) {
+			if (buf_len < NETLBL_LEN_U8)
+				goto doi_dump_failure;
+			if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
+				goto doi_dump_failure;
+			buf_len -= NETLBL_LEN_U8;
+		}
+		break;
+	case CIPSO_V4_MAP_STD:
+		if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0)
+			goto doi_dump_failure;
+		if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0)
+			goto doi_dump_failure;
+		if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0)
+			goto doi_dump_failure;
+		buf_len -= 3 * NETLBL_LEN_U32;
+		for (tmp = 0;
+		     tmp < CIPSO_V4_TAG_MAXCNT &&
+			     iter->tags[tmp] != CIPSO_V4_TAG_INVALID;
+		     tmp++) {
+			if (buf_len < NETLBL_LEN_U8)
+				goto doi_dump_failure;
+			if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0)
+				goto doi_dump_failure;
+			buf_len -= NETLBL_LEN_U8;
+		}
+		for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++)
+			if (iter->map.std->lvl.local[tmp] !=
+			    CIPSO_V4_INV_LVL) {
+				if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8)
+					goto doi_dump_failure;
+				if (nla_put_u32(skb, NLA_U32, tmp) != 0)
+					goto doi_dump_failure;
+				if (nla_put_u8(skb,
+					   NLA_U8,
+					   iter->map.std->lvl.local[tmp]) != 0)
+					goto doi_dump_failure;
+				buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8;
+			}
+		for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++)
+			if (iter->map.std->cat.local[tmp] !=
+			    CIPSO_V4_INV_CAT) {
+				if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16)
+					goto doi_dump_failure;
+				if (nla_put_u32(skb, NLA_U32, tmp) != 0)
+					goto doi_dump_failure;
+				if (nla_put_u16(skb,
+					   NLA_U16,
+					   iter->map.std->cat.local[tmp]) != 0)
+					goto doi_dump_failure;
+				buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16;
+			}
+		break;
+	}
+	rcu_read_unlock();
+
+	return skb;
+
+doi_dump_failure:
+	rcu_read_unlock();
+	kfree_skb(skb);		/* an sk_buff must not be handed to kfree() */
+	return NULL;
+}
+
+/**
+ * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to add
+ *
+ * Description:
+ * Adds the @domain to the DOI specified by @doi_def, this function
+ * should only be called by external functions (i.e. NetLabel).  This function
+ * allocates memory with GFP_KERNEL.  Returns zero on success, -ENOMEM if an
+ * allocation fails and -EEXIST if the domain is already on the DOI's list.
+ */
+int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
+{
+	struct cipso_v4_domhsh_entry *iter;
+	struct cipso_v4_domhsh_entry *new_dom;
+
+	new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
+	if (new_dom == NULL)
+		return -ENOMEM;
+	if (domain) {
+		new_dom->domain = kstrdup(domain, GFP_KERNEL);
+		if (new_dom->domain == NULL) {
+			kfree(new_dom);
+			return -ENOMEM;
+		}
+	}
+	new_dom->valid = 1;
+	INIT_RCU_HEAD(&new_dom->rcu);
+
+	rcu_read_lock();
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+		if (iter->valid &&
+		    ((domain != NULL && iter->domain != NULL &&
+		      strcmp(iter->domain, domain) == 0) ||
+		     (domain == NULL && iter->domain == NULL))) {
+			spin_unlock(&cipso_v4_doi_list_lock);
+			rcu_read_unlock();
+			kfree(new_dom->domain);
+			kfree(new_dom);
+			return -EEXIST;
+		}
+	list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/**
+ * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
+ * @doi_def: the DOI definition
+ * @domain: the domain to remove
+ *
+ * Description:
+ * Removes the @domain from the DOI specified by @doi_def, this function
+ * should only be called by external functions (i.e. NetLabel).  Returns zero
+ * on success and -ENOENT if the domain is not on the DOI's list; the entry
+ * itself is freed after an RCU grace period.
+ */
+int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
+			       const char *domain)
+{
+	struct cipso_v4_domhsh_entry *iter;
+
+	rcu_read_lock();
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
+		if (iter->valid &&
+		    ((domain != NULL && iter->domain != NULL &&
+		      strcmp(iter->domain, domain) == 0) ||
+		     (domain == NULL && iter->domain == NULL))) {
+			iter->valid = 0;
+			list_del_rcu(&iter->list);
+			spin_unlock(&cipso_v4_doi_list_lock);
+			rcu_read_unlock();
+			call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
+
+			return 0;
+		}
+	spin_unlock(&cipso_v4_doi_list_lock);
+	rcu_read_unlock();
+
+	return -ENOENT;
+}
+
+/*
+ * Label Mapping Functions
+ */
+
+/**
+ * cipso_v4_map_lvl_valid - Checks to see if the given level is understood
+ * @doi_def: the DOI definition
+ * @level: the level to check
+ *
+ * Description:
+ * Checks the given level against the given DOI definition and returns zero if
+ * the level is defined by the DOI, or a negative value if it has no valid
+ * mapping; levels beyond the DOI's CIPSO level table are never valid.
+ */
+static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
+{
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		if (level < doi_def->map.std->lvl.cipso_size &&
+		    doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
+			return 0;
+		break;
+	}
+
+	return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network
+ * @doi_def: the DOI definition
+ * @host_lvl: the host MLS level
+ * @net_lvl: the network/CIPSO MLS level
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS level to the correct
+ * CIPSO level using the given DOI definition.  Returns zero on success,
+ * -EINVAL if the host level is out of range or has no CIPSO mapping.
+ */
+static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def,
+				 u32 host_lvl,
+				 u32 *net_lvl)
+{
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		*net_lvl = host_lvl;
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		if (host_lvl < doi_def->map.std->lvl.local_size &&
+		    doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) {
+			*net_lvl = doi_def->map.std->lvl.local[host_lvl];
+			return 0;
+		}
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host
+ * @doi_def: the DOI definition
+ * @net_lvl: the network/CIPSO MLS level
+ * @host_lvl: the host MLS level
+ *
+ * Description:
+ * Perform a label mapping to translate a CIPSO level to the correct local MLS
+ * level using the given DOI definition.  Returns zero on success, -EINVAL
+ * if the CIPSO level is out of range or has no local mapping.
+ *
+ */
+static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def,
+				 u32 net_lvl,
+				 u32 *host_lvl)
+{
+	struct cipso_v4_std_map_tbl *map_tbl;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		*host_lvl = net_lvl;
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		map_tbl = doi_def->map.std;
+		if (net_lvl < map_tbl->lvl.cipso_size &&
+		    map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) {
+			*host_lvl = doi_def->map.std->lvl.cipso[net_lvl];
+			return 0;
+		}
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid
+ * @doi_def: the DOI definition
+ * @bitmap: category bitmap
+ * @bitmap_len: bitmap length in bytes
+ *
+ * Description:
+ * Checks the given category bitmap against the given DOI definition and
+ * returns a negative value if any of the categories in the bitmap do not have
+ * a valid mapping and a zero value if all of the categories are valid.
+ * The DOI's mapping table is only consulted for CIPSO_V4_MAP_STD definitions.
+ */
+static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def,
+				      const unsigned char *bitmap,
+				      u32 bitmap_len)
+{
+	int cat = -1;
+	u32 bitmap_len_bits = bitmap_len * 8;
+	u32 cipso_cat_size;
+	u32 *cipso_array;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		return 0;
+	case CIPSO_V4_MAP_STD:
+		cipso_cat_size = doi_def->map.std->cat.cipso_size;
+		cipso_array = doi_def->map.std->cat.cipso;
+		for (;;) {
+			cat = cipso_v4_bitmap_walk(bitmap, bitmap_len_bits,
+						   cat + 1, 1);
+			if (cat < 0)
+				break;
+			if (cat >= cipso_cat_size ||
+			    cipso_array[cat] >= CIPSO_V4_INV_CAT)
+				return -EFAULT;
+		}
+
+		if (cat == -1)
+			return 0;
+		break;
+	}
+
+	return -EFAULT;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network
+ * @doi_def: the DOI definition
+ * @host_cat: the category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ * @net_cat: the zero'd out category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ *
+ * Description:
+ * Translate a local MLS category bitmap to the CIPSO bitmap for the given DOI;
+ * pass-through DOIs copy it without its trailing zero bytes.  Returns the
+ * minimum size in bytes of the network bitmap, negative values on failure.
+ */
+static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def,
+				     const unsigned char *host_cat,
+				     u32 host_cat_len,
+				     unsigned char *net_cat,
+				     u32 net_cat_len)
+{
+	int host_spot = -1;
+	u32 net_spot;
+	u32 net_spot_max = 0;
+	u32 host_clen_bits = host_cat_len * 8;
+	u32 net_clen_bits = net_cat_len * 8;
+	u32 host_cat_size;
+	u32 *host_cat_array;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		net_spot_max = host_cat_len;
+		while (net_spot_max > 0 && host_cat[net_spot_max - 1] == 0)
+			net_spot_max--;
+		if (net_spot_max > net_cat_len)
+			return -EINVAL;
+		memcpy(net_cat, host_cat, net_spot_max);
+		return net_spot_max;
+	case CIPSO_V4_MAP_STD:
+		host_cat_size = doi_def->map.std->cat.local_size;
+		host_cat_array = doi_def->map.std->cat.local;
+		for (;;) {
+			host_spot = cipso_v4_bitmap_walk(host_cat,
+							 host_clen_bits,
+							 host_spot + 1, 1);
+			if (host_spot < 0)
+				break;
+			if (host_spot >= host_cat_size)
+				return -EPERM;
+
+			net_spot = host_cat_array[host_spot];
+			if (net_spot >= net_clen_bits)
+				return -ENOSPC;
+			cipso_v4_bitmap_setbit(net_cat, net_spot, 1);
+
+			if (net_spot > net_spot_max)
+				net_spot_max = net_spot;
+		}
+
+		if (host_spot == -2)
+			return -EFAULT;
+
+		if (++net_spot_max % 8)
+			return net_spot_max / 8 + 1;
+		return net_spot_max / 8;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host
+ * @doi_def: the DOI definition
+ * @net_cat: the category bitmap in network/CIPSO format
+ * @net_cat_len: the length of the CIPSO bitmap in bytes
+ * @host_cat: the zero'd out category bitmap in host format
+ * @host_cat_len: the length of the host's category bitmap in bytes
+ *
+ * Description:
+ * Translate a CIPSO bitmap to the local MLS category bitmap for the given DOI;
+ * the mapping table is only read for CIPSO_V4_MAP_STD.  Returns the minimum
+ * size in bytes of the host bitmap on success, negative values otherwise.
+ */
+static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def,
+				     const unsigned char *net_cat,
+				     u32 net_cat_len,
+				     unsigned char *host_cat,
+				     u32 host_cat_len)
+{
+	u32 host_spot;
+	u32 host_spot_max = 0;
+	int net_spot = -1;
+	u32 net_clen_bits = net_cat_len * 8;
+	u32 host_clen_bits = host_cat_len * 8;
+	u32 net_cat_size;
+	u32 *net_cat_array;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_PASS:
+		if (net_cat_len > host_cat_len)
+			return -EINVAL;
+		memcpy(host_cat, net_cat, net_cat_len);
+		return net_cat_len;
+	case CIPSO_V4_MAP_STD:
+		net_cat_size = doi_def->map.std->cat.cipso_size;
+		net_cat_array = doi_def->map.std->cat.cipso;
+		for (;;) {
+			net_spot = cipso_v4_bitmap_walk(net_cat,
+							net_clen_bits,
+							net_spot + 1, 1);
+			if (net_spot < 0)
+				break;
+			if (net_spot >= net_cat_size ||
+			    net_cat_array[net_spot] >= CIPSO_V4_INV_CAT)
+				return -EPERM;
+
+			host_spot = net_cat_array[net_spot];
+			if (host_spot >= host_clen_bits)
+				return -ENOSPC;
+			cipso_v4_bitmap_setbit(host_cat, host_spot, 1);
+
+			if (host_spot > host_spot_max)
+				host_spot_max = host_spot;
+		}
+
+		if (net_spot == -2)
+			return -EFAULT;
+
+		if (++host_spot_max % 8)
+			return host_spot_max / 8 + 1;
+		return host_spot_max / 8;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Protocol Handling Functions
+ */
+
+#define CIPSO_V4_HDR_LEN              6
+
+/**
+ * cipso_v4_gentag_hdr - Generate a CIPSO option header
+ * @doi_def: the DOI definition
+ * @len: the total tag length in bytes
+ * @buf: the CIPSO option buffer
+ *
+ * Description:
+ * Write a CIPSO header into the beginning of @buf; the DOI lands at byte
+ * offset 2, so it is copied bytewise.  Returns zero or -ENOSPC on failure.
+ */
+static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
+			       u32 len,
+			       unsigned char *buf)
+{
+	u32 doi = htonl(doi_def->doi);
+
+	if (CIPSO_V4_HDR_LEN + len > 40)
+		return -ENOSPC;
+	buf[0] = IPOPT_CIPSO;
+	buf[1] = CIPSO_V4_HDR_LEN + len;
+	memcpy(&buf[2], &doi, sizeof(doi));
+
+	return 0;
+}
+
+#define CIPSO_V4_TAG1_CAT_LEN         30
+
+/**
+ * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1)
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @buffer: the option buffer
+ * @buffer_len: length of buffer in bytes
+ *
+ * Description:
+ * Generate a CIPSO option using the restricted bitmap tag, tag type #1.  The
+ * actual buffer length may be larger than the indicated size due to
+ * translation between host and network category bitmaps.  On success zero is
+ * returned and @buffer points to a kzalloc()'d option which the caller must
+ * kfree(); on failure a negative value is returned and the buffer is freed.
+ */
+static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
+			       const struct netlbl_lsm_secattr *secattr,
+			       unsigned char **buffer,
+			       u32 *buffer_len)
+{
+	int ret_val = -EPERM;
+	unsigned char *buf = NULL;
+	u32 buf_len;
+	u32 level;
+
+	if (secattr->mls_cat) {
+		buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN,
+			      GFP_ATOMIC);
+		if (buf == NULL)
+			return -ENOMEM;
+
+		ret_val = cipso_v4_map_cat_rbm_hton(doi_def,
+						    secattr->mls_cat,
+						    secattr->mls_cat_len,
+						    &buf[CIPSO_V4_HDR_LEN + 4],
+						    CIPSO_V4_TAG1_CAT_LEN);
+		if (ret_val < 0)
+			goto gentag_failure;
+
+		/* This will send packets using the "optimized" format when
+		 * possible as specified in section 3.4.2.6 of the
+		 * CIPSO draft. */
+		if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10))
+			ret_val = 10;
+
+		buf_len = 4 + ret_val;
+	} else {
+		buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC);
+		if (buf == NULL)
+			return -ENOMEM;
+		buf_len = 4;
+	}
+
+	ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
+	if (ret_val != 0)
+		goto gentag_failure;
+
+	ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf);
+	if (ret_val != 0)
+		goto gentag_failure;
+
+	buf[CIPSO_V4_HDR_LEN] = 0x01;
+	buf[CIPSO_V4_HDR_LEN + 1] = buf_len;
+	buf[CIPSO_V4_HDR_LEN + 3] = level;
+
+	*buffer = buf;
+	*buffer_len = CIPSO_V4_HDR_LEN + buf_len;
+
+	return 0;
+
+gentag_failure:
+	kfree(buf);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag
+ * @doi_def: the DOI definition
+ * @tag: the CIPSO tag
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security
+ * attributes in @secattr.  Return zero on success, negative values on
+ * failure.  A category bitmap allocated here is released again on failure
+ * and @secattr->mls_cat is reset so the caller never sees a stale pointer.
+ */
+static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def,
+				 const unsigned char *tag,
+				 struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	u8 tag_len = tag[1];
+	u32 level;
+
+	ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level);
+	if (ret_val != 0)
+		return ret_val;
+	secattr->mls_lvl = level;
+	secattr->mls_lvl_vld = 1;
+
+	if (tag_len > 4) {
+		switch (doi_def->type) {
+		case CIPSO_V4_MAP_PASS:
+			secattr->mls_cat_len = tag_len - 4;
+			break;
+		case CIPSO_V4_MAP_STD:
+			secattr->mls_cat_len =
+				doi_def->map.std->cat.local_size;
+			break;
+		}
+		secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC);
+		if (secattr->mls_cat == NULL)
+			return -ENOMEM;
+
+		ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, &tag[4],
+						    tag_len - 4,
+						    secattr->mls_cat,
+						    secattr->mls_cat_len);
+		if (ret_val < 0) {
+			kfree(secattr->mls_cat);
+			secattr->mls_cat = NULL;
+			return ret_val;
+		}
+		secattr->mls_cat_len = ret_val;
+	}
+
+	return 0;
+}
+
+/**
+ * cipso_v4_validate - Validate a CIPSO option
+ * @option: the start of the option, on error it is set to point to the error
+ *
+ * Description:
+ * This routine is called to validate a CIPSO option, it checks all of the
+ * fields to ensure that they are at least valid, see the draft snippet below
+ * for details.  If the option is valid then a zero value is returned and
+ * the value of @option is unchanged.  If the option is invalid then the
+ * byte offset of the offending field is returned and @option is adjusted to
+ * point to that field.  From the IETF draft ...
+ *
+ *  "If any field within the CIPSO options, such as the DOI identifier, is not
+ *   recognized the IP datagram is discarded and an ICMP 'parameter problem'
+ *   (type 12) is generated and returned.  The ICMP code field is set to 'bad
+ *   parameter' (code 0) and the pointer is set to the start of the CIPSO field
+ *   that is unrecognized."
+ *
+ */
+int cipso_v4_validate(unsigned char **option)
+{
+	unsigned char *opt = *option;
+	unsigned char *tag;
+	unsigned char opt_iter;
+	unsigned char err_offset = 0;
+	u8 opt_len;
+	u8 tag_len;
+	struct cipso_v4_doi *doi_def = NULL;
+	u32 tag_iter;
+
+	/* caller already checks for length values that are too large */
+	opt_len = opt[1];
+	if (opt_len < 8) {
+		err_offset = 1;
+		goto validate_return;
+	}
+
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2])));
+	if (doi_def == NULL) {
+		err_offset = 2;
+		goto validate_return_locked;
+	}
+
+	opt_iter = 6;
+	tag = opt + opt_iter;
+	while (opt_iter < opt_len) {
+		for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
+			if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID ||
+			    ++tag_iter == CIPSO_V4_TAG_MAXCNT) {
+				err_offset = opt_iter;
+				goto validate_return_locked;
+			}
+
+		tag_len = tag[1];
+		if (tag_len > (opt_len - opt_iter)) {
+			err_offset = opt_iter + 1;
+			goto validate_return_locked;
+		}
+
+		switch (tag[0]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			if (tag_len < 4) {
+				err_offset = opt_iter + 1;
+				goto validate_return_locked;
+			}
+
+			/* We are already going to do all the verification
+			 * necessary at the socket layer so from our point of
+			 * view it is safe to turn these checks off (and less
+			 * work), however, the CIPSO draft says we should do
+			 * all the CIPSO validations here but it doesn't
+			 * really specify _exactly_ what we need to validate
+			 * ... so, just make it a sysctl tunable. */
+			if (cipso_v4_rbm_strictvalid) {
+				if (cipso_v4_map_lvl_valid(doi_def,
+							   tag[3]) < 0) {
+					err_offset = opt_iter + 3;
+					goto validate_return_locked;
+				}
+				if (tag_len > 4 &&
+				    cipso_v4_map_cat_rbm_valid(doi_def,
+							    &tag[4],
+							    tag_len - 4) < 0) {
+					err_offset = opt_iter + 4;
+					goto validate_return_locked;
+				}
+			}
+			break;
+		default:
+			err_offset = opt_iter;
+			goto validate_return_locked;
+		}
+
+		tag += tag_len;
+		opt_iter += tag_len;
+	}
+
+validate_return_locked:
+	rcu_read_unlock();
+validate_return:
+	*option = opt + err_offset;
+	return err_offset;
+}
+
+/**
+ * cipso_v4_error - Send the correct response for a bad packet
+ * @skb: the packet
+ * @error: the error code
+ * @gateway: CIPSO gateway flag
+ *
+ * Description:
+ * Based on the error code given in @error, send an ICMP error message back to
+ * the originating host; only -EACCES on a non-ICMP packet is answered.  From
+ * the IETF draft ...
+ *
+ *  "If the contents of the CIPSO [option] are valid but the security label is
+ *   outside of the configured host or port label range, the datagram is
+ *   discarded and an ICMP 'destination unreachable' (type 3) is generated and
+ *   returned.  The code field of the ICMP is set to 'communication with
+ *   destination network administratively prohibited' (code 9) or to
+ *   'communication with destination host administratively prohibited'
+ *   (code 10).  The value of the code is dependent on whether the originator
+ *   of the ICMP message is acting as a CIPSO host or a CIPSO gateway.  The
+ *   recipient of the ICMP message MUST be able to handle either value.  The
+ *   same procedure is performed if a CIPSO [option] can not be added to an
+ *   IP packet because it is too large to fit in the IP options area."
+ *
+ *  "If the error is triggered by receipt of an ICMP message, the message is
+ *   discarded and no response is permitted (consistent with general ICMP
+ *   processing rules)."
+ */
+void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
+{
+	if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
+		return;
+
+	if (gateway)
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+	else
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+}
+
+/**
+ * cipso_v4_socket_setattr - Add a CIPSO option to a socket
+ * @sock: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function.  This function requires
+ * exclusive access to @sock->sk, which means it either needs to be in the
+ * process of being created or locked via lock_sock(sock->sk).  Returns zero on
+ * success (also when @sock->sk is not set yet and nothing is done) and
+ * negative values on failure.
+ */
+int cipso_v4_socket_setattr(const struct socket *sock,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	u32 iter;
+	unsigned char *buf = NULL;
+	u32 buf_len = 0;
+	u32 opt_len;
+	struct ip_options *opt = NULL;
+	struct sock *sk;
+	struct inet_sock *sk_inet;
+	struct inet_connection_sock *sk_conn;
+
+	/* In the case of sock_create_lite(), the sock->sk field is not
+	 * defined yet but it is not a problem as the only users of these
+	 * "lite" PF_INET sockets are functions which do an accept() call
+	 * afterwards so we will label the socket as part of the accept(). */
+	sk = sock->sk;
+	if (sk == NULL)
+		return 0;
+
+	/* XXX - This code assumes only one tag per CIPSO option which isn't
+	 * really a good assumption to make but since we only support the MAC
+	 * tags right now it is a safe assumption. */
+	iter = 0;
+	do {
+		switch (doi_def->tags[iter]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			ret_val = cipso_v4_gentag_rbm(doi_def,
+						      secattr,
+						      &buf,
+						      &buf_len);
+			break;
+		default:
+			ret_val = -EPERM;
+			goto socket_setattr_failure;
+		}
+
+		iter++;
+	} while (ret_val != 0 &&
+		 iter < CIPSO_V4_TAG_MAXCNT &&
+		 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
+	if (ret_val != 0)
+		goto socket_setattr_failure;
+
+	/* We can't use ip_options_get() directly because it makes a call to
+	 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
+	 * we can't block here. */
+	opt_len = (buf_len + 3) & ~3;
+	opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC);
+	if (opt == NULL) {
+		ret_val = -ENOMEM;
+		goto socket_setattr_failure;
+	}
+	memcpy(opt->__data, buf, buf_len);
+	opt->optlen = opt_len;
+	opt->is_data = 1;
+	kfree(buf);
+	buf = NULL;
+	ret_val = ip_options_compile(opt, NULL);
+	if (ret_val != 0)
+		goto socket_setattr_failure;
+
+	sk_inet = inet_sk(sk);
+	if (sk_inet->is_icsk) {
+		sk_conn = inet_csk(sk);
+		if (sk_inet->opt)
+			sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
+		sk_conn->icsk_ext_hdr_len += opt->optlen;
+		sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+	}
+	opt = xchg(&sk_inet->opt, opt);
+	kfree(opt);
+
+	return 0;
+
+socket_setattr_failure:
+	kfree(buf);
+	kfree(opt);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_socket_getattr - Get the security attributes from a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Query @sock to see if there is a CIPSO option attached to the socket and if
+ * there is return the CIPSO security attributes in @secattr.  The socket is
+ * held with lock_sock() for the whole call.  Returns zero on success, -ENOMSG
+ * if the socket has no CIPSO option and other negative values on failure.
+ */
+int cipso_v4_socket_getattr(const struct socket *sock,
+			    struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -ENOMSG;
+	struct sock *sk;
+	struct inet_sock *sk_inet;
+	unsigned char *cipso_ptr;
+	u32 doi;
+	struct cipso_v4_doi *doi_def;
+
+	sk = sock->sk;
+	lock_sock(sk);
+	sk_inet = inet_sk(sk);
+	if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
+		goto socket_getattr_return;
+	cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
+		sizeof(struct iphdr);
+	ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
+	if (ret_val == 0)
+		goto socket_getattr_return;
+
+	doi = ntohl(*(u32 *)&cipso_ptr[2]);
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_getdef(doi);
+	if (doi_def == NULL) {
+		rcu_read_unlock();
+		goto socket_getattr_return;
+	}
+	switch (cipso_ptr[6]) {
+	case CIPSO_V4_TAG_RBITMAP:
+		ret_val = cipso_v4_parsetag_rbm(doi_def,
+						&cipso_ptr[6],
+						secattr);
+		break;
+	}
+	rcu_read_unlock();
+
+socket_getattr_return:
+	release_sock(sk);
+	return ret_val;
+}
+
+/**
+ * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Parse the given packet's CIPSO option and return the security attributes.
+ * Returns zero on success, -ENOMSG if the option is missing, its DOI is
+ * unknown or its first tag is not a restricted bitmap, else a parse error.
+ */
+int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
+			    struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -ENOMSG;
+	unsigned char *cipso_ptr;
+	u32 doi;
+	struct cipso_v4_doi *doi_def;
+
+	if (!CIPSO_V4_OPTEXIST(skb))
+		return -ENOMSG;
+	cipso_ptr = CIPSO_V4_OPTPTR(skb);
+	if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0)
+		return 0;
+
+	doi = ntohl(*(u32 *)&cipso_ptr[2]);
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_getdef(doi);
+	if (doi_def == NULL)
+		goto skbuff_getattr_return;
+	switch (cipso_ptr[6]) {
+	case CIPSO_V4_TAG_RBITMAP:
+		ret_val = cipso_v4_parsetag_rbm(doi_def,
+						&cipso_ptr[6],
+						secattr);
+		break;
+	}
+
+skbuff_getattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
+/*
+ * Setup Functions
+ */
+
+/**
+ * cipso_v4_init - Initialize the CIPSO module
+ *
+ * Description:
+ * Initialize the CIPSO module and prepare it for use.  Always returns zero;
+ * a failure to initialize the CIPSO/IPv4 cache is fatal and ends in panic().
+ *
+ */
+static int __init cipso_v4_init(void)
+{
+	int ret_val;
+
+	ret_val = cipso_v4_cache_init();
+	if (ret_val != 0)
+		panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n",
+		      ret_val);
+
+	return 0;
+}
+
+subsys_initcall(cipso_v4_init);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a6cc31d..8e8d1f1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -43,6 +43,7 @@
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/if_addr.h>
 #include <linux/if_ether.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
@@ -62,6 +63,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
+#include <net/netlink.h>
 
 struct ipv4_devconf ipv4_devconf = {
 	.accept_redirects = 1,
@@ -78,7 +80,15 @@
 	.accept_source_route = 1,
 };
 
-static void rtmsg_ifa(int event, struct in_ifaddr *);
+static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
+	[IFA_LOCAL]     	= { .type = NLA_U32 },
+	[IFA_ADDRESS]   	= { .type = NLA_U32 },
+	[IFA_BROADCAST] 	= { .type = NLA_U32 },
+	[IFA_ANYCAST]   	= { .type = NLA_U32 },
+	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+};
+
+static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
@@ -229,8 +239,8 @@
 	return 0;
 }
 
-static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
-			 int destroy)
+static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+			 int destroy, struct nlmsghdr *nlh, u32 pid)
 {
 	struct in_ifaddr *promote = NULL;
 	struct in_ifaddr *ifa, *ifa1 = *ifap;
@@ -263,7 +273,7 @@
 			if (!do_promote) {
 				*ifap1 = ifa->ifa_next;
 
-				rtmsg_ifa(RTM_DELADDR, ifa);
+				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
 				blocking_notifier_call_chain(&inetaddr_chain,
 						NETDEV_DOWN, ifa);
 				inet_free_ifa(ifa);
@@ -288,7 +298,7 @@
 	   is valid, it will try to restore deleted routes... Grr.
 	   So that, this order is correct.
 	 */
-	rtmsg_ifa(RTM_DELADDR, ifa1);
+	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 
 	if (promote) {
@@ -300,7 +310,7 @@
 		}
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;
-		rtmsg_ifa(RTM_NEWADDR, promote);
+		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
 		blocking_notifier_call_chain(&inetaddr_chain,
 				NETDEV_UP, promote);
 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
@@ -319,7 +329,14 @@
 	}
 }
 
-static int inet_insert_ifa(struct in_ifaddr *ifa)
+static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+			 int destroy)
+{
+	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
+}
+
+static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
+			     u32 pid)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -364,12 +381,17 @@
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
-	rtmsg_ifa(RTM_NEWADDR, ifa);
+	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 
 	return 0;
 }
 
+static int inet_insert_ifa(struct in_ifaddr *ifa)
+{
+	return __inet_insert_ifa(ifa, NULL, 0);
+}
+
 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 {
 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -421,87 +443,134 @@
 
 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct rtattr **rta = arg;
+	struct nlattr *tb[IFA_MAX+1];
 	struct in_device *in_dev;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct ifaddrmsg *ifm;
 	struct in_ifaddr *ifa, **ifap;
+	int err = -EINVAL;
 
 	ASSERT_RTNL();
 
-	if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL)
-		goto out;
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	ifm = nlmsg_data(nlh);
+	in_dev = inetdev_by_index(ifm->ifa_index);
+	if (in_dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
+
 	__in_dev_put(in_dev);
 
 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 	     ifap = &ifa->ifa_next) {
-		if ((rta[IFA_LOCAL - 1] &&
-		     memcmp(RTA_DATA(rta[IFA_LOCAL - 1]),
-			    &ifa->ifa_local, 4)) ||
-		    (rta[IFA_LABEL - 1] &&
-		     rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) ||
-		    (rta[IFA_ADDRESS - 1] &&
-		     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
-		      !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]),
-			      	      ifa))))
+		if (tb[IFA_LOCAL] &&
+		    ifa->ifa_local != nla_get_u32(tb[IFA_LOCAL]))
 			continue;
-		inet_del_ifa(in_dev, ifap, 1);
+
+		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
+			continue;
+
+		if (tb[IFA_ADDRESS] &&
+		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
+		    !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa)))
+			continue;
+
+		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
 		return 0;
 	}
-out:
-	return -EADDRNOTAVAIL;
+
+	err = -EADDRNOTAVAIL;
+errout:
+	return err;
+}
+
+/* Build an in_ifaddr from a netlink address message; ERR_PTR() on failure. */
+static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
+{
+	struct nlattr *tb[IFA_MAX+1];
+	struct in_ifaddr *ifa;
+	struct ifaddrmsg *ifm;
+	struct net_device *dev;
+	struct in_device *in_dev;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	/* a clean parse leaves err at 0, which ERR_PTR() cannot signal */
+	err = -EINVAL;
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
+		goto errout;
+
+	dev = __dev_get_by_index(ifm->ifa_index);
+	if (dev == NULL) {
+		err = -ENODEV;
+		goto errout;
+	}
+
+	in_dev = __in_dev_get_rtnl(dev);
+	if (in_dev == NULL) {
+		in_dev = inetdev_init(dev);
+		if (in_dev == NULL) {
+			err = -ENOBUFS;
+			goto errout;
+		}
+	}
+
+	ifa = inet_alloc_ifa();
+	if (ifa == NULL) {
+		/* a new indev stays assigned to its device, no need to undo */
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	in_dev_hold(in_dev);
+
+	if (tb[IFA_ADDRESS] == NULL)
+		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
+
+	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
+	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
+	ifa->ifa_flags = ifm->ifa_flags;
+	ifa->ifa_scope = ifm->ifa_scope;
+	ifa->ifa_dev = in_dev;
+
+	ifa->ifa_local = nla_get_u32(tb[IFA_LOCAL]);
+	ifa->ifa_address = nla_get_u32(tb[IFA_ADDRESS]);
+
+	if (tb[IFA_BROADCAST])
+		ifa->ifa_broadcast = nla_get_u32(tb[IFA_BROADCAST]);
+
+	if (tb[IFA_ANYCAST])
+		ifa->ifa_anycast = nla_get_u32(tb[IFA_ANYCAST]);
+
+	if (tb[IFA_LABEL])
+		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
+	else
+		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+
+	return ifa;
+
+errout:
+	return ERR_PTR(err);
+}
 
 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct rtattr **rta = arg;
-	struct net_device *dev;
-	struct in_device *in_dev;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
 	struct in_ifaddr *ifa;
-	int rc = -EINVAL;
 
 	ASSERT_RTNL();
 
-	if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1])
-		goto out;
+	ifa = rtm_to_ifaddr(nlh);
+	if (IS_ERR(ifa))
+		return PTR_ERR(ifa);
 
-	rc = -ENODEV;
-	if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
-		goto out;
-
-	rc = -ENOBUFS;
-	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
-		in_dev = inetdev_init(dev);
-		if (!in_dev)
-			goto out;
-	}
-
-	if ((ifa = inet_alloc_ifa()) == NULL)
-		goto out;
-
-	if (!rta[IFA_ADDRESS - 1])
-		rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
-	memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4);
-	memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4);
-	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
-	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
-	if (rta[IFA_BROADCAST - 1])
-		memcpy(&ifa->ifa_broadcast,
-		       RTA_DATA(rta[IFA_BROADCAST - 1]), 4);
-	if (rta[IFA_ANYCAST - 1])
-		memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4);
-	ifa->ifa_flags = ifm->ifa_flags;
-	ifa->ifa_scope = ifm->ifa_scope;
-	in_dev_hold(in_dev);
-	ifa->ifa_dev   = in_dev;
-	if (rta[IFA_LABEL - 1])
-		rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ);
-	else
-		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
-
-	rc = inet_insert_ifa(ifa);
-out:
-	return rc;
+	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
 }
 
 /*
@@ -1056,32 +1125,37 @@
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	ifm = nlmsg_data(nlh);
 	ifm->ifa_family = AF_INET;
 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
 	ifm->ifa_scope = ifa->ifa_scope;
 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
-	if (ifa->ifa_address)
-		RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address);
-	if (ifa->ifa_local)
-		RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local);
-	if (ifa->ifa_broadcast)
-		RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast);
-	if (ifa->ifa_anycast)
-		RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast);
-	if (ifa->ifa_label[0])
-		RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	if (ifa->ifa_address)
+		NLA_PUT_U32(skb, IFA_ADDRESS, ifa->ifa_address);
+
+	if (ifa->ifa_local)
+		NLA_PUT_U32(skb, IFA_LOCAL, ifa->ifa_local);
+
+	if (ifa->ifa_broadcast)
+		NLA_PUT_U32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
+
+	if (ifa->ifa_anycast)
+		NLA_PUT_U32(skb, IFA_ANYCAST, ifa->ifa_anycast);
+
+	if (ifa->ifa_label[0])
+		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1127,19 +1201,27 @@
 	return skb->len;
 }
 
-static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
+static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
+		      u32 pid)
 {
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128);
-	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+	struct sk_buff *skb;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	int err = -ENOBUFS;
 
-	if (!skb)
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS);
-	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL);
-	} else {
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
+		goto errout;
 	}
+
+	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
 }
 
 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -1151,9 +1233,7 @@
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet_rtm_getroute,
 				      .dumpit	= inet_dump_fib,	},
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-	[RTM_NEWRULE  - RTM_BASE] = { .doit	= inet_rtm_newrule,	},
-	[RTM_DELRULE  - RTM_BASE] = { .doit	= inet_rtm_delrule,	},
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= inet_dump_rules,	},
+	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= fib4_rules_dump,	},
 #endif
 };
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index fc2f8ce..13b2936 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1,3 +1,4 @@
+#include <linux/err.h>
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -16,7 +17,8 @@
 	int err;
 	struct iphdr *top_iph;
 	struct ip_esp_hdr *esph;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
+	struct blkcipher_desc desc;
 	struct esp_data *esp;
 	struct sk_buff *trailer;
 	int blksize;
@@ -36,7 +38,9 @@
 	esp = x->data;
 	alen = esp->auth.icv_trunc_len;
 	tfm = esp->conf.tfm;
-	blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
+	desc.tfm = tfm;
+	desc.flags = 0;
+	blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	clen = ALIGN(clen + 2, blksize);
 	if (esp->conf.padlen)
 		clen = ALIGN(clen, esp->conf.padlen);
@@ -91,8 +95,13 @@
 	esph->seq_no = htonl(++x->replay.oseq);
 	xfrm_aevent_doreplay(x);
 
-	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+	if (esp->conf.ivlen) {
+		if (unlikely(!esp->conf.ivinitted)) {
+			get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+			esp->conf.ivinitted = 1;
+		}
+		crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
+	}
 
 	do {
 		struct scatterlist *sg = &esp->sgbuf[0];
@@ -103,26 +112,27 @@
 				goto error;
 		}
 		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
-		crypto_cipher_encrypt(tfm, sg, sg, clen);
+		err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
 		if (unlikely(sg != &esp->sgbuf[0]))
 			kfree(sg);
 	} while (0);
 
+	if (unlikely(err))
+		goto error;
+
 	if (esp->conf.ivlen) {
-		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
-		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+		memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
+		crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
 	}
 
 	if (esp->auth.icv_full_len) {
-		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
-		              sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
-		pskb_put(skb, trailer, alen);
+		err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data,
+				     sizeof(*esph) + esp->conf.ivlen + clen);
+		memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
 	}
 
 	ip_send_check(top_iph);
 
-	err = 0;
-
 error:
 	return err;
 }
@@ -137,8 +147,10 @@
 	struct iphdr *iph;
 	struct ip_esp_hdr *esph;
 	struct esp_data *esp = x->data;
+	struct crypto_blkcipher *tfm = esp->conf.tfm;
+	struct blkcipher_desc desc = { .tfm = tfm };
 	struct sk_buff *trailer;
-	int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	int alen = esp->auth.icv_trunc_len;
 	int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
 	int nfrags;
@@ -146,6 +158,7 @@
 	u8 nexthdr[2];
 	struct scatterlist *sg;
 	int padlen;
+	int err;
 
 	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr)))
 		goto out;
@@ -155,15 +168,16 @@
 
 	/* If integrity check is required, do this. */
 	if (esp->auth.icv_full_len) {
-		u8 sum[esp->auth.icv_full_len];
-		u8 sum1[alen];
-		
-		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+		u8 sum[alen];
 
-		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+		err = esp_mac_digest(esp, skb, 0, skb->len - alen);
+		if (err)
+			goto out;
+
+		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
 			BUG();
 
-		if (unlikely(memcmp(sum, sum1, alen))) {
+		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
 			x->stats.integrity_failed++;
 			goto out;
 		}
@@ -178,7 +192,7 @@
 
 	/* Get ivec. This can be wrong, check against another impls. */
 	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+		crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
 
 	sg = &esp->sgbuf[0];
 
@@ -188,9 +202,11 @@
 			goto out;
 	}
 	skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen);
-	crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
+	err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
 	if (unlikely(sg != &esp->sgbuf[0]))
 		kfree(sg);
+	if (unlikely(err))
+		return err;
 
 	if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
 		BUG();
@@ -237,7 +253,7 @@
 		 *    as per draft-ietf-ipsec-udp-encaps-06,
 		 *    section 3.1.2
 		 */
-		if (!x->props.mode)
+		if (x->props.mode == XFRM_MODE_TRANSPORT)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
@@ -254,9 +270,9 @@
 static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
-	u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		mtu = ALIGN(mtu + 2, blksize);
 	} else {
 		/* The worst case. */
@@ -293,11 +309,11 @@
 	if (!esp)
 		return;
 
-	crypto_free_tfm(esp->conf.tfm);
+	crypto_free_blkcipher(esp->conf.tfm);
 	esp->conf.tfm = NULL;
 	kfree(esp->conf.ivec);
 	esp->conf.ivec = NULL;
-	crypto_free_tfm(esp->auth.tfm);
+	crypto_free_hash(esp->auth.tfm);
 	esp->auth.tfm = NULL;
 	kfree(esp->auth.work_icv);
 	esp->auth.work_icv = NULL;
@@ -307,6 +323,7 @@
 static int esp_init_state(struct xfrm_state *x)
 {
 	struct esp_data *esp = NULL;
+	struct crypto_blkcipher *tfm;
 
 	/* null auth and encryption can have zero length keys */
 	if (x->aalg) {
@@ -322,22 +339,27 @@
 
 	if (x->aalg) {
 		struct xfrm_algo_desc *aalg_desc;
+		struct crypto_hash *hash;
 
 		esp->auth.key = x->aalg->alg_key;
 		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
-		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-		if (esp->auth.tfm == NULL)
+		hash = crypto_alloc_hash(x->aalg->alg_name, 0,
+					 CRYPTO_ALG_ASYNC);
+		if (IS_ERR(hash))
 			goto error;
-		esp->auth.icv = esp_hmac_digest;
+
+		esp->auth.tfm = hash;
+		if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len))
+			goto error;
 
 		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 		BUG_ON(!aalg_desc);
 
 		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-		    crypto_tfm_alg_digestsize(esp->auth.tfm)) {
+		    crypto_hash_digestsize(hash)) {
 			NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
 				 x->aalg->alg_name,
-				 crypto_tfm_alg_digestsize(esp->auth.tfm),
+				 crypto_hash_digestsize(hash),
 				 aalg_desc->uinfo.auth.icv_fullbits/8);
 			goto error;
 		}
@@ -351,24 +373,22 @@
 	}
 	esp->conf.key = x->ealg->alg_key;
 	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
-	if (x->props.ealgo == SADB_EALG_NULL)
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
-	else
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
-	if (esp->conf.tfm == NULL)
+	tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
 		goto error;
-	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+	esp->conf.tfm = tfm;
+	esp->conf.ivlen = crypto_blkcipher_ivsize(tfm);
 	esp->conf.padlen = 0;
 	if (esp->conf.ivlen) {
 		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
 		if (unlikely(esp->conf.ivec == NULL))
 			goto error;
-		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+		esp->conf.ivinitted = 0;
 	}
-	if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+	if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
 		goto error;
 	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ba2a707..cfb527c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,10 +32,12 @@
 #include <linux/inet.h>
 #include <linux/inetdevice.h>
 #include <linux/netdevice.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/init.h>
+#include <linux/list.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -50,48 +52,67 @@
 
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
-#define RT_TABLE_MIN RT_TABLE_MAIN
-
 struct fib_table *ip_fib_local_table;
 struct fib_table *ip_fib_main_table;
 
+#define FIB_TABLE_HASHSZ 1
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+
 #else
 
-#define RT_TABLE_MIN 1
+#define FIB_TABLE_HASHSZ 256
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
 
-struct fib_table *fib_tables[RT_TABLE_MAX+1];
-
-struct fib_table *__fib_new_table(int id)
+struct fib_table *fib_new_table(u32 id)
 {
 	struct fib_table *tb;
+	unsigned int h;
 
+	if (id == 0)
+		id = RT_TABLE_MAIN;
+	tb = fib_get_table(id);
+	if (tb)
+		return tb;
 	tb = fib_hash_init(id);
 	if (!tb)
 		return NULL;
-	fib_tables[id] = tb;
+	h = id & (FIB_TABLE_HASHSZ - 1);
+	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
 	return tb;
 }
 
+struct fib_table *fib_get_table(u32 id)
+{
+	struct fib_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
 
+	if (id == 0)
+		id = RT_TABLE_MAIN;
+	h = id & (FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
+		if (tb->tb_id == id) {
+			rcu_read_unlock();
+			return tb;
+		}
+	}
+	rcu_read_unlock();
+	return NULL;
+}
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
-
 static void fib_flush(void)
 {
 	int flushed = 0;
-#ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_table *tb;
-	int id;
+	struct hlist_node *node;
+	unsigned int h;
 
-	for (id = RT_TABLE_MAX; id>0; id--) {
-		if ((tb = fib_get_table(id))==NULL)
-			continue;
-		flushed += tb->tb_flush(tb);
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
+			flushed += tb->tb_flush(tb);
 	}
-#else /* CONFIG_IP_MULTIPLE_TABLES */
-	flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
-	flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
-#endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 	if (flushed)
 		rt_cache_flush(-1);
@@ -232,42 +253,190 @@
 
 #ifndef CONFIG_IP_NOSIOCRT
 
+static inline u32 sk_extract_addr(struct sockaddr *addr)
+{
+	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+}
+
+static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
+{
+	struct nlattr *nla;
+
+	nla = (struct nlattr *) ((char *) mx + len);
+	nla->nla_type = type;
+	nla->nla_len = nla_attr_size(4);
+	*(u32 *) nla_data(nla) = value;
+
+	return len + nla_total_size(4);
+}
+
+static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
+				 struct fib_config *cfg)
+{
+	u32 addr;
+	int plen;
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	if (rt->rt_dst.sa_family != AF_INET)
+		return -EAFNOSUPPORT;
+
+	/*
+	 * Check mask for validity:
+	 * a) it must be contiguous.
+	 * b) destination must have all host bits clear.
+	 * c) if application forgot to set correct family (AF_INET),
+	 *    reject request unless it is absolutely clear i.e.
+	 *    both family and mask are zero.
+	 */
+	plen = 32;
+	addr = sk_extract_addr(&rt->rt_dst);
+	if (!(rt->rt_flags & RTF_HOST)) {
+		u32 mask = sk_extract_addr(&rt->rt_genmask);
+
+		if (rt->rt_genmask.sa_family != AF_INET) {
+			if (mask || rt->rt_genmask.sa_family)
+				return -EAFNOSUPPORT;
+		}
+
+		if (bad_mask(mask, addr))
+			return -EINVAL;
+
+		plen = inet_mask_len(mask);
+	}
+
+	cfg->fc_dst_len = plen;
+	cfg->fc_dst = addr;
+
+	if (cmd != SIOCDELRT) {
+		cfg->fc_nlflags = NLM_F_CREATE;
+		cfg->fc_protocol = RTPROT_BOOT;
+	}
+
+	if (rt->rt_metric)
+		cfg->fc_priority = rt->rt_metric - 1;
+
+	if (rt->rt_flags & RTF_REJECT) {
+		cfg->fc_scope = RT_SCOPE_HOST;
+		cfg->fc_type = RTN_UNREACHABLE;
+		return 0;
+	}
+
+	cfg->fc_scope = RT_SCOPE_NOWHERE;
+	cfg->fc_type = RTN_UNICAST;
+
+	if (rt->rt_dev) {
+		char *colon;
+		struct net_device *dev;
+		char devname[IFNAMSIZ];
+
+		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
+			return -EFAULT;
+
+		devname[IFNAMSIZ-1] = 0;
+		colon = strchr(devname, ':');
+		if (colon)
+			*colon = 0;
+		dev = __dev_get_by_name(devname);
+		if (!dev)
+			return -ENODEV;
+		cfg->fc_oif = dev->ifindex;
+		if (colon) {
+			struct in_ifaddr *ifa;
+			struct in_device *in_dev = __in_dev_get_rtnl(dev);
+			if (!in_dev)
+				return -ENODEV;
+			*colon = ':';
+			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
+				if (strcmp(ifa->ifa_label, devname) == 0)
+					break;
+			if (ifa == NULL)
+				return -ENODEV;
+			cfg->fc_prefsrc = ifa->ifa_local;
+		}
+	}
+
+	addr = sk_extract_addr(&rt->rt_gateway);
+	if (rt->rt_gateway.sa_family == AF_INET && addr) {
+		cfg->fc_gw = addr;
+		if (rt->rt_flags & RTF_GATEWAY &&
+		    inet_addr_type(addr) == RTN_UNICAST)
+			cfg->fc_scope = RT_SCOPE_UNIVERSE;
+	}
+
+	if (cmd == SIOCDELRT)
+		return 0;
+
+	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
+		return -EINVAL;
+
+	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
+		cfg->fc_scope = RT_SCOPE_LINK;
+
+	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
+		struct nlattr *mx;
+		int len = 0;
+
+		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
+ 		if (mx == NULL)
+			return -ENOMEM;
+
+		if (rt->rt_flags & RTF_MTU)
+			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
+
+		if (rt->rt_flags & RTF_WINDOW)
+			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
+
+		if (rt->rt_flags & RTF_IRTT)
+			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
+
+		cfg->fc_mx = mx;
+		cfg->fc_mx_len = len;
+	}
+
+	return 0;
+}
+
 /*
  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
  */
  
 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
 {
+	struct fib_config cfg;
+	struct rtentry rt;
 	int err;
-	struct kern_rta rta;
-	struct rtentry  r;
-	struct {
-		struct nlmsghdr nlh;
-		struct rtmsg	rtm;
-	} req;
 
 	switch (cmd) {
 	case SIOCADDRT:		/* Add a route */
 	case SIOCDELRT:		/* Delete a route */
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-		if (copy_from_user(&r, arg, sizeof(struct rtentry)))
+
+		if (copy_from_user(&rt, arg, sizeof(rt)))
 			return -EFAULT;
+
 		rtnl_lock();
-		err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
+		err = rtentry_to_fib_config(cmd, &rt, &cfg);
 		if (err == 0) {
+			struct fib_table *tb;
+
 			if (cmd == SIOCDELRT) {
-				struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
-				err = -ESRCH;
+				tb = fib_get_table(cfg.fc_table);
 				if (tb)
-					err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+					err = tb->tb_delete(tb, &cfg);
+				else
+					err = -ESRCH;
 			} else {
-				struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
-				err = -ENOBUFS;
+				tb = fib_new_table(cfg.fc_table);
 				if (tb)
-					err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+					err = tb->tb_insert(tb, &cfg);
+				else
+					err = -ENOBUFS;
 			}
-			kfree(rta.rta_mx);
+
+			/* allocated by rtentry_to_fib_config() */
+			kfree(cfg.fc_mx);
 		}
 		rtnl_unlock();
 		return err;
@@ -284,77 +453,169 @@
 
 #endif
 
-static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
-{
-	int i;
+struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
+	[RTA_DST]		= { .type = NLA_U32 },
+	[RTA_SRC]		= { .type = NLA_U32 },
+	[RTA_IIF]		= { .type = NLA_U32 },
+	[RTA_OIF]		= { .type = NLA_U32 },
+	[RTA_GATEWAY]		= { .type = NLA_U32 },
+	[RTA_PRIORITY]		= { .type = NLA_U32 },
+	[RTA_PREFSRC]		= { .type = NLA_U32 },
+	[RTA_METRICS]		= { .type = NLA_NESTED },
+	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
+	[RTA_PROTOINFO]		= { .type = NLA_U32 },
+	[RTA_FLOW]		= { .type = NLA_U32 },
+	[RTA_MP_ALGO]		= { .type = NLA_U32 },
+};
 
-	for (i=1; i<=RTA_MAX; i++, rta++) {
-		struct rtattr *attr = *rta;
-		if (attr) {
-			if (RTA_PAYLOAD(attr) < 4)
-				return -EINVAL;
-			if (i != RTA_MULTIPATH && i != RTA_METRICS)
-				*rta = (struct rtattr*)RTA_DATA(attr);
+static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     struct fib_config *cfg)
+{
+	struct nlattr *attr;
+	int err, remaining;
+	struct rtmsg *rtm;
+
+	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	rtm = nlmsg_data(nlh);
+	cfg->fc_family = rtm->rtm_family;
+	cfg->fc_dst_len = rtm->rtm_dst_len;
+	cfg->fc_src_len = rtm->rtm_src_len;
+	cfg->fc_tos = rtm->rtm_tos;
+	cfg->fc_table = rtm->rtm_table;
+	cfg->fc_protocol = rtm->rtm_protocol;
+	cfg->fc_scope = rtm->rtm_scope;
+	cfg->fc_type = rtm->rtm_type;
+	cfg->fc_flags = rtm->rtm_flags;
+	cfg->fc_nlflags = nlh->nlmsg_flags;
+
+	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+	cfg->fc_nlinfo.nlh = nlh;
+
+	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
+		switch (attr->nla_type) {
+		case RTA_DST:
+			cfg->fc_dst = nla_get_u32(attr);
+			break;
+		case RTA_SRC:
+			cfg->fc_src = nla_get_u32(attr);
+			break;
+		case RTA_OIF:
+			cfg->fc_oif = nla_get_u32(attr);
+			break;
+		case RTA_GATEWAY:
+			cfg->fc_gw = nla_get_u32(attr);
+			break;
+		case RTA_PRIORITY:
+			cfg->fc_priority = nla_get_u32(attr);
+			break;
+		case RTA_PREFSRC:
+			cfg->fc_prefsrc = nla_get_u32(attr);
+			break;
+		case RTA_METRICS:
+			cfg->fc_mx = nla_data(attr);
+			cfg->fc_mx_len = nla_len(attr);
+			break;
+		case RTA_MULTIPATH:
+			cfg->fc_mp = nla_data(attr);
+			cfg->fc_mp_len = nla_len(attr);
+			break;
+		case RTA_FLOW:
+			cfg->fc_flow = nla_get_u32(attr);
+			break;
+		case RTA_MP_ALGO:
+			cfg->fc_mp_alg = nla_get_u32(attr);
+			break;
+		case RTA_TABLE:
+			cfg->fc_table = nla_get_u32(attr);
+			break;
 		}
 	}
+
 	return 0;
+errout:
+	return err;
 }
 
 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct fib_table * tb;
-	struct rtattr **rta = arg;
-	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct fib_config cfg;
+	struct fib_table *tb;
+	int err;
 
-	if (inet_check_attr(r, rta))
-		return -EINVAL;
+	err = rtm_to_fib_config(skb, nlh, &cfg);
+	if (err < 0)
+		goto errout;
 
-	tb = fib_get_table(r->rtm_table);
-	if (tb)
-		return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
-	return -ESRCH;
+	tb = fib_get_table(cfg.fc_table);
+	if (tb == NULL) {
+		err = -ESRCH;
+		goto errout;
+	}
+
+	err = tb->tb_delete(tb, &cfg);
+errout:
+	return err;
 }
 
 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct fib_table * tb;
-	struct rtattr **rta = arg;
-	struct rtmsg *r = NLMSG_DATA(nlh);
+	struct fib_config cfg;
+	struct fib_table *tb;
+	int err;
 
-	if (inet_check_attr(r, rta))
-		return -EINVAL;
+	err = rtm_to_fib_config(skb, nlh, &cfg);
+	if (err < 0)
+		goto errout;
 
-	tb = fib_new_table(r->rtm_table);
-	if (tb)
-		return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
-	return -ENOBUFS;
+	tb = fib_new_table(cfg.fc_table);
+	if (tb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
+
+	err = tb->tb_insert(tb, &cfg);
+errout:
+	return err;
 }
 
 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int t;
-	int s_t;
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
 	struct fib_table *tb;
+	struct hlist_node *node;
+	int dumped = 0;
 
-	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
-	    ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
+	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
 		return ip_rt_dump(skb, cb);
 
-	s_t = cb->args[0];
-	if (s_t == 0)
-		s_t = cb->args[0] = RT_TABLE_MIN;
+	s_h = cb->args[0];
+	s_e = cb->args[1];
 
-	for (t=s_t; t<=RT_TABLE_MAX; t++) {
-		if (t < s_t) continue;
-		if (t > s_t)
-			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
-		if ((tb = fib_get_table(t))==NULL)
-			continue;
-		if (tb->tb_dump(tb, skb, cb) < 0) 
-			break;
+	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
+			if (e < s_e)
+				goto next;
+			if (dumped)
+				memset(&cb->args[2], 0, sizeof(cb->args) -
+				                 2 * sizeof(cb->args[0]));
+			if (tb->tb_dump(tb, skb, cb) < 0)
+				goto out;
+			dumped = 1;
+next:
+			e++;
+		}
 	}
-
-	cb->args[0] = t;
+out:
+	cb->args[1] = e;
+	cb->args[0] = h;
 
 	return skb->len;
 }
@@ -366,17 +627,19 @@
    only when netlink is already locked.
  */
 
-static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, u32 dst, int dst_len,
+		      struct in_ifaddr *ifa)
 {
-	struct fib_table * tb;
-	struct {
-		struct nlmsghdr	nlh;
-		struct rtmsg	rtm;
-	} req;
-	struct kern_rta rta;
-
-	memset(&req.rtm, 0, sizeof(req.rtm));
-	memset(&rta, 0, sizeof(rta));
+	struct fib_table *tb;
+	struct fib_config cfg = {
+		.fc_protocol = RTPROT_KERNEL,
+		.fc_type = type,
+		.fc_dst = dst,
+		.fc_dst_len = dst_len,
+		.fc_prefsrc = ifa->ifa_local,
+		.fc_oif = ifa->ifa_dev->dev->ifindex,
+		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
+	};
 
 	if (type == RTN_UNICAST)
 		tb = fib_new_table(RT_TABLE_MAIN);
@@ -386,26 +649,17 @@
 	if (tb == NULL)
 		return;
 
-	req.nlh.nlmsg_len = sizeof(req);
-	req.nlh.nlmsg_type = cmd;
-	req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
-	req.nlh.nlmsg_pid = 0;
-	req.nlh.nlmsg_seq = 0;
+	cfg.fc_table = tb->tb_id;
 
-	req.rtm.rtm_dst_len = dst_len;
-	req.rtm.rtm_table = tb->tb_id;
-	req.rtm.rtm_protocol = RTPROT_KERNEL;
-	req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
-	req.rtm.rtm_type = type;
-
-	rta.rta_dst = &dst;
-	rta.rta_prefsrc = &ifa->ifa_local;
-	rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
+	if (type != RTN_LOCAL)
+		cfg.fc_scope = RT_SCOPE_LINK;
+	else
+		cfg.fc_scope = RT_SCOPE_HOST;
 
 	if (cmd == RTM_NEWROUTE)
-		tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
+		tb->tb_insert(tb, &cfg);
 	else
-		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
+		tb->tb_delete(tb, &cfg);
 }
 
 void fib_add_ifaddr(struct in_ifaddr *ifa)
@@ -652,11 +906,17 @@
 
 void __init ip_fib_init(void)
 {
+	unsigned int i;
+
+	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
+		INIT_HLIST_HEAD(&fib_table_hash[i]);
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
 	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
+	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
 #else
-	fib_rules_init();
+	fib4_rules_init();
 #endif
 
 	register_netdevice_notifier(&fib_netdev_notifier);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 72c633b..88133b3 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -379,42 +379,39 @@
 	return NULL;
 }
 
-static int
-fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fib_node *new_f, *f;
 	struct fib_alias *fa, *new_fa;
 	struct fn_zone *fz;
 	struct fib_info *fi;
-	int z = r->rtm_dst_len;
-	int type = r->rtm_type;
-	u8 tos = r->rtm_tos;
+	u8 tos = cfg->fc_tos;
 	u32 key;
 	int err;
 
-	if (z > 32)
+	if (cfg->fc_dst_len > 32)
 		return -EINVAL;
-	fz = table->fn_zones[z];
-	if (!fz && !(fz = fn_new_zone(table, z)))
+
+	fz = table->fn_zones[cfg->fc_dst_len];
+	if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
 		return -ENOBUFS;
 
 	key = 0;
-	if (rta->rta_dst) {
-		u32 dst;
-		memcpy(&dst, rta->rta_dst, 4);
-		if (dst & ~FZ_MASK(fz))
+	if (cfg->fc_dst) {
+		if (cfg->fc_dst & ~FZ_MASK(fz))
 			return -EINVAL;
-		key = fz_key(dst, fz);
+		key = fz_key(cfg->fc_dst, fz);
 	}
 
-	if  ((fi = fib_create_info(r, rta, n, &err)) == NULL)
-		return err;
+	fi = fib_create_info(cfg);
+	if (IS_ERR(fi))
+		return PTR_ERR(fi);
 
 	if (fz->fz_nent > (fz->fz_divisor<<1) &&
 	    fz->fz_divisor < FZ_MAX_DIVISOR &&
-	    (z==32 || (1<<z) > fz->fz_divisor))
+	    (cfg->fc_dst_len == 32 ||
+	     (1 << cfg->fc_dst_len) > fz->fz_divisor))
 		fn_rehash_zone(fz);
 
 	f = fib_find_node(fz, key);
@@ -440,18 +437,18 @@
 		struct fib_alias *fa_orig;
 
 		err = -EEXIST;
-		if (n->nlmsg_flags & NLM_F_EXCL)
+		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
-		if (n->nlmsg_flags & NLM_F_REPLACE) {
+		if (cfg->fc_nlflags & NLM_F_REPLACE) {
 			struct fib_info *fi_drop;
 			u8 state;
 
 			write_lock_bh(&fib_hash_lock);
 			fi_drop = fa->fa_info;
 			fa->fa_info = fi;
-			fa->fa_type = type;
-			fa->fa_scope = r->rtm_scope;
+			fa->fa_type = cfg->fc_type;
+			fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
 			fa->fa_state &= ~FA_S_ACCESSED;
 			fib_hash_genid++;
@@ -474,17 +471,17 @@
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
-			if (fa->fa_type == type &&
-			    fa->fa_scope == r->rtm_scope &&
+			if (fa->fa_type == cfg->fc_type &&
+			    fa->fa_scope == cfg->fc_scope &&
 			    fa->fa_info == fi)
 				goto out;
 		}
-		if (!(n->nlmsg_flags & NLM_F_APPEND))
+		if (!(cfg->fc_nlflags & NLM_F_APPEND))
 			fa = fa_orig;
 	}
 
 	err = -ENOENT;
-	if (!(n->nlmsg_flags&NLM_F_CREATE))
+	if (!(cfg->fc_nlflags & NLM_F_CREATE))
 		goto out;
 
 	err = -ENOBUFS;
@@ -506,8 +503,8 @@
 
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
-	new_fa->fa_type = type;
-	new_fa->fa_scope = r->rtm_scope;
+	new_fa->fa_type = cfg->fc_type;
+	new_fa->fa_scope = cfg->fc_scope;
 	new_fa->fa_state = 0;
 
 	/*
@@ -526,7 +523,8 @@
 		fz->fz_nent++;
 	rt_cache_flush(-1);
 
-	rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req);
+	rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
+		  &cfg->fc_nlinfo);
 	return 0;
 
 out_free_new_fa:
@@ -537,30 +535,25 @@
 }
 
 
-static int
-fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
 	struct fib_node *f;
 	struct fib_alias *fa, *fa_to_delete;
-	int z = r->rtm_dst_len;
 	struct fn_zone *fz;
 	u32 key;
-	u8 tos = r->rtm_tos;
 
-	if (z > 32)
+	if (cfg->fc_dst_len > 32)
 		return -EINVAL;
-	if ((fz  = table->fn_zones[z]) == NULL)
+
+	if ((fz  = table->fn_zones[cfg->fc_dst_len]) == NULL)
 		return -ESRCH;
 
 	key = 0;
-	if (rta->rta_dst) {
-		u32 dst;
-		memcpy(&dst, rta->rta_dst, 4);
-		if (dst & ~FZ_MASK(fz))
+	if (cfg->fc_dst) {
+		if (cfg->fc_dst & ~FZ_MASK(fz))
 			return -EINVAL;
-		key = fz_key(dst, fz);
+		key = fz_key(cfg->fc_dst, fz);
 	}
 
 	f = fib_find_node(fz, key);
@@ -568,7 +561,7 @@
 	if (!f)
 		fa = NULL;
 	else
-		fa = fib_find_alias(&f->fn_alias, tos, 0);
+		fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
 	if (!fa)
 		return -ESRCH;
 
@@ -577,16 +570,16 @@
 	list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
-		if (fa->fa_tos != tos)
+		if (fa->fa_tos != cfg->fc_tos)
 			break;
 
-		if ((!r->rtm_type ||
-		     fa->fa_type == r->rtm_type) &&
-		    (r->rtm_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == r->rtm_scope) &&
-		    (!r->rtm_protocol ||
-		     fi->fib_protocol == r->rtm_protocol) &&
-		    fib_nh_match(r, n, rta, fi) == 0) {
+		if ((!cfg->fc_type ||
+		     fa->fa_type == cfg->fc_type) &&
+		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+		     fa->fa_scope == cfg->fc_scope) &&
+		    (!cfg->fc_protocol ||
+		     fi->fib_protocol == cfg->fc_protocol) &&
+		    fib_nh_match(cfg, fi) == 0) {
 			fa_to_delete = fa;
 			break;
 		}
@@ -596,7 +589,8 @@
 		int kill_fn;
 
 		fa = fa_to_delete;
-		rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req);
+		rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
+			  tb->tb_id, &cfg->fc_nlinfo);
 
 		kill_fn = 0;
 		write_lock_bh(&fib_hash_lock);
@@ -684,7 +678,7 @@
 	struct fib_node *f;
 	int i, s_i;
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	i = 0;
 	hlist_for_each_entry(f, node, head, fn_hash) {
 		struct fib_alias *fa;
@@ -699,19 +693,19 @@
 					  tb->tb_id,
 					  fa->fa_type,
 					  fa->fa_scope,
-					  &f->fn_key,
+					  f->fn_key,
 					  fz->fz_order,
 					  fa->fa_tos,
 					  fa->fa_info,
 					  NLM_F_MULTI) < 0) {
-				cb->args[3] = i;
+				cb->args[4] = i;
 				return -1;
 			}
 		next:
 			i++;
 		}
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -722,21 +716,21 @@
 {
 	int h, s_h;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 	for (h=0; h < fz->fz_divisor; h++) {
 		if (h < s_h) continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0,
-			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0,
+			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
 		if (fz->fz_hash == NULL ||
 		    hlist_empty(&fz->fz_hash[h]))
 			continue;
 		if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -746,28 +740,28 @@
 	struct fn_zone *fz;
 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 	read_lock(&fib_hash_lock);
 	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
 		if (m < s_m) continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0,
-			       sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0,
+			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
 		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			read_unlock(&fib_hash_lock);
 			return -1;
 		}
 	}
 	read_unlock(&fib_hash_lock);
-	cb->args[1] = m;
+	cb->args[2] = m;
 	return skb->len;
 }
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
 #else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
 #endif
 {
 	struct fib_table *tb;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ef6609e..fd6f776 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -23,19 +23,14 @@
 			      struct fib_result *res, __u32 zone, __u32 mask,
 				int prefixlen);
 extern void fib_release_info(struct fib_info *);
-extern struct fib_info *fib_create_info(const struct rtmsg *r,
-					struct kern_rta *rta,
-					const struct nlmsghdr *,
-					int *err);
-extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
-			struct kern_rta *rta, struct fib_info *fi);
+extern struct fib_info *fib_create_info(struct fib_config *cfg);
+extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			 u8 tb_id, u8 type, u8 scope, void *dst,
+			 u32 tb_id, u8 type, u8 scope, u32 dst,
 			 int dst_len, u8 tos, struct fib_info *fi,
 			 unsigned int);
 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
-		      int z, int tb_id,
-		      struct nlmsghdr *n, struct netlink_skb_parms *req);
+		      int dst_len, u32 tb_id, struct nl_info *info);
 extern struct fib_alias *fib_find_alias(struct list_head *fah,
 					u8 tos, u32 prio);
 extern int fib_detect_death(struct fib_info *fi, int order,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 79b0471..52b2ada 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -5,9 +5,8 @@
  *
  *		IPv4 Forwarding Information Base: policy rules.
  *
- * Version:	$Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * 		Thomas Graf <tgraf@suug.ch>
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -19,463 +18,350 @@
  *		Marc Boucher	:	routing by fwmark
  */
 
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/errno.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
-#include <linux/if_arp.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/inetdevice.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/rcupdate.h>
-
 #include <net/ip.h>
-#include <net/protocol.h>
 #include <net/route.h>
 #include <net/tcp.h>
-#include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/fib_rules.h>
 
-#define FRprintk(a...)
+static struct fib_rules_ops fib4_rules_ops;
 
-struct fib_rule
+struct fib4_rule
 {
-	struct hlist_node hlist;
-	atomic_t	r_clntref;
-	u32		r_preference;
-	unsigned char	r_table;
-	unsigned char	r_action;
-	unsigned char	r_dst_len;
-	unsigned char	r_src_len;
-	u32		r_src;
-	u32		r_srcmask;
-	u32		r_dst;
-	u32		r_dstmask;
-	u32		r_srcmap;
-	u8		r_flags;
-	u8		r_tos;
+	struct fib_rule		common;
+	u8			dst_len;
+	u8			src_len;
+	u8			tos;
+	u32			src;
+	u32			srcmask;
+	u32			dst;
+	u32			dstmask;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	u32		r_fwmark;
+	u32			fwmark;
+	u32			fwmask;
 #endif
-	int		r_ifindex;
 #ifdef CONFIG_NET_CLS_ROUTE
-	__u32		r_tclassid;
+	u32			tclassid;
 #endif
-	char		r_ifname[IFNAMSIZ];
-	int		r_dead;
-	struct		rcu_head rcu;
 };
 
-static struct fib_rule default_rule = {
-	.r_clntref =	ATOMIC_INIT(2),
-	.r_preference =	0x7FFF,
-	.r_table =	RT_TABLE_DEFAULT,
-	.r_action =	RTN_UNICAST,
+static struct fib4_rule default_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7FFF,
+		.table =	RT_TABLE_DEFAULT,
+		.action =	FR_ACT_TO_TBL,
+	},
 };
 
-static struct fib_rule main_rule = {
-	.r_clntref =	ATOMIC_INIT(2),
-	.r_preference =	0x7FFE,
-	.r_table =	RT_TABLE_MAIN,
-	.r_action =	RTN_UNICAST,
+static struct fib4_rule main_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7FFE,
+		.table =	RT_TABLE_MAIN,
+		.action =	FR_ACT_TO_TBL,
+	},
 };
 
-static struct fib_rule local_rule = {
-	.r_clntref =	ATOMIC_INIT(2),
-	.r_table =	RT_TABLE_LOCAL,
-	.r_action =	RTN_UNICAST,
+static struct fib4_rule local_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.table =	RT_TABLE_LOCAL,
+		.action =	FR_ACT_TO_TBL,
+		.flags =	FIB_RULE_PERMANENT,
+	},
 };
 
-static struct hlist_head fib_rules;
-
-/* writer func called from netlink -- rtnl_sem hold*/
-
-static void rtmsg_rule(int, struct fib_rule *);
-
-int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
-{
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct fib_rule *r;
-	struct hlist_node *node;
-	int err = -ESRCH;
-
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) &&
-		    rtm->rtm_src_len == r->r_src_len &&
-		    rtm->rtm_dst_len == r->r_dst_len &&
-		    (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) &&
-		    rtm->rtm_tos == r->r_tos &&
-#ifdef CONFIG_IP_ROUTE_FWMARK
-		    (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) &&
-#endif
-		    (!rtm->rtm_type || rtm->rtm_type == r->r_action) &&
-		    (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) &&
-		    (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) &&
-		    (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) {
-			err = -EPERM;
-			if (r == &local_rule)
-				break;
-
-			hlist_del_rcu(&r->hlist);
-			r->r_dead = 1;
-			rtmsg_rule(RTM_DELRULE, r);
-			fib_rule_put(r);
-			err = 0;
-			break;
-		}
-	}
-	return err;
-}
-
-/* Allocate new unique table id */
-
-static struct fib_table *fib_empty_table(void)
-{
-	int id;
-
-	for (id = 1; id <= RT_TABLE_MAX; id++)
-		if (fib_tables[id] == NULL)
-			return __fib_new_table(id);
-	return NULL;
-}
-
-static inline void fib_rule_put_rcu(struct rcu_head *head)
-{
-	struct fib_rule *r = container_of(head, struct fib_rule, rcu);
-	kfree(r);
-}
-
-void fib_rule_put(struct fib_rule *r)
-{
-	if (atomic_dec_and_test(&r->r_clntref)) {
-		if (r->r_dead)
-			call_rcu(&r->rcu, fib_rule_put_rcu);
-		else
-			printk("Freeing alive rule %p\n", r);
-	}
-}
-
-/* writer func called from netlink -- rtnl_sem hold*/
-
-int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
-{
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
-	struct fib_rule *r, *new_r, *last = NULL;
-	struct hlist_node *node = NULL;
-	unsigned char table_id;
-
-	if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 ||
-	    (rtm->rtm_tos & ~IPTOS_TOS_MASK))
-		return -EINVAL;
-
-	if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ)
-		return -EINVAL;
-
-	table_id = rtm->rtm_table;
-	if (table_id == RT_TABLE_UNSPEC) {
-		struct fib_table *table;
-		if (rtm->rtm_type == RTN_UNICAST) {
-			if ((table = fib_empty_table()) == NULL)
-				return -ENOBUFS;
-			table_id = table->tb_id;
-		}
-	}
-
-	new_r = kzalloc(sizeof(*new_r), GFP_KERNEL);
-	if (!new_r)
-		return -ENOMEM;
-
-	if (rta[RTA_SRC-1])
-		memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4);
-	if (rta[RTA_DST-1])
-		memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4);
-	if (rta[RTA_GATEWAY-1])
-		memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4);
-	new_r->r_src_len = rtm->rtm_src_len;
-	new_r->r_dst_len = rtm->rtm_dst_len;
-	new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len);
-	new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len);
-	new_r->r_tos = rtm->rtm_tos;
-#ifdef CONFIG_IP_ROUTE_FWMARK
-	if (rta[RTA_PROTOINFO-1])
-		memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4);
-#endif
-	new_r->r_action = rtm->rtm_type;
-	new_r->r_flags = rtm->rtm_flags;
-	if (rta[RTA_PRIORITY-1])
-		memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
-	new_r->r_table = table_id;
-	if (rta[RTA_IIF-1]) {
-		struct net_device *dev;
-		rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ);
-		new_r->r_ifindex = -1;
-		dev = __dev_get_by_name(new_r->r_ifname);
-		if (dev)
-			new_r->r_ifindex = dev->ifindex;
-	}
-#ifdef CONFIG_NET_CLS_ROUTE
-	if (rta[RTA_FLOW-1])
-		memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4);
-#endif
-	r = container_of(fib_rules.first, struct fib_rule, hlist);
-
-	if (!new_r->r_preference) {
-		if (r && r->hlist.next != NULL) {
-			r = container_of(r->hlist.next, struct fib_rule, hlist);
-			if (r->r_preference)
-				new_r->r_preference = r->r_preference - 1;
-		}
-	}
-
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (r->r_preference > new_r->r_preference)
-			break;
-		last = r;
-	}
-	atomic_inc(&new_r->r_clntref);
-
-	if (last)
-		hlist_add_after_rcu(&last->hlist, &new_r->hlist);
-	else
-		hlist_add_before_rcu(&new_r->hlist, &r->hlist);
-
-	rtmsg_rule(RTM_NEWRULE, new_r);
-	return 0;
-}
+static LIST_HEAD(fib4_rules);
 
 #ifdef CONFIG_NET_CLS_ROUTE
 u32 fib_rules_tclass(struct fib_result *res)
 {
-	if (res->r)
-		return res->r->r_tclassid;
-	return 0;
+	return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
 }
 #endif
 
-/* callers should hold rtnl semaphore */
-
-static void fib_rules_detach(struct net_device *dev)
+int fib_lookup(struct flowi *flp, struct fib_result *res)
 {
-	struct hlist_node *node;
-	struct fib_rule *r;
-
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (r->r_ifindex == dev->ifindex)
-			r->r_ifindex = -1;
-
-	}
-}
-
-/* callers should hold rtnl semaphore */
-
-static void fib_rules_attach(struct net_device *dev)
-{
-	struct hlist_node *node;
-	struct fib_rule *r;
-
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0)
-			r->r_ifindex = dev->ifindex;
-	}
-}
-
-int fib_lookup(const struct flowi *flp, struct fib_result *res)
-{
+	struct fib_lookup_arg arg = {
+		.result = res,
+	};
 	int err;
-	struct fib_rule *r, *policy;
-	struct fib_table *tb;
-	struct hlist_node *node;
 
-	u32 daddr = flp->fl4_dst;
-	u32 saddr = flp->fl4_src;
+	err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
+	res->r = arg.rule;
 
-FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
-	NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
-
-	rcu_read_lock();
-
-	hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) {
-		if (((saddr^r->r_src) & r->r_srcmask) ||
-		    ((daddr^r->r_dst) & r->r_dstmask) ||
-		    (r->r_tos && r->r_tos != flp->fl4_tos) ||
-#ifdef CONFIG_IP_ROUTE_FWMARK
-		    (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) ||
-#endif
-		    (r->r_ifindex && r->r_ifindex != flp->iif))
-			continue;
-
-FRprintk("tb %d r %d ", r->r_table, r->r_action);
-		switch (r->r_action) {
-		case RTN_UNICAST:
-			policy = r;
-			break;
-		case RTN_UNREACHABLE:
-			rcu_read_unlock();
-			return -ENETUNREACH;
-		default:
-		case RTN_BLACKHOLE:
-			rcu_read_unlock();
-			return -EINVAL;
-		case RTN_PROHIBIT:
-			rcu_read_unlock();
-			return -EACCES;
-		}
-
-		if ((tb = fib_get_table(r->r_table)) == NULL)
-			continue;
-		err = tb->tb_lookup(tb, flp, res);
-		if (err == 0) {
-			res->r = policy;
-			if (policy)
-				atomic_inc(&policy->r_clntref);
-			rcu_read_unlock();
-			return 0;
-		}
-		if (err < 0 && err != -EAGAIN) {
-			rcu_read_unlock();
-			return err;
-		}
-	}
-FRprintk("FAILURE\n");
-	rcu_read_unlock();
-	return -ENETUNREACH;
+	return err;
 }
 
+static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
+{
+	int err = -EAGAIN;
+	struct fib_table *tbl;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+
+	case FR_ACT_UNREACHABLE:
+		err = -ENETUNREACH;
+		goto errout;
+
+	case FR_ACT_PROHIBIT:
+		err = -EACCES;
+		goto errout;
+
+	case FR_ACT_BLACKHOLE:
+	default:
+		err = -EINVAL;
+		goto errout;
+	}
+
+	if ((tbl = fib_get_table(rule->table)) == NULL)
+		goto errout;
+
+	err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
+	if (err > 0)
+		err = -EAGAIN;
+errout:
+	return err;
+}
+
+
 void fib_select_default(const struct flowi *flp, struct fib_result *res)
 {
-	if (res->r && res->r->r_action == RTN_UNICAST &&
+	if (res->r && res->r->action == FR_ACT_TO_TBL &&
 	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
 		struct fib_table *tb;
-		if ((tb = fib_get_table(res->r->r_table)) != NULL)
+		if ((tb = fib_get_table(res->r->table)) != NULL)
 			tb->tb_select_default(tb, flp, res);
 	}
 }
 
-static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr)
+static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
-	struct net_device *dev = ptr;
+	struct fib4_rule *r = (struct fib4_rule *) rule;
+	u32 daddr = fl->fl4_dst;
+	u32 saddr = fl->fl4_src;
 
-	if (event == NETDEV_UNREGISTER)
-		fib_rules_detach(dev);
-	else if (event == NETDEV_REGISTER)
-		fib_rules_attach(dev);
-	return NOTIFY_DONE;
+	if (((saddr ^ r->src) & r->srcmask) ||
+	    ((daddr ^ r->dst) & r->dstmask))
+		return 0;
+
+	if (r->tos && (r->tos != fl->fl4_tos))
+		return 0;
+
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask)
+		return 0;
+#endif
+
+	return 1;
 }
 
+static struct fib_table *fib_empty_table(void)
+{
+	u32 id;
 
-static struct notifier_block fib_rules_notifier = {
-	.notifier_call =fib_rules_event,
+	for (id = 1; id <= RT_TABLE_MAX; id++)
+		if (fib_get_table(id) == NULL)
+			return fib_new_table(id);
+	return NULL;
+}
+
+static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .type = NLA_U32 },
+	[FRA_DST]	= { .type = NLA_U32 },
+	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_FWMASK]	= { .type = NLA_U32 },
+	[FRA_FLOW]	= { .type = NLA_U32 },
+	[FRA_TABLE]	= { .type = NLA_U32 },
 };
 
-static __inline__ int inet_fill_rule(struct sk_buff *skb,
-				     struct fib_rule *r,
-				     u32 pid, u32 seq, int event,
-				     unsigned int flags)
+static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
 {
-	struct rtmsg *rtm;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	int err = -EINVAL;
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
-	rtm->rtm_family = AF_INET;
-	rtm->rtm_dst_len = r->r_dst_len;
-	rtm->rtm_src_len = r->r_src_len;
-	rtm->rtm_tos = r->r_tos;
+	if (frh->src_len > 32 || frh->dst_len > 32 ||
+	    (frh->tos & ~IPTOS_TOS_MASK))
+		goto errout;
+
+	if (rule->table == RT_TABLE_UNSPEC) {
+		if (rule->action == FR_ACT_TO_TBL) {
+			struct fib_table *table;
+
+			table = fib_empty_table();
+			if (table == NULL) {
+				err = -ENOBUFS;
+				goto errout;
+			}
+
+			rule->table = table->tb_id;
+		}
+	}
+
+	if (tb[FRA_SRC])
+		rule4->src = nla_get_u32(tb[FRA_SRC]);
+
+	if (tb[FRA_DST])
+		rule4->dst = nla_get_u32(tb[FRA_DST]);
+
 #ifdef CONFIG_IP_ROUTE_FWMARK
-	if (r->r_fwmark)
-		RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark);
-#endif
-	rtm->rtm_table = r->r_table;
-	rtm->rtm_protocol = 0;
-	rtm->rtm_scope = 0;
-	rtm->rtm_type = r->r_action;
-	rtm->rtm_flags = r->r_flags;
+	if (tb[FRA_FWMARK]) {
+		rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+		if (rule4->fwmark)
+			/* compatibility: if the mark value is non-zero all bits
+			 * are compared unless a mask is explicitly specified.
+			 */
+			rule4->fwmask = 0xFFFFFFFF;
+	}
 
-	if (r->r_dst_len)
-		RTA_PUT(skb, RTA_DST, 4, &r->r_dst);
-	if (r->r_src_len)
-		RTA_PUT(skb, RTA_SRC, 4, &r->r_src);
-	if (r->r_ifname[0])
-		RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname);
-	if (r->r_preference)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference);
-	if (r->r_srcmap)
-		RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap);
+	if (tb[FRA_FWMASK])
+		rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]);
+#endif
+
 #ifdef CONFIG_NET_CLS_ROUTE
-	if (r->r_tclassid)
-		RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid);
+	if (tb[FRA_FLOW])
+		rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
 #endif
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	rule4->src_len = frh->src_len;
+	rule4->srcmask = inet_make_mask(rule4->src_len);
+	rule4->dst_len = frh->dst_len;
+	rule4->dstmask = inet_make_mask(rule4->dst_len);
+	rule4->tos = frh->tos;
+
+	err = 0;
+errout:
+	return err;
 }
 
-/* callers should hold rtnl semaphore */
-
-static void rtmsg_rule(int event, struct fib_rule *r)
+static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			     struct nlattr **tb)
 {
-	int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128);
-	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	if (!skb)
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS);
-	else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) {
-		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL);
-	} else {
-		netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL);
+	if (frh->src_len && (rule4->src_len != frh->src_len))
+		return 0;
+
+	if (frh->dst_len && (rule4->dst_len != frh->dst_len))
+		return 0;
+
+	if (frh->tos && (rule4->tos != frh->tos))
+		return 0;
+
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+		return 0;
+
+	if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+		return 0;
+#endif
+
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
+		return 0;
+#endif
+
+	if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC])))
+		return 0;
+
+	if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST])))
+		return 0;
+
+	return 1;
+}
+
+static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			  struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+{
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+
+	frh->family = AF_INET;
+	frh->dst_len = rule4->dst_len;
+	frh->src_len = rule4->src_len;
+	frh->tos = rule4->tos;
+
+#ifdef CONFIG_IP_ROUTE_FWMARK
+	if (rule4->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark);
+
+	if (rule4->fwmask || rule4->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask);
+#endif
+
+	if (rule4->dst_len)
+		NLA_PUT_U32(skb, FRA_DST, rule4->dst);
+
+	if (rule4->src_len)
+		NLA_PUT_U32(skb, FRA_SRC, rule4->src);
+
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (rule4->tclassid)
+		NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
+#endif
+	return 0;
+
+nla_put_failure:
+	return -ENOBUFS;
+}
+
+int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return fib_rules_dump(skb, cb, AF_INET);
+}
+
+static u32 fib4_rule_default_pref(void)
+{
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&fib4_rules)) {
+		pos = fib4_rules.next;
+		if (pos->next != &fib4_rules) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
+		}
 	}
+
+	return 0;
 }
 
-int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static struct fib_rules_ops fib4_rules_ops = {
+	.family		= AF_INET,
+	.rule_size	= sizeof(struct fib4_rule),
+	.action		= fib4_rule_action,
+	.match		= fib4_rule_match,
+	.configure	= fib4_rule_configure,
+	.compare	= fib4_rule_compare,
+	.fill		= fib4_rule_fill,
+	.default_pref	= fib4_rule_default_pref,
+	.nlgroup	= RTNLGRP_IPV4_RULE,
+	.policy		= fib4_rule_policy,
+	.rules_list	= &fib4_rules,
+	.owner		= THIS_MODULE,
+};
+
+void __init fib4_rules_init(void)
 {
-	int idx = 0;
-	int s_idx = cb->args[0];
-	struct fib_rule *r;
-	struct hlist_node *node;
+	list_add_tail(&local_rule.common.list, &fib4_rules);
+	list_add_tail(&main_rule.common.list, &fib4_rules);
+	list_add_tail(&default_rule.common.list, &fib4_rules);
 
-	rcu_read_lock();
-	hlist_for_each_entry(r, node, &fib_rules, hlist) {
-		if (idx < s_idx)
-			goto next;
-		if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
-				   cb->nlh->nlmsg_seq,
-				   RTM_NEWRULE, NLM_F_MULTI) < 0)
-			break;
-next:
-		idx++;
-	}
-	rcu_read_unlock();
-	cb->args[0] = idx;
-
-	return skb->len;
-}
-
-void __init fib_rules_init(void)
-{
-	INIT_HLIST_HEAD(&fib_rules);
-	hlist_add_head(&local_rule.hlist, &fib_rules);
-	hlist_add_after(&local_rule.hlist, &main_rule.hlist);
-	hlist_add_after(&main_rule.hlist, &default_rule.hlist);
-	register_netdevice_notifier(&fib_rules_notifier);
+	fib_rules_register(&fib4_rules_ops);
 }
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5173800..2ead0954 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -33,7 +33,6 @@
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
-#include <linux/netlink.h>
 #include <linux/init.h>
 
 #include <net/arp.h>
@@ -44,12 +43,14 @@
 #include <net/sock.h>
 #include <net/ip_fib.h>
 #include <net/ip_mp_alg.h>
+#include <net/netlink.h>
+#include <net/nexthop.h>
 
 #include "fib_lookup.h"
 
 #define FSprintk(a...)
 
-static DEFINE_RWLOCK(fib_info_lock);
+static DEFINE_SPINLOCK(fib_info_lock);
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
 static unsigned int fib_hash_size;
@@ -159,7 +160,7 @@
 
 void fib_release_info(struct fib_info *fi)
 {
-	write_lock_bh(&fib_info_lock);
+	spin_lock_bh(&fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		hlist_del(&fi->fib_hash);
 		if (fi->fib_prefsrc)
@@ -172,7 +173,7 @@
 		fi->fib_dead = 1;
 		fib_info_put(fi);
 	}
-	write_unlock_bh(&fib_info_lock);
+	spin_unlock_bh(&fib_info_lock);
 }
 
 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -254,7 +255,7 @@
 	struct fib_nh *nh;
 	unsigned int hash;
 
-	read_lock(&fib_info_lock);
+	spin_lock(&fib_info_lock);
 
 	hash = fib_devindex_hashfn(dev->ifindex);
 	head = &fib_info_devhash[hash];
@@ -262,41 +263,41 @@
 		if (nh->nh_dev == dev &&
 		    nh->nh_gw == gw &&
 		    !(nh->nh_flags&RTNH_F_DEAD)) {
-			read_unlock(&fib_info_lock);
+			spin_unlock(&fib_info_lock);
 			return 0;
 		}
 	}
 
-	read_unlock(&fib_info_lock);
+	spin_unlock(&fib_info_lock);
 
 	return -1;
 }
 
 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
-	       int z, int tb_id,
-	       struct nlmsghdr *n, struct netlink_skb_parms *req)
+	       int dst_len, u32 tb_id, struct nl_info *info)
 {
 	struct sk_buff *skb;
-	u32 pid = req ? req->pid : n->nlmsg_pid;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
+	int payload = sizeof(struct rtmsg) + 256;
+	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_KERNEL);
-	if (!skb)
-		return;
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
 
-	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
-			  fa->fa_type, fa->fa_scope, &key, z,
-			  fa->fa_tos,
-			  fa->fa_info, 0) < 0) {
+	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
+			    fa->fa_type, fa->fa_scope, key, dst_len,
+			    fa->fa_tos, fa->fa_info, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
-	if (n->nlmsg_flags&NLM_F_ECHO)
-		atomic_inc(&skb->users);
-	netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
-	if (n->nlmsg_flags&NLM_F_ECHO)
-		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
+
+	err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
+			  info->nlh, GFP_KERNEL);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
 }
 
 /* Return the first fib alias matching TOS with
@@ -342,102 +343,100 @@
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
-static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
-{
-	while (RTA_OK(attr,attrlen)) {
-		if (attr->rta_type == type)
-			return *(u32*)RTA_DATA(attr);
-		attr = RTA_NEXT(attr, attrlen);
-	}
-	return 0;
-}
-
-static int
-fib_count_nexthops(struct rtattr *rta)
+static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
 {
 	int nhs = 0;
-	struct rtnexthop *nhp = RTA_DATA(rta);
-	int nhlen = RTA_PAYLOAD(rta);
 
-	while (nhlen >= (int)sizeof(struct rtnexthop)) {
-		if ((nhlen -= nhp->rtnh_len) < 0)
-			return 0;
+	while (rtnh_ok(rtnh, remaining)) {
 		nhs++;
-		nhp = RTNH_NEXT(nhp);
-	};
-	return nhs;
+		rtnh = rtnh_next(rtnh, &remaining);
+	}
+
+	/* leftover implies invalid nexthop configuration, discard it */
+	return remaining > 0 ? 0 : nhs;
 }
 
-static int
-fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
+static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
+		       int remaining, struct fib_config *cfg)
 {
-	struct rtnexthop *nhp = RTA_DATA(rta);
-	int nhlen = RTA_PAYLOAD(rta);
-
 	change_nexthops(fi) {
-		int attrlen = nhlen - sizeof(struct rtnexthop);
-		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+		int attrlen;
+
+		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
-		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
-		nh->nh_oif = nhp->rtnh_ifindex;
-		nh->nh_weight = nhp->rtnh_hops + 1;
-		if (attrlen) {
-			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
+
+		nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+		nh->nh_oif = rtnh->rtnh_ifindex;
+		nh->nh_weight = rtnh->rtnh_hops + 1;
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			nh->nh_gw = nla ? nla_get_u32(nla) : 0;
 #ifdef CONFIG_NET_CLS_ROUTE
-			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
 #endif
 		}
-		nhp = RTNH_NEXT(nhp);
+
+		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
+
 	return 0;
 }
 
 #endif
 
-int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
-		 struct fib_info *fi)
+int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	struct rtnexthop *nhp;
-	int nhlen;
+	struct rtnexthop *rtnh;
+	int remaining;
 #endif
 
-	if (rta->rta_priority &&
-	    *rta->rta_priority != fi->fib_priority)
+	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 		return 1;
 
-	if (rta->rta_oif || rta->rta_gw) {
-		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
-		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
+	if (cfg->fc_oif || cfg->fc_gw) {
+		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
+		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
 			return 0;
 		return 1;
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (rta->rta_mp == NULL)
+	if (cfg->fc_mp == NULL)
 		return 0;
-	nhp = RTA_DATA(rta->rta_mp);
-	nhlen = RTA_PAYLOAD(rta->rta_mp);
+
+	rtnh = cfg->fc_mp;
+	remaining = cfg->fc_mp_len;
 	
 	for_nexthops(fi) {
-		int attrlen = nhlen - sizeof(struct rtnexthop);
-		u32 gw;
+		int attrlen;
 
-		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
-		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
+
+		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
 			return 1;
-		if (attrlen) {
-			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
-			if (gw && gw != nh->nh_gw)
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) { /* nexthop has trailing attributes */
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla && nla_get_u32(nla) != nh->nh_gw)
+				return 1;
 #ifdef CONFIG_NET_CLS_ROUTE
-			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
-			if (gw && gw != nh->nh_tclassid)
+			nla = nla_find(attrs, attrlen, RTA_FLOW);
+			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
 				return 1;
 #endif
 		}
-		nhp = RTNH_NEXT(nhp);
+
+		rtnh = rtnh_next(rtnh, &remaining);
 	} endfor_nexthops(fi);
 #endif
 	return 0;
@@ -488,7 +487,8 @@
 						|-> {local prefix} (terminal node)
  */
 
-static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
+static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
+			struct fib_nh *nh)
 {
 	int err;
 
@@ -502,7 +502,7 @@
 		if (nh->nh_flags&RTNH_F_ONLINK) {
 			struct net_device *dev;
 
-			if (r->rtm_scope >= RT_SCOPE_LINK)
+			if (cfg->fc_scope >= RT_SCOPE_LINK)
 				return -EINVAL;
 			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
 				return -EINVAL;
@@ -516,10 +516,15 @@
 			return 0;
 		}
 		{
-			struct flowi fl = { .nl_u = { .ip4_u =
-						      { .daddr = nh->nh_gw,
-							.scope = r->rtm_scope + 1 } },
-					    .oif = nh->nh_oif };
+			struct flowi fl = {
+				.nl_u = {
+					.ip4_u = {
+						.daddr = nh->nh_gw,
+						.scope = cfg->fc_scope + 1,
+					},
+				},
+				.oif = nh->nh_oif,
+			};
 
 			/* It is not necessary, but requires a bit of thinking */
 			if (fl.fl4_scope < RT_SCOPE_LINK)
@@ -598,7 +603,7 @@
 	unsigned int old_size = fib_hash_size;
 	unsigned int i, bytes;
 
-	write_lock_bh(&fib_info_lock);
+	spin_lock_bh(&fib_info_lock);
 	old_info_hash = fib_info_hash;
 	old_laddrhash = fib_info_laddrhash;
 	fib_hash_size = new_size;
@@ -639,46 +644,35 @@
 	}
 	fib_info_laddrhash = new_laddrhash;
 
-	write_unlock_bh(&fib_info_lock);
+	spin_unlock_bh(&fib_info_lock);
 
 	bytes = old_size * sizeof(struct hlist_head *);
 	fib_hash_free(old_info_hash, bytes);
 	fib_hash_free(old_laddrhash, bytes);
 }
 
-struct fib_info *
-fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
-		const struct nlmsghdr *nlh, int *errp)
+struct fib_info *fib_create_info(struct fib_config *cfg)
 {
 	int err;
 	struct fib_info *fi = NULL;
 	struct fib_info *ofi;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int nhs = 1;
-#else
-	const int nhs = 1;
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	u32 mp_alg = IP_MP_ALG_NONE;
-#endif
 
 	/* Fast check to catch the most weird cases */
-	if (fib_props[r->rtm_type].scope > r->rtm_scope)
+	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
 		goto err_inval;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (rta->rta_mp) {
-		nhs = fib_count_nexthops(rta->rta_mp);
+	if (cfg->fc_mp) {
+		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
 		if (nhs == 0)
 			goto err_inval;
 	}
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (rta->rta_mp_alg) {
-		mp_alg = *rta->rta_mp_alg;
-
-		if (mp_alg < IP_MP_ALG_NONE ||
-		    mp_alg > IP_MP_ALG_MAX)
+	if (cfg->fc_mp_alg) {
+		if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
+		    cfg->fc_mp_alg > IP_MP_ALG_MAX)
 			goto err_inval;
 	}
 #endif
@@ -714,43 +708,42 @@
 		goto failure;
 	fib_info_cnt++;
 
-	fi->fib_protocol = r->rtm_protocol;
+	fi->fib_protocol = cfg->fc_protocol;
+	fi->fib_flags = cfg->fc_flags;
+	fi->fib_priority = cfg->fc_priority;
+	fi->fib_prefsrc = cfg->fc_prefsrc;
 
 	fi->fib_nhs = nhs;
 	change_nexthops(fi) {
 		nh->nh_parent = fi;
 	} endfor_nexthops(fi)
 
-	fi->fib_flags = r->rtm_flags;
-	if (rta->rta_priority)
-		fi->fib_priority = *rta->rta_priority;
-	if (rta->rta_mx) {
-		int attrlen = RTA_PAYLOAD(rta->rta_mx);
-		struct rtattr *attr = RTA_DATA(rta->rta_mx);
+	if (cfg->fc_mx) {
+		struct nlattr *nla;
+		int remaining;
 
-		while (RTA_OK(attr, attrlen)) {
-			unsigned flavor = attr->rta_type;
-			if (flavor) {
-				if (flavor > RTAX_MAX)
+		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+			int type = nla->nla_type;
+
+			if (type) {
+				if (type > RTAX_MAX)
 					goto err_inval;
-				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
+				fi->fib_metrics[type - 1] = nla_get_u32(nla);
 			}
-			attr = RTA_NEXT(attr, attrlen);
 		}
 	}
-	if (rta->rta_prefsrc)
-		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
 
-	if (rta->rta_mp) {
+	if (cfg->fc_mp) {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
+		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
+		if (err != 0)
 			goto failure;
-		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
+		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
 			goto err_inval;
-		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
+		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
 			goto err_inval;
 #ifdef CONFIG_NET_CLS_ROUTE
-		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
+		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
 			goto err_inval;
 #endif
 #else
@@ -758,34 +751,32 @@
 #endif
 	} else {
 		struct fib_nh *nh = fi->fib_nh;
-		if (rta->rta_oif)
-			nh->nh_oif = *rta->rta_oif;
-		if (rta->rta_gw)
-			memcpy(&nh->nh_gw, rta->rta_gw, 4);
+
+		nh->nh_oif = cfg->fc_oif;
+		nh->nh_gw = cfg->fc_gw;
+		nh->nh_flags = cfg->fc_flags;
 #ifdef CONFIG_NET_CLS_ROUTE
-		if (rta->rta_flow)
-			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
+		nh->nh_tclassid = cfg->fc_flow;
 #endif
-		nh->nh_flags = r->rtm_flags;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		nh->nh_weight = 1;
 #endif
 	}
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	fi->fib_mp_alg = mp_alg;
+	fi->fib_mp_alg = cfg->fc_mp_alg;
 #endif
 
-	if (fib_props[r->rtm_type].error) {
-		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
+	if (fib_props[cfg->fc_type].error) {
+		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
 			goto err_inval;
 		goto link_it;
 	}
 
-	if (r->rtm_scope > RT_SCOPE_HOST)
+	if (cfg->fc_scope > RT_SCOPE_HOST)
 		goto err_inval;
 
-	if (r->rtm_scope == RT_SCOPE_HOST) {
+	if (cfg->fc_scope == RT_SCOPE_HOST) {
 		struct fib_nh *nh = fi->fib_nh;
 
 		/* Local address is added. */
@@ -798,14 +789,14 @@
 			goto failure;
 	} else {
 		change_nexthops(fi) {
-			if ((err = fib_check_nh(r, fi, nh)) != 0)
+			if ((err = fib_check_nh(cfg, fi, nh)) != 0)
 				goto failure;
 		} endfor_nexthops(fi)
 	}
 
 	if (fi->fib_prefsrc) {
-		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
-		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
+		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+		    fi->fib_prefsrc != cfg->fc_dst)
 			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
 				goto err_inval;
 	}
@@ -820,7 +811,7 @@
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock_bh(&fib_info_lock);
+	spin_lock_bh(&fib_info_lock);
 	hlist_add_head(&fi->fib_hash,
 		       &fib_info_hash[fib_info_hashfn(fi)]);
 	if (fi->fib_prefsrc) {
@@ -839,19 +830,19 @@
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
-	write_unlock_bh(&fib_info_lock);
+	spin_unlock_bh(&fib_info_lock);
 	return fi;
 
 err_inval:
 	err = -EINVAL;
 
 failure:
-        *errp = err;
         if (fi) {
 		fi->fib_dead = 1;
 		free_fib_info(fi);
 	}
-	return NULL;
+
+	return ERR_PTR(err);
 }
 
 /* Note! fib_semantic_match intentionally uses  RCU list functions. */
@@ -937,224 +928,89 @@
 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
 }
 
-int
-fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
-	      struct fib_info *fi, unsigned int flags)
+int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+		  u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos,
+		  struct fib_info *fi, unsigned int flags)
 {
+	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = dst_len;
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = tos;
 	rtm->rtm_table = tb_id;
+	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
 	rtm->rtm_scope = scope;
-	if (rtm->rtm_dst_len)
-		RTA_PUT(skb, RTA_DST, 4, dst);
 	rtm->rtm_protocol = fi->fib_protocol;
+
+	if (rtm->rtm_dst_len)
+		NLA_PUT_U32(skb, RTA_DST, dst);
+
 	if (fi->fib_priority)
-		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
+		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
+
 	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	if (fi->fib_prefsrc)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
+		NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc);
+
 	if (fi->fib_nhs == 1) {
 		if (fi->fib_nh->nh_gw)
-			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
+			NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
+
 		if (fi->fib_nh->nh_oif)
-			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
+			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
 #ifdef CONFIG_NET_CLS_ROUTE
 		if (fi->fib_nh[0].nh_tclassid)
-			RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
+			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
 #endif
 	}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (fi->fib_nhs > 1) {
-		struct rtnexthop *nhp;
-		struct rtattr *mp_head;
-		if (skb_tailroom(skb) <= RTA_SPACE(0))
-			goto rtattr_failure;
-		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
+		struct rtnexthop *rtnh;
+		struct nlattr *mp;
+
+		mp = nla_nest_start(skb, RTA_MULTIPATH);
+		if (mp == NULL)
+			goto nla_put_failure;
 
 		for_nexthops(fi) {
-			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
-				goto rtattr_failure;
-			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
-			nhp->rtnh_flags = nh->nh_flags & 0xFF;
-			nhp->rtnh_hops = nh->nh_weight-1;
-			nhp->rtnh_ifindex = nh->nh_oif;
+			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+			if (rtnh == NULL)
+				goto nla_put_failure;
+
+			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
+			rtnh->rtnh_hops = nh->nh_weight - 1;
+			rtnh->rtnh_ifindex = nh->nh_oif;
+
 			if (nh->nh_gw)
-				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
+				NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw);
 #ifdef CONFIG_NET_CLS_ROUTE
 			if (nh->nh_tclassid)
-				RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
+				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
 #endif
-			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
+			/* length of rtnetlink header + attributes */
+			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
 		} endfor_nexthops(fi);
-		mp_head->rta_type = RTA_MULTIPATH;
-		mp_head->rta_len = skb->tail - (u8*)mp_head;
+
+		nla_nest_end(skb, mp);
 	}
 #endif
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	return nlmsg_end(skb, nlh);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
-#ifndef CONFIG_IP_NOSIOCRT
-
-int
-fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
-		    struct kern_rta *rta, struct rtentry *r)
-{
-	int    plen;
-	u32    *ptr;
-
-	memset(rtm, 0, sizeof(*rtm));
-	memset(rta, 0, sizeof(*rta));
-
-	if (r->rt_dst.sa_family != AF_INET)
-		return -EAFNOSUPPORT;
-
-	/* Check mask for validity:
-	   a) it must be contiguous.
-	   b) destination must have all host bits clear.
-	   c) if application forgot to set correct family (AF_INET),
-	      reject request unless it is absolutely clear i.e.
-	      both family and mask are zero.
-	 */
-	plen = 32;
-	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
-	if (!(r->rt_flags&RTF_HOST)) {
-		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
-		if (r->rt_genmask.sa_family != AF_INET) {
-			if (mask || r->rt_genmask.sa_family)
-				return -EAFNOSUPPORT;
-		}
-		if (bad_mask(mask, *ptr))
-			return -EINVAL;
-		plen = inet_mask_len(mask);
-	}
-
-	nl->nlmsg_flags = NLM_F_REQUEST;
-	nl->nlmsg_pid = 0;
-	nl->nlmsg_seq = 0;
-	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
-	if (cmd == SIOCDELRT) {
-		nl->nlmsg_type = RTM_DELROUTE;
-		nl->nlmsg_flags = 0;
-	} else {
-		nl->nlmsg_type = RTM_NEWROUTE;
-		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
-		rtm->rtm_protocol = RTPROT_BOOT;
-	}
-
-	rtm->rtm_dst_len = plen;
-	rta->rta_dst = ptr;
-
-	if (r->rt_metric) {
-		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
-		rta->rta_priority = (u32*)&r->rt_pad3;
-	}
-	if (r->rt_flags&RTF_REJECT) {
-		rtm->rtm_scope = RT_SCOPE_HOST;
-		rtm->rtm_type = RTN_UNREACHABLE;
-		return 0;
-	}
-	rtm->rtm_scope = RT_SCOPE_NOWHERE;
-	rtm->rtm_type = RTN_UNICAST;
-
-	if (r->rt_dev) {
-		char *colon;
-		struct net_device *dev;
-		char   devname[IFNAMSIZ];
-
-		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
-			return -EFAULT;
-		devname[IFNAMSIZ-1] = 0;
-		colon = strchr(devname, ':');
-		if (colon)
-			*colon = 0;
-		dev = __dev_get_by_name(devname);
-		if (!dev)
-			return -ENODEV;
-		rta->rta_oif = &dev->ifindex;
-		if (colon) {
-			struct in_ifaddr *ifa;
-			struct in_device *in_dev = __in_dev_get_rtnl(dev);
-			if (!in_dev)
-				return -ENODEV;
-			*colon = ':';
-			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
-				if (strcmp(ifa->ifa_label, devname) == 0)
-					break;
-			if (ifa == NULL)
-				return -ENODEV;
-			rta->rta_prefsrc = &ifa->ifa_local;
-		}
-	}
-
-	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
-	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
-		rta->rta_gw = ptr;
-		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
-			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
-	}
-
-	if (cmd == SIOCDELRT)
-		return 0;
-
-	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
-		return -EINVAL;
-
-	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
-		rtm->rtm_scope = RT_SCOPE_LINK;
-
-	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
-		struct rtattr *rec;
-		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
-		if (mx == NULL)
-			return -ENOMEM;
-		rta->rta_mx = mx;
-		mx->rta_type = RTA_METRICS;
-		mx->rta_len  = RTA_LENGTH(0);
-		if (r->rt_flags&RTF_MTU) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_ADVMSS;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
-		}
-		if (r->rt_flags&RTF_WINDOW) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_WINDOW;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_window;
-		}
-		if (r->rt_flags&RTF_IRTT) {
-			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
-			rec->rta_type = RTAX_RTT;
-			rec->rta_len = RTA_LENGTH(4);
-			mx->rta_len += RTA_LENGTH(4);
-			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
-		}
-	}
-	return 0;
-}
-
-#endif
-
 /*
    Update FIB if:
    - local address disappeared -> we must delete all the entries
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 23fb9d9..9c3ff6b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1124,17 +1124,14 @@
 	return fa_head;
 }
 
-static int
-fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-	       struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *new_fa;
 	struct list_head *fa_head = NULL;
 	struct fib_info *fi;
-	int plen = r->rtm_dst_len;
-	int type = r->rtm_type;
-	u8 tos = r->rtm_tos;
+	int plen = cfg->fc_dst_len;
+	u8 tos = cfg->fc_tos;
 	u32 key, mask;
 	int err;
 	struct leaf *l;
@@ -1142,13 +1139,9 @@
 	if (plen > 32)
 		return -EINVAL;
 
-	key = 0;
-	if (rta->rta_dst)
-		memcpy(&key, rta->rta_dst, 4);
+	key = ntohl(cfg->fc_dst);
 
-	key = ntohl(key);
-
-	pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
+	pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
 
 	mask = ntohl(inet_make_mask(plen));
 
@@ -1157,10 +1150,11 @@
 
 	key = key & mask;
 
-	fi = fib_create_info(r, rta, nlhdr, &err);
-
-	if (!fi)
+	fi = fib_create_info(cfg);
+	if (IS_ERR(fi)) {
+		err = PTR_ERR(fi);
 		goto err;
+	}
 
 	l = fib_find_node(t, key);
 	fa = NULL;
@@ -1185,10 +1179,10 @@
 		struct fib_alias *fa_orig;
 
 		err = -EEXIST;
-		if (nlhdr->nlmsg_flags & NLM_F_EXCL)
+		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
-		if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
+		if (cfg->fc_nlflags & NLM_F_REPLACE) {
 			struct fib_info *fi_drop;
 			u8 state;
 
@@ -1200,8 +1194,8 @@
 			fi_drop = fa->fa_info;
 			new_fa->fa_tos = fa->fa_tos;
 			new_fa->fa_info = fi;
-			new_fa->fa_type = type;
-			new_fa->fa_scope = r->rtm_scope;
+			new_fa->fa_type = cfg->fc_type;
+			new_fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
 			new_fa->fa_state &= ~FA_S_ACCESSED;
 
@@ -1224,17 +1218,17 @@
 				break;
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
-			if (fa->fa_type == type &&
-			    fa->fa_scope == r->rtm_scope &&
+			if (fa->fa_type == cfg->fc_type &&
+			    fa->fa_scope == cfg->fc_scope &&
 			    fa->fa_info == fi) {
 				goto out;
 			}
 		}
-		if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
+		if (!(cfg->fc_nlflags & NLM_F_APPEND))
 			fa = fa_orig;
 	}
 	err = -ENOENT;
-	if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
+	if (!(cfg->fc_nlflags & NLM_F_CREATE))
 		goto out;
 
 	err = -ENOBUFS;
@@ -1244,8 +1238,8 @@
 
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
-	new_fa->fa_type = type;
-	new_fa->fa_scope = r->rtm_scope;
+	new_fa->fa_type = cfg->fc_type;
+	new_fa->fa_scope = cfg->fc_scope;
 	new_fa->fa_state = 0;
 	/*
 	 * Insert new entry to the list.
@@ -1262,7 +1256,8 @@
 			  (fa ? &fa->fa_list : fa_head));
 
 	rt_cache_flush(-1);
-	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
+	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
+		  &cfg->fc_nlinfo);
 succeeded:
 	return 0;
 
@@ -1281,18 +1276,18 @@
 			     struct fib_result *res)
 {
 	int err, i;
-	t_key mask;
+	__be32 mask;
 	struct leaf_info *li;
 	struct hlist_head *hhead = &l->list;
 	struct hlist_node *node;
 
 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
 		i = li->plen;
-		mask = ntohl(inet_make_mask(i));
-		if (l->key != (key & mask))
+		mask = inet_make_mask(i);
+		if (l->key != (key & ntohl(mask)))
 			continue;
 
-		if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
+		if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
 			*plen = i;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			t->stats.semantic_match_passed++;
@@ -1548,28 +1543,21 @@
 	return 1;
 }
 
-static int
-fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
-		struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
+static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	u32 key, mask;
-	int plen = r->rtm_dst_len;
-	u8 tos = r->rtm_tos;
+	int plen = cfg->fc_dst_len;
+	u8 tos = cfg->fc_tos;
 	struct fib_alias *fa, *fa_to_delete;
 	struct list_head *fa_head;
 	struct leaf *l;
 	struct leaf_info *li;
 
-
 	if (plen > 32)
 		return -EINVAL;
 
-	key = 0;
-	if (rta->rta_dst)
-		memcpy(&key, rta->rta_dst, 4);
-
-	key = ntohl(key);
+	key = ntohl(cfg->fc_dst);
 	mask = ntohl(inet_make_mask(plen));
 
 	if (key & ~mask)
@@ -1598,13 +1586,12 @@
 		if (fa->fa_tos != tos)
 			break;
 
-		if ((!r->rtm_type ||
-		     fa->fa_type == r->rtm_type) &&
-		    (r->rtm_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == r->rtm_scope) &&
-		    (!r->rtm_protocol ||
-		     fi->fib_protocol == r->rtm_protocol) &&
-		    fib_nh_match(r, nlhdr, rta, fi) == 0) {
+		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
+		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
+		     fa->fa_scope == cfg->fc_scope) &&
+		    (!cfg->fc_protocol ||
+		     fi->fib_protocol == cfg->fc_protocol) &&
+		    fib_nh_match(cfg, fi) == 0) {
 			fa_to_delete = fa;
 			break;
 		}
@@ -1614,7 +1601,8 @@
 		return -ESRCH;
 
 	fa = fa_to_delete;
-	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
+	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
+		  &cfg->fc_nlinfo);
 
 	l = fib_find_node(t, key);
 	li = find_leaf_info(l, plen);
@@ -1848,7 +1836,7 @@
 
 	u32 xkey = htonl(key);
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	i = 0;
 
 	/* rcu_read_lock is hold by caller */
@@ -1866,16 +1854,16 @@
 				  tb->tb_id,
 				  fa->fa_type,
 				  fa->fa_scope,
-				  &xkey,
+				  xkey,
 				  plen,
 				  fa->fa_tos,
 				  fa->fa_info, 0) < 0) {
-			cb->args[3] = i;
+			cb->args[4] = i;
 			return -1;
 		}
 		i++;
 	}
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -1886,14 +1874,14 @@
 	struct list_head *fa_head;
 	struct leaf *l = NULL;
 
-	s_h = cb->args[2];
+	s_h = cb->args[3];
 
 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
-			memset(&cb->args[3], 0,
-			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+			memset(&cb->args[4], 0,
+			       sizeof(cb->args) - 4*sizeof(cb->args[0]));
 
 		fa_head = get_fa_head(l, plen);
 
@@ -1904,11 +1892,11 @@
 			continue;
 
 		if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
-			cb->args[2] = h;
+			cb->args[3] = h;
 			return -1;
 		}
 	}
-	cb->args[2] = h;
+	cb->args[3] = h;
 	return skb->len;
 }
 
@@ -1917,23 +1905,23 @@
 	int m, s_m;
 	struct trie *t = (struct trie *) tb->tb_data;
 
-	s_m = cb->args[1];
+	s_m = cb->args[2];
 
 	rcu_read_lock();
 	for (m = 0; m <= 32; m++) {
 		if (m < s_m)
 			continue;
 		if (m > s_m)
-			memset(&cb->args[2], 0,
-				sizeof(cb->args) - 2*sizeof(cb->args[0]));
+			memset(&cb->args[3], 0,
+				sizeof(cb->args) - 3*sizeof(cb->args[0]));
 
 		if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
-			cb->args[1] = m;
+			cb->args[2] = m;
 			goto out;
 		}
 	}
 	rcu_read_unlock();
-	cb->args[1] = m;
+	cb->args[2] = m;
 	return skb->len;
 out:
 	rcu_read_unlock();
@@ -1943,9 +1931,9 @@
 /* Fix more generic FIB names for init later */
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-struct fib_table * fib_hash_init(int id)
+struct fib_table * fib_hash_init(u32 id)
 #else
-struct fib_table * __init fib_hash_init(int id)
+struct fib_table * __init fib_hash_init(u32 id)
 #endif
 {
 	struct fib_table *tb;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4c86ac3..c2ad07e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -187,11 +187,11 @@
 };
 
 /* Control parameters for ECHO replies. */
-int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts = 1;
+int sysctl_icmp_echo_ignore_all __read_mostly;
+int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
 
 /* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses = 1;
+int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
 
 /*
  * 	Configurable global rate limit.
@@ -205,9 +205,9 @@
  *	time exceeded (11), parameter problem (12)
  */
 
-int sysctl_icmp_ratelimit = 1 * HZ;
-int sysctl_icmp_ratemask = 0x1818;
-int sysctl_icmp_errors_use_inbound_ifaddr;
+int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
+int sysctl_icmp_ratemask __read_mostly = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
 
 /*
  *	ICMP control array. This specifies what to do with each ICMP.
@@ -406,6 +406,7 @@
 						.saddr = rt->rt_spec_dst,
 						.tos = RT_TOS(skb->nh.iph->tos) } },
 				    .proto = IPPROTO_ICMP };
+		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			goto out_unlock;
 	}
@@ -560,6 +561,7 @@
 				}
 			}
 		};
+		security_skb_classify_flow(skb_in, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			goto out_unlock;
 	}
@@ -928,7 +930,7 @@
 	ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!(u16)csum_fold(skb->csum))
 			break;
 		/* fall through */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8e8117c..58be822 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -931,7 +931,7 @@
 		goto drop;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!(u16)csum_fold(skb->csum))
 			break;
 		/* fall through */
@@ -1397,8 +1397,8 @@
 /*
  *	Join a socket to a group
  */
-int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS;
-int sysctl_igmp_max_msf = IP_MAX_MSF;
+int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
+int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
 
 
 static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e50a1bf..0720439 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -327,6 +327,7 @@
 				       { .sport = inet_sk(sk)->sport,
 					 .dport = ireq->rmt_port } } };
 
+	security_req_classify_flow(req, &fl);
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
@@ -509,6 +510,8 @@
 
 		/* Deinitialize accept_queue to trap illegal accesses. */
 		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+
+		security_inet_csk_clone(newsk, req);
 	}
 	return newsk;
 }
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 95fac55..fb296c9 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -124,8 +124,10 @@
  * remote address for the connection. So always assume those are both
  * wildcarded during the search since they can never be otherwise.
  */
-struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
-				    const unsigned short hnum, const int dif)
+static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
+					      const u32 daddr,
+					      const unsigned short hnum,
+					      const int dif)
 {
 	struct sock *result = NULL, *sk;
 	const struct hlist_node *node;
@@ -159,6 +161,33 @@
 	return result;
 }
 
+/* Listener lookup; short-circuits when the bucket's head socket qualifies. */
+struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+				    const u32 daddr, const unsigned short hnum,
+				    const int dif)
+{
+	struct sock *sk = NULL;
+	const struct hlist_head *head;
+
+	read_lock(&hashinfo->lhash_lock);
+	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+	if (!hlist_empty(head)) {
+		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+		if (inet->num == hnum && !sk->sk_node.next &&
+		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
+		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+		    !sk->sk_bound_dev_if)
+			goto sherry_cache;	/* head socket accepted: skip the slow scan */
+		sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
+	}
+	if (sk) {
+sherry_cache:	/* fast path enters here with sk already set */
+		sock_hold(sk);	/* called before lhash_lock is released */
+	}
+	read_unlock(&hashinfo->lhash_lock);
+	return sk;
+}
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
 
 /* called with local bh disabled */
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 03ff62e..a675602 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -126,12 +126,9 @@
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_HWCACHE_ALIGN,
+			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 			NULL, NULL);
 
-	if (!peer_cachep)
-		panic("cannot create inet_peer_cache");
-
 	/* All the timers, started at system startup tend
 	   to synchronize. Perturb it a bit.
 	 */
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b84b53a..165d728 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -54,15 +54,15 @@
  * even the most extreme cases without allowing an attacker to measurably
  * harm machine performance.
  */
-int sysctl_ipfrag_high_thresh = 256*1024;
-int sysctl_ipfrag_low_thresh = 192*1024;
+int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
+int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
 
-int sysctl_ipfrag_max_dist = 64;
+int sysctl_ipfrag_max_dist __read_mostly = 64;
 
 /* Important NOTE! Fragment queue must be destroyed before MSL expires.
  * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
  */
-int sysctl_ipfrag_time = IP_FRAG_TIME;
+int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
 
 struct ipfrag_skb_cb
 {
@@ -130,7 +130,7 @@
 }
 
 static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
+int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void ipfrag_secret_rebuild(unsigned long dummy)
 {
@@ -665,7 +665,7 @@
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 		atomic_sub(fp->truesize, &ip_frag_mem);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0f9b3a3..f5fba05 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -393,7 +393,8 @@
 	int code = skb->h.icmph->code;
 	int rel_type = 0;
 	int rel_code = 0;
-	int rel_info = 0;
+	__be32 rel_info = 0;
+	__u32 n = 0;
 	u16 flags;
 	int grehlen = (iph->ihl<<2) + 4;
 	struct sk_buff *skb2;
@@ -422,14 +423,16 @@
 	default:
 		return;
 	case ICMP_PARAMETERPROB:
-		if (skb->h.icmph->un.gateway < (iph->ihl<<2))
+		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		if (n < (iph->ihl<<2))
 			return;
 
 		/* So... This guy found something strange INSIDE encapsulated
 		   packet. Well, he is fool, but what can we do ?
 		 */
 		rel_type = ICMP_PARAMETERPROB;
-		rel_info = skb->h.icmph->un.gateway - grehlen;
+		n -= grehlen;
+		rel_info = htonl(n << 24);
 		break;
 
 	case ICMP_DEST_UNREACH:
@@ -440,13 +443,14 @@
 			return;
 		case ICMP_FRAG_NEEDED:
 			/* And it is the only really necessary thing :-) */
-			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
-			if (rel_info < grehlen+68)
+			n = ntohs(skb->h.icmph->un.frag.mtu);
+			if (n < grehlen+68)
 				return;
-			rel_info -= grehlen;
+			n -= grehlen;
 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-			if (rel_info > ntohs(eiph->tot_len))
+			if (n > ntohs(eiph->tot_len))
 				return;
+			rel_info = htonl(n);
 			break;
 		default:
 			/* All others are translated to HOST_UNREACH.
@@ -508,12 +512,11 @@
 
 	/* change mtu on this route */
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (rel_info > dst_mtu(skb2->dst)) {
+		if (n > dst_mtu(skb2->dst)) {
 			kfree_skb(skb2);
 			return;
 		}
-		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
-		rel_info = htonl(rel_info);
+		skb2->dst->ops->update_pmtu(skb2->dst, n);
 	} else if (type == ICMP_TIME_EXCEEDED) {
 		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
@@ -576,7 +579,7 @@
 
 		if (flags&GRE_CSUM) {
 			switch (skb->ip_summed) {
-			case CHECKSUM_HW:
+			case CHECKSUM_COMPLETE:
 				csum = (u16)csum_fold(skb->csum);
 				if (!csum)
 					break;
@@ -584,7 +587,7 @@
 			case CHECKSUM_NONE:
 				skb->csum = 0;
 				csum = __skb_checksum_complete(skb);
-				skb->ip_summed = CHECKSUM_HW;
+				skb->ip_summed = CHECKSUM_COMPLETE;
 			}
 			offset += 4;
 		}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 406056e..e7437c0 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -24,6 +24,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/route.h>
+#include <net/cipso_ipv4.h>
 
 /* 
  * Write options to IP header, record destination address to
@@ -194,6 +195,13 @@
 			dopt->is_strictroute = sopt->is_strictroute;
 		}
 	}
+	if (sopt->cipso) {
+		optlen  = sptr[sopt->cipso+1];
+		dopt->cipso = dopt->optlen+sizeof(struct iphdr);
+		memcpy(dptr, sptr+sopt->cipso, optlen);
+		dptr += optlen;
+		dopt->optlen += optlen;
+	}
 	while (dopt->optlen & 3) {
 		*dptr++ = IPOPT_END;
 		dopt->optlen++;
@@ -434,6 +442,17 @@
 			if (optptr[2] == 0 && optptr[3] == 0)
 				opt->router_alert = optptr - iph;
 			break;
+		      case IPOPT_CIPSO:
+		        if (opt->cipso) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			opt->cipso = optptr - iph;
+		        if (cipso_v4_validate(&optptr)) {
+				pp_ptr = optptr;
+				goto error;
+			}
+			break;
 		      case IPOPT_SEC:
 		      case IPOPT_SID:
 		      default:
@@ -506,7 +525,6 @@
 		opt->__data[optlen++] = IPOPT_END;
 	opt->optlen = optlen;
 	opt->is_data = 1;
-	opt->is_setbyuser = 1;
 	if (optlen && ip_options_compile(opt, NULL)) {
 		kfree(opt);
 		return -EINVAL;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a2ede16..97aee76 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,7 +83,7 @@
 #include <linux/netlink.h>
 #include <linux/tcp.h>
 
-int sysctl_ip_default_ttl = IPDEFTTL;
+int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
 
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
@@ -328,6 +328,7 @@
 			 * keep trying until route appears or the connection times
 			 * itself out.
 			 */
+			security_sk_classify_flow(sk, &fl);
 			if (ip_route_output_flow(&rt, &fl, sk, 0))
 				goto no_route;
 		}
@@ -425,7 +426,7 @@
 	int ptr;
 	struct net_device *dev;
 	struct sk_buff *skb2;
-	unsigned int mtu, hlen, left, len, ll_rs;
+	unsigned int mtu, hlen, left, len, ll_rs, pad;
 	int offset;
 	__be16 not_last_frag;
 	struct rtable *rt = (struct rtable*)skb->dst;
@@ -555,14 +556,13 @@
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = raw + hlen;		/* Where to start from */
 
-#ifdef CONFIG_BRIDGE_NETFILTER
 	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
-	 * we need to make room for the encapsulating header */
-	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb));
-	mtu -= nf_bridge_pad(skb);
-#else
-	ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev);
-#endif
+	 * we need to make room for the encapsulating header
+	 */
+	pad = nf_bridge_pad(skb);
+	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+	mtu -= pad;
+
 	/*
 	 *	Fragment the datagram.
 	 */
@@ -679,7 +679,7 @@
 {
 	struct iovec *iov = from;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
 			return -EFAULT;
 	} else {
@@ -735,7 +735,7 @@
 		/* initialize protocol header pointer */
 		skb->h.raw = skb->data + fragheaderlen;
 
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
 		sk->sk_sndmsg_off = 0;
 	}
@@ -843,7 +843,7 @@
 	    length + fragheaderlen <= mtu &&
 	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
 	    !exthdrlen)
-		csummode = CHECKSUM_HW;
+		csummode = CHECKSUM_PARTIAL;
 
 	inet->cork.length += length;
 	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
@@ -1366,6 +1366,7 @@
 					       { .sport = skb->h.th->dest,
 					         .dport = skb->h.th->source } },
 				    .proto = sk->sk_protocol };
+		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
 			return;
 	}
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index a0c28b2..1734243 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -32,7 +32,7 @@
 
 struct ipcomp_tfms {
 	struct list_head list;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 	int users;
 };
 
@@ -46,7 +46,7 @@
 	int err, plen, dlen;
 	struct ipcomp_data *ipcd = x->data;
 	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	int cpu;
 	
 	plen = skb->len;
@@ -107,7 +107,7 @@
 	struct iphdr *iph = skb->nh.iph;
 	struct ipcomp_data *ipcd = x->data;
 	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	int cpu;
 	
 	ihlen = iph->ihl * 4;
@@ -176,7 +176,7 @@
 	return 0;
 
 out_ok:
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		ip_send_check(iph);
 	return 0;
 }
@@ -216,7 +216,7 @@
 	t->id.daddr.a4 = x->id.daddr.a4;
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET;
-	t->props.mode = 1;
+	t->props.mode = XFRM_MODE_TUNNEL;
 	t->props.saddr.a4 = x->props.saddr.a4;
 	t->props.flags = x->props.flags;
 
@@ -302,7 +302,7 @@
 	return scratches;
 }
 
-static void ipcomp_free_tfms(struct crypto_tfm **tfms)
+static void ipcomp_free_tfms(struct crypto_comp **tfms)
 {
 	struct ipcomp_tfms *pos;
 	int cpu;
@@ -324,28 +324,28 @@
 		return;
 
 	for_each_possible_cpu(cpu) {
-		struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
-		crypto_free_tfm(tfm);
+		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+		crypto_free_comp(tfm);
 	}
 	free_percpu(tfms);
 }
 
-static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
+static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
 {
 	struct ipcomp_tfms *pos;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 	int cpu;
 
 	/* This can be any valid CPU ID so we don't need locking. */
 	cpu = raw_smp_processor_id();
 
 	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
-		struct crypto_tfm *tfm;
+		struct crypto_comp *tfm;
 
 		tfms = pos->tfms;
 		tfm = *per_cpu_ptr(tfms, cpu);
 
-		if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
+		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
 			pos->users++;
 			return tfms;
 		}
@@ -359,12 +359,13 @@
 	INIT_LIST_HEAD(&pos->list);
 	list_add(&pos->list, &ipcomp_tfms_list);
 
-	pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
+	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
 	if (!tfms)
 		goto error;
 
 	for_each_possible_cpu(cpu) {
-		struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
+		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+							    CRYPTO_ALG_ASYNC);
 		if (!tfm)
 			goto error;
 		*per_cpu_ptr(tfms, cpu) = tfm;
@@ -415,7 +416,7 @@
 		goto out;
 
 	x->props.header_len = 0;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 
 	mutex_lock(&ipcomp_resource_mutex);
@@ -427,7 +428,7 @@
 		goto error;
 	mutex_unlock(&ipcomp_resource_mutex);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cb8a92f..1fbb384 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -31,7 +31,6 @@
  *              --  Josef Siemes <jsiemes@web.de>, Aug 2002
  */
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 76ab50b..0c45565 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -341,7 +341,8 @@
 	int code = skb->h.icmph->code;
 	int rel_type = 0;
 	int rel_code = 0;
-	int rel_info = 0;
+	__be32 rel_info = 0;
+	__u32 n = 0;
 	struct sk_buff *skb2;
 	struct flowi fl;
 	struct rtable *rt;
@@ -354,14 +355,15 @@
 	default:
 		return 0;
 	case ICMP_PARAMETERPROB:
-		if (skb->h.icmph->un.gateway < hlen)
+		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		if (n < hlen)
 			return 0;
 
 		/* So... This guy found something strange INSIDE encapsulated
 		   packet. Well, he is fool, but what can we do ?
 		 */
 		rel_type = ICMP_PARAMETERPROB;
-		rel_info = skb->h.icmph->un.gateway - hlen;
+		rel_info = htonl((n - hlen) << 24);
 		break;
 
 	case ICMP_DEST_UNREACH:
@@ -372,13 +374,14 @@
 			return 0;
 		case ICMP_FRAG_NEEDED:
 			/* And it is the only really necessary thing :-) */
-			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
-			if (rel_info < hlen+68)
+			n = ntohs(skb->h.icmph->un.frag.mtu);
+			if (n < hlen+68)
 				return 0;
-			rel_info -= hlen;
+			n -= hlen;
 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-			if (rel_info > ntohs(eiph->tot_len))
+			if (n > ntohs(eiph->tot_len))
 				return 0;
+			rel_info = htonl(n);
 			break;
 		default:
 			/* All others are translated to HOST_UNREACH.
@@ -440,12 +443,11 @@
 
 	/* change mtu on this route */
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (rel_info > dst_mtu(skb2->dst)) {
+		if (n > dst_mtu(skb2->dst)) {
 			kfree_skb(skb2);
 			return 0;
 		}
-		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
-		rel_info = htonl(rel_info);
+		skb2->dst->ops->update_pmtu(skb2->dst, n);
 	} else if (type == ICMP_TIME_EXCEEDED) {
 		struct ip_tunnel *t = netdev_priv(skb2->dev);
 		if (t->parms.iph.ttl) {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 85893ee..ba49588 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -312,7 +312,8 @@
 			e = NLMSG_DATA(nlh);
 			e->error = -ETIMEDOUT;
 			memset(&e->msg, 0, sizeof(e->msg));
-			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+			rtnl_unicast(skb, NETLINK_CB(skb).pid);
 		} else
 			kfree_skb(skb);
 	}
@@ -512,7 +513,6 @@
 
 	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 		if (skb->nh.iph->version == 0) {
-			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -525,7 +525,8 @@
 				e->error = -EMSGSIZE;
 				memset(&e->msg, 0, sizeof(e->msg));
 			}
-			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
+
+			rtnl_unicast(skb, NETLINK_CB(skb).pid);
 		} else
 			ip_mr_forward(skb, c, 0);
 	}
@@ -1899,11 +1900,8 @@
 {
 	mrt_cachep = kmem_cache_create("ip_mrt_cache",
 				       sizeof(struct mfc_cache),
-				       0, SLAB_HWCACHE_ALIGN,
+				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 				       NULL, NULL);
-	if (!mrt_cachep)
-		panic("cannot allocate ip_mrt_cache");
-
 	init_timer(&ipmr_expire_timer);
 	ipmr_expire_timer.function=ipmr_expire_process;
 	register_netdevice_notifier(&ip_mr_notifier);
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index bc28b11..820e831 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -151,7 +151,7 @@
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -204,7 +204,7 @@
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -229,7 +229,7 @@
 	switch (skb->ip_summed) {
 	case CHECKSUM_NONE:
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
 				      skb->len - tcphoff,
 				      skb->nh.iph->protocol, skb->csum)) {
@@ -239,7 +239,7 @@
 		}
 		break;
 	default:
-		/* CHECKSUM_UNNECESSARY */
+		/* No need to checksum. */
 		break;
 	}
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 89d9175..90c8166 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -161,7 +161,7 @@
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -216,7 +216,7 @@
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_HW)
+		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
 			(*pskb)->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -250,7 +250,7 @@
 		case CHECKSUM_NONE:
 			skb->csum = skb_checksum(skb, udphoff,
 						 skb->len - udphoff, 0);
-		case CHECKSUM_HW:
+		case CHECKSUM_COMPLETE:
 			if (csum_tcpudp_magic(skb->nh.iph->saddr,
 					      skb->nh.iph->daddr,
 					      skb->len - udphoff,
@@ -262,7 +262,7 @@
 			}
 			break;
 		default:
-			/* CHECKSUM_UNNECESSARY */
+			/* No need to checksum. */
 			break;
 		}
 	}
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6a9e34b..f88347d 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -168,7 +168,7 @@
 	unsigned int csum = 0;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN)
 			break;
 		if ((protocol == 0 && !(u16)csum_fold(skb->csum)) ||
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ef0b5aa..a55b8ff 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -278,17 +278,6 @@
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_MATCH_DSCP
-	tristate "DSCP match support"
-	depends on IP_NF_IPTABLES
-	help
-	  This option adds a `DSCP' match, which allows you to match against
-	  the IPv4 header DSCP field (DSCP codepoint).
-
-	  The DSCP codepoint can have any value between 0x0 and 0x4f.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_MATCH_AH
 	tristate "AH match support"
 	depends on IP_NF_IPTABLES
@@ -568,17 +557,6 @@
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_TARGET_DSCP
-	tristate "DSCP target support"
-	depends on IP_NF_MANGLE
-	help
-	  This option adds a `DSCP' match, which allows you to match against
-	  the IPv4 header DSCP field (DSCP codepoint).
-
-	  The DSCP codepoint can have any value between 0x0 and 0x4f.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_TARGET_TTL
 	tristate  'TTL target support'
 	depends on IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3ded4a3..09aaed1 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -59,7 +59,6 @@
 obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
 obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
@@ -68,7 +67,6 @@
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
 obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8d1d7a6..85f0d73 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -56,8 +56,6 @@
 #define ARP_NF_ASSERT(x)
 #endif
 
-#include <linux/netfilter_ipv4/listhelp.h>
-
 static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 				      char *hdr_addr, int len)
 {
@@ -208,8 +206,7 @@
 			       const struct net_device *out,
 			       unsigned int hooknum,
 			       const struct xt_target *target,
-			       const void *targinfo,
-			       void *userinfo)
+			       const void *targinfo)
 {
 	if (net_ratelimit())
 		printk("arp_tables: error: '%s'\n", (char *)targinfo);
@@ -226,8 +223,7 @@
 			   unsigned int hook,
 			   const struct net_device *in,
 			   const struct net_device *out,
-			   struct arpt_table *table,
-			   void *userdata)
+			   struct arpt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ];
 	unsigned int verdict = NF_DROP;
@@ -302,8 +298,7 @@
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
-								     t->data,
-								     userdata);
+								     t->data);
 
 				/* Target might have changed stuff. */
 				arp = (*pskb)->nh.arph;
@@ -490,12 +485,10 @@
 	if (t->u.kernel.target == &arpt_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
-			goto out;
+			goto err;
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
 						      e->comefrom)) {
 		duprintf("arp_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
@@ -562,8 +555,7 @@
 
 	t = arpt_get_target(e);
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-					    t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
 	module_put(t->u.kernel.target->me);
 	return 0;
 }
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index a58325c..d12b1df 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -11,7 +11,7 @@
 target(struct sk_buff **pskb,
        const struct net_device *in, const struct net_device *out,
        unsigned int hooknum, const struct xt_target *target,
-       const void *targinfo, void *userinfo)
+       const void *targinfo)
 {
 	const struct arpt_mangle *mangle = targinfo;
 	struct arphdr *arp;
@@ -67,7 +67,7 @@
 
 static int
 checkentry(const char *tablename, const void *e, const struct xt_target *target,
-           void *targinfo, unsigned int targinfosize, unsigned int hook_mask)
+           void *targinfo, unsigned int hook_mask)
 {
 	const struct arpt_mangle *mangle = targinfo;
 
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index d7c472f..7edea2a 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -155,7 +155,7 @@
 			      const struct net_device *out,
 			      int (*okfn)(struct sk_buff *))
 {
-	return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return arpt_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index aa45917..c432b31 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -47,7 +47,6 @@
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #define IP_CONNTRACK_VERSION	"2.4"
 
@@ -64,17 +63,17 @@
 
 void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
 LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
+struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
 static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size = 0;
-int ip_conntrack_max;
-struct list_head *ip_conntrack_hash;
+unsigned int ip_conntrack_htable_size __read_mostly = 0;
+int ip_conntrack_max __read_mostly;
+struct list_head *ip_conntrack_hash __read_mostly;
 static kmem_cache_t *ip_conntrack_cachep __read_mostly;
 static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
 struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid;
+unsigned int ip_ct_log_invalid __read_mostly;
 static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc;
+static int ip_conntrack_vmalloc __read_mostly;
 
 static unsigned int ip_conntrack_next_id;
 static unsigned int ip_conntrack_expect_next_id;
@@ -294,15 +293,10 @@
 static void
 clean_from_lists(struct ip_conntrack *ct)
 {
-	unsigned int ho, hr;
-	
 	DEBUGP("clean_from_lists(%p)\n", ct);
 	ASSERT_WRITE_LOCK(&ip_conntrack_lock);
-
-	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
 
 	/* Destroy all pending expectations */
 	ip_ct_remove_expectations(ct);
@@ -313,6 +307,7 @@
 {
 	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
 	struct ip_conntrack_protocol *proto;
+	struct ip_conntrack_helper *helper;
 
 	DEBUGP("destroy_conntrack(%p)\n", ct);
 	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
@@ -321,6 +316,10 @@
 	ip_conntrack_event(IPCT_DESTROY, ct);
 	set_bit(IPS_DYING_BIT, &ct->status);
 
+	helper = ct->helper;
+	if (helper && helper->destroy)
+		helper->destroy(ct);
+
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to ip_conntrack_lock!!! -HW */
@@ -367,16 +366,6 @@
 	ip_conntrack_put(ct);
 }
 
-static inline int
-conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
-		    const struct ip_conntrack_tuple *tuple,
-		    const struct ip_conntrack *ignored_conntrack)
-{
-	ASSERT_READ_LOCK(&ip_conntrack_lock);
-	return tuplehash_to_ctrack(i) != ignored_conntrack
-		&& ip_ct_tuple_equal(tuple, &i->tuple);
-}
-
 struct ip_conntrack_tuple_hash *
 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
 		    const struct ip_conntrack *ignored_conntrack)
@@ -386,7 +375,8 @@
 
 	ASSERT_READ_LOCK(&ip_conntrack_lock);
 	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
-		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+		if (tuplehash_to_ctrack(h) != ignored_conntrack &&
+		    ip_ct_tuple_equal(tuple, &h->tuple)) {
 			CONNTRACK_STAT_INC(found);
 			return h;
 		}
@@ -417,10 +407,10 @@
 					unsigned int repl_hash) 
 {
 	ct->id = ++ip_conntrack_next_id;
-	list_prepend(&ip_conntrack_hash[hash],
-		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_prepend(&ip_conntrack_hash[repl_hash],
-		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
+	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+		 &ip_conntrack_hash[hash]);
+	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+		 &ip_conntrack_hash[repl_hash]);
 }
 
 void ip_conntrack_hash_insert(struct ip_conntrack *ct)
@@ -440,6 +430,7 @@
 __ip_conntrack_confirm(struct sk_buff **pskb)
 {
 	unsigned int hash, repl_hash;
+	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
 
@@ -470,43 +461,43 @@
 	/* See if there's one in the list already, including reverse:
            NAT could have grabbed it without realizing, since we're
            not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&ip_conntrack_hash[hash],
-		       conntrack_tuple_cmp,
-		       struct ip_conntrack_tuple_hash *,
-		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
-			  conntrack_tuple_cmp,
-			  struct ip_conntrack_tuple_hash *,
-			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
-		/* Remove from unconfirmed list */
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_for_each_entry(h, &ip_conntrack_hash[hash], list)
+		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      &h->tuple))
+			goto out;
+	list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
+		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				      &h->tuple))
+			goto out;
 
-		__ip_conntrack_hash_insert(ct, hash, repl_hash);
-		/* Timer relative to confirmation time, not original
-		   setting time, otherwise we'd get timer wrap in
-		   weird delay cases. */
-		ct->timeout.expires += jiffies;
-		add_timer(&ct->timeout);
-		atomic_inc(&ct->ct_general.use);
-		set_bit(IPS_CONFIRMED_BIT, &ct->status);
-		CONNTRACK_STAT_INC(insert);
-		write_unlock_bh(&ip_conntrack_lock);
-		if (ct->helper)
-			ip_conntrack_event_cache(IPCT_HELPER, *pskb);
+	/* Remove from unconfirmed list */
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+	__ip_conntrack_hash_insert(ct, hash, repl_hash);
+	/* Timer relative to confirmation time, not original
+	   setting time, otherwise we'd get timer wrap in
+	   weird delay cases. */
+	ct->timeout.expires += jiffies;
+	add_timer(&ct->timeout);
+	atomic_inc(&ct->ct_general.use);
+	set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	CONNTRACK_STAT_INC(insert);
+	write_unlock_bh(&ip_conntrack_lock);
+	if (ct->helper)
+		ip_conntrack_event_cache(IPCT_HELPER, *pskb);
 #ifdef CONFIG_IP_NF_NAT_NEEDED
-		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
-		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-			ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
+	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+		ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
 #endif
-		ip_conntrack_event_cache(master_ct(ct) ?
-					 IPCT_RELATED : IPCT_NEW, *pskb);
+	ip_conntrack_event_cache(master_ct(ct) ?
+				 IPCT_RELATED : IPCT_NEW, *pskb);
 
-		return NF_ACCEPT;
-	}
+	return NF_ACCEPT;
 
+out:
 	CONNTRACK_STAT_INC(insert_failed);
 	write_unlock_bh(&ip_conntrack_lock);
-
 	return NF_DROP;
 }
 
@@ -527,23 +518,21 @@
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
-{
-	return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
-}
-
 static int early_drop(struct list_head *chain)
 {
 	/* Traverse backwards: gives us oldest, which is roughly LRU */
 	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct = NULL;
+	struct ip_conntrack *ct = NULL, *tmp;
 	int dropped = 0;
 
 	read_lock_bh(&ip_conntrack_lock);
-	h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
-	if (h) {
-		ct = tuplehash_to_ctrack(h);
-		atomic_inc(&ct->ct_general.use);
+	list_for_each_entry_reverse(h, chain, list) {
+		tmp = tuplehash_to_ctrack(h);
+		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+			ct = tmp;
+			atomic_inc(&ct->ct_general.use);
+			break;
+		}
 	}
 	read_unlock_bh(&ip_conntrack_lock);
 
@@ -559,18 +548,16 @@
 	return dropped;
 }
 
-static inline int helper_cmp(const struct ip_conntrack_helper *i,
-			     const struct ip_conntrack_tuple *rtuple)
-{
-	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
 static struct ip_conntrack_helper *
 __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
-			 struct ip_conntrack_helper *,
-			 tuple);
+	struct ip_conntrack_helper *h;
+
+	list_for_each_entry(h, &helpers, list) {
+		if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+			return h;
+	}
+	return NULL;
 }
 
 struct ip_conntrack_helper *
@@ -640,11 +627,15 @@
 		ip_conntrack_hash_rnd_initted = 1;
 	}
 
+	/* We don't want any race condition at early drop stage */
+	atomic_inc(&ip_conntrack_count);
+
 	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
+	    && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
 		/* Try dropping from this hash chain. */
 		if (!early_drop(&ip_conntrack_hash[hash])) {
+			atomic_dec(&ip_conntrack_count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "ip_conntrack: table full, dropping"
@@ -656,6 +647,7 @@
 	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
 	if (!conntrack) {
 		DEBUGP("Can't allocate conntrack.\n");
+		atomic_dec(&ip_conntrack_count);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -669,8 +661,6 @@
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
 
-	atomic_inc(&ip_conntrack_count);
-
 	return conntrack;
 }
 
@@ -1062,7 +1052,7 @@
 {
 	BUG_ON(me->timeout == 0);
 	write_lock_bh(&ip_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_add(&me->list, &helpers);
 	write_unlock_bh(&ip_conntrack_lock);
 
 	return 0;
@@ -1081,24 +1071,24 @@
 	return NULL;
 }
 
-static inline int unhelp(struct ip_conntrack_tuple_hash *i,
-			 const struct ip_conntrack_helper *me)
+static inline void unhelp(struct ip_conntrack_tuple_hash *i,
+			  const struct ip_conntrack_helper *me)
 {
 	if (tuplehash_to_ctrack(i)->helper == me) {
  		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
 		tuplehash_to_ctrack(i)->helper = NULL;
 	}
-	return 0;
 }
 
 void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
 {
 	unsigned int i;
+	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack_expect *exp, *tmp;
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&ip_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	list_del(&me->list);
 
 	/* Get rid of expectations */
 	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
@@ -1108,10 +1098,12 @@
 		}
 	}
 	/* Get rid of expecteds, set helpers to NULL. */
-	LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
-	for (i = 0; i < ip_conntrack_htable_size; i++)
-		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
-			    struct ip_conntrack_tuple_hash *, me);
+	list_for_each_entry(h, &unconfirmed, list)
+		unhelp(h, me);
+	for (i = 0; i < ip_conntrack_htable_size; i++) {
+		list_for_each_entry(h, &ip_conntrack_hash[i], list)
+			unhelp(h, me);
+	}
 	write_unlock_bh(&ip_conntrack_lock);
 
 	/* Someone could be still looking at the helper in a bh. */
@@ -1237,46 +1229,43 @@
 	nf_conntrack_get(nskb->nfct);
 }
 
-static inline int
-do_iter(const struct ip_conntrack_tuple_hash *i,
-	int (*iter)(struct ip_conntrack *i, void *data),
-	void *data)
-{
-	return iter(tuplehash_to_ctrack(i), data);
-}
-
 /* Bring out ya dead! */
-static struct ip_conntrack_tuple_hash *
+static struct ip_conntrack *
 get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
 		void *data, unsigned int *bucket)
 {
-	struct ip_conntrack_tuple_hash *h = NULL;
+	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ct;
 
 	write_lock_bh(&ip_conntrack_lock);
 	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
-				struct ip_conntrack_tuple_hash *, iter, data);
-		if (h)
-			break;
+		list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
+			ct = tuplehash_to_ctrack(h);
+			if (iter(ct, data))
+				goto found;
+		}
 	}
-	if (!h)
-		h = LIST_FIND_W(&unconfirmed, do_iter,
-				struct ip_conntrack_tuple_hash *, iter, data);
-	if (h)
-		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
+	list_for_each_entry(h, &unconfirmed, list) {
+		ct = tuplehash_to_ctrack(h);
+		if (iter(ct, data))
+			goto found;
+	}
 	write_unlock_bh(&ip_conntrack_lock);
+	return NULL;
 
-	return h;
+found:
+	atomic_inc(&ct->ct_general.use);
+	write_unlock_bh(&ip_conntrack_lock);
+	return ct;
 }
 
 void
 ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
 {
-	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ct;
 	unsigned int bucket = 0;
 
-	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
-		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
+	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
 			death_by_timeout((unsigned long)ct);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index b020a33..fb0aee6 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -20,11 +20,11 @@
  * 	 - We can only support one single call within each session
  *
  * TODO:
- *	 - testing of incoming PPTP calls 
+ *	 - testing of incoming PPTP calls
  *
- * Changes: 
+ * Changes:
  * 	2002-02-05 - Version 1.3
- * 	  - Call ip_conntrack_unexpect_related() from 
+ * 	  - Call ip_conntrack_unexpect_related() from
  * 	    pptp_destroy_siblings() to destroy expectations in case
  * 	    CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
  * 	    (Philip Craig <philipc@snapgear.com>)
@@ -80,7 +80,7 @@
 			  struct PptpControlHeader *ctlh,
 			  union pptp_ctrl_union *pptpReq);
 
-int
+void
 (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
 			    struct ip_conntrack_expect *expect_reply);
 
@@ -141,7 +141,7 @@
 		invert_tuplepr(&inv_t, &exp->tuple);
 		DEBUGP("trying to unexpect other dir: ");
 		DUMP_TUPLE(&inv_t);
-	
+
 		exp_other = ip_conntrack_expect_find(&inv_t);
 		if (exp_other) {
 			/* delete other expectation.  */
@@ -194,15 +194,16 @@
 {
 	struct ip_conntrack_tuple t;
 
-	/* Since ct->sibling_list has literally rusted away in 2.6.11, 
+	ip_ct_gre_keymap_destroy(ct);
+	/* Since ct->sibling_list has literally rusted away in 2.6.11,
 	 * we now need another way to find out about our sibling
 	 * contrack and expects... -HW */
 
 	/* try original (pns->pac) tuple */
 	memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
 	t.dst.protonum = IPPROTO_GRE;
-	t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
-	t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+	t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
+	t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
 
 	if (!destroy_sibling_or_exp(&t))
 		DEBUGP("failed to timeout original pns->pac ct/exp\n");
@@ -210,8 +211,8 @@
 	/* try reply (pac->pns) tuple */
 	memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
 	t.dst.protonum = IPPROTO_GRE;
-	t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
-	t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+	t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
+	t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
 
 	if (!destroy_sibling_or_exp(&t))
 		DEBUGP("failed to timeout reply pac->pns ct/exp\n");
@@ -219,94 +220,63 @@
 
 /* expect GRE connections (PNS->PAC and PAC->PNS direction) */
 static inline int
-exp_gre(struct ip_conntrack *master,
-	u_int32_t seq,
+exp_gre(struct ip_conntrack *ct,
 	__be16 callid,
 	__be16 peer_callid)
 {
-	struct ip_conntrack_tuple inv_tuple;
-	struct ip_conntrack_tuple exp_tuples[] = {
-		/* tuple in original direction, PNS->PAC */
-		{ .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
-			   .u = { .gre = { .key = peer_callid } }
-			 },
-		  .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
-			   .u = { .gre = { .key = callid } },
-			   .protonum = IPPROTO_GRE
-			 },
-		 },
-		/* tuple in reply direction, PAC->PNS */
-		{ .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
-			   .u = { .gre = { .key = callid } }
-			 },
-		  .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
-			   .u = { .gre = { .key = peer_callid } },
-			   .protonum = IPPROTO_GRE
-			 },
-		 }
-	};
 	struct ip_conntrack_expect *exp_orig, *exp_reply;
 	int ret = 1;
 
-	exp_orig = ip_conntrack_expect_alloc(master);
+	exp_orig = ip_conntrack_expect_alloc(ct);
 	if (exp_orig == NULL)
 		goto out;
 
-	exp_reply = ip_conntrack_expect_alloc(master);
+	exp_reply = ip_conntrack_expect_alloc(ct);
 	if (exp_reply == NULL)
 		goto out_put_orig;
 
-	memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
+	/* original direction, PNS->PAC */
+	exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+	exp_orig->tuple.src.u.gre.key = peer_callid;
+	exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+	exp_orig->tuple.dst.u.gre.key = callid;
+	exp_orig->tuple.dst.protonum = IPPROTO_GRE;
 
 	exp_orig->mask.src.ip = 0xffffffff;
 	exp_orig->mask.src.u.all = 0;
-	exp_orig->mask.dst.u.all = 0;
 	exp_orig->mask.dst.u.gre.key = htons(0xffff);
 	exp_orig->mask.dst.ip = 0xffffffff;
 	exp_orig->mask.dst.protonum = 0xff;
-		
-	exp_orig->master = master;
+
+	exp_orig->master = ct;
 	exp_orig->expectfn = pptp_expectfn;
 	exp_orig->flags = 0;
 
 	/* both expectations are identical apart from tuple */
 	memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
-	memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
+
+	/* reply direction, PAC->PNS */
+	exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+	exp_reply->tuple.src.u.gre.key = callid;
+	exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+	exp_reply->tuple.dst.u.gre.key = peer_callid;
+	exp_reply->tuple.dst.protonum = IPPROTO_GRE;
 
 	if (ip_nat_pptp_hook_exp_gre)
-		ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
-	else {
+		ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
+	if (ip_conntrack_expect_related(exp_orig) != 0)
+		goto out_put_both;
+	if (ip_conntrack_expect_related(exp_reply) != 0)
+		goto out_unexpect_orig;
 
-		DEBUGP("calling expect_related PNS->PAC");
-		DUMP_TUPLE(&exp_orig->tuple);
-
-		if (ip_conntrack_expect_related(exp_orig) != 0) {
-			DEBUGP("cannot expect_related()\n");
-			goto out_put_both;
-		}
-
-		DEBUGP("calling expect_related PAC->PNS");
-		DUMP_TUPLE(&exp_reply->tuple);
-
-		if (ip_conntrack_expect_related(exp_reply) != 0) {
-			DEBUGP("cannot expect_related()\n");
-			goto out_unexpect_orig;
-		}
-
-		/* Add GRE keymap entries */
-		if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
-			DEBUGP("cannot keymap_add() exp\n");
-			goto out_unexpect_both;
-		}
-
-		invert_tuplepr(&inv_tuple, &exp_reply->tuple);
-		if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
-			ip_ct_gre_keymap_destroy(master);
-			DEBUGP("cannot keymap_add() exp_inv\n");
-			goto out_unexpect_both;
-		}
-		ret = 0;
+	/* Add GRE keymap entries */
+	if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
+		goto out_unexpect_both;
+	if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
+		ip_ct_gre_keymap_destroy(ct);
+		goto out_unexpect_both;
 	}
+	ret = 0;
 
 out_put_both:
 	ip_conntrack_expect_put(exp_reply);
@@ -322,73 +292,36 @@
 	goto out_put_both;
 }
 
-static inline int 
+static inline int
 pptp_inbound_pkt(struct sk_buff **pskb,
-		 struct tcphdr *tcph,
-		 unsigned int nexthdr_off,
-		 unsigned int datalen,
+		 struct PptpControlHeader *ctlh,
+		 union pptp_ctrl_union *pptpReq,
+		 unsigned int reqlen,
 		 struct ip_conntrack *ct,
 		 enum ip_conntrack_info ctinfo)
 {
-	struct PptpControlHeader _ctlh, *ctlh;
-	unsigned int reqlen;
-	union pptp_ctrl_union _pptpReq, *pptpReq;
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
-	__be16 *cid, *pcid;
-	u_int32_t seq;	
-
-	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
-	if (!ctlh) {
-		DEBUGP("error during skb_header_pointer\n");
-		return NF_ACCEPT;
-	}
-	nexthdr_off += sizeof(_ctlh);
-	datalen -= sizeof(_ctlh);
-
-	reqlen = datalen;
-	if (reqlen > sizeof(*pptpReq))
-		reqlen = sizeof(*pptpReq);
-	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
-	if (!pptpReq) {
-		DEBUGP("error during skb_header_pointer\n");
-		return NF_ACCEPT;
-	}
+	__be16 cid = 0, pcid = 0;
 
 	msg = ntohs(ctlh->messageType);
 	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
 
 	switch (msg) {
 	case PPTP_START_SESSION_REPLY:
-		if (reqlen < sizeof(_pptpReq.srep)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server confirms new control session */
-		if (info->sstate < PPTP_SESSION_REQUESTED) {
-			DEBUGP("%s without START_SESS_REQUEST\n",
-				pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate < PPTP_SESSION_REQUESTED)
+			goto invalid;
 		if (pptpReq->srep.resultCode == PPTP_START_OK)
 			info->sstate = PPTP_SESSION_CONFIRMED;
-		else 
+		else
 			info->sstate = PPTP_SESSION_ERROR;
 		break;
 
 	case PPTP_STOP_SESSION_REPLY:
-		if (reqlen < sizeof(_pptpReq.strep)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server confirms end of control session */
-		if (info->sstate > PPTP_SESSION_STOPREQ) {
-			DEBUGP("%s without STOP_SESS_REQUEST\n",
-				pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate > PPTP_SESSION_STOPREQ)
+			goto invalid;
 		if (pptpReq->strep.resultCode == PPTP_STOP_OK)
 			info->sstate = PPTP_SESSION_NONE;
 		else
@@ -396,116 +329,64 @@
 		break;
 
 	case PPTP_OUT_CALL_REPLY:
-		if (reqlen < sizeof(_pptpReq.ocack)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server accepted call, we now expect GRE frames */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
 		if (info->cstate != PPTP_CALL_OUT_REQ &&
-		    info->cstate != PPTP_CALL_OUT_CONF) {
-			DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
-			break;
-		}
-		if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
+		    info->cstate != PPTP_CALL_OUT_CONF)
+			goto invalid;
+
+		cid = pptpReq->ocack.callID;
+		pcid = pptpReq->ocack.peersCallID;
+		if (info->pns_call_id != pcid)
+			goto invalid;
+		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+			ntohs(cid), ntohs(pcid));
+
+		if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
+			info->cstate = PPTP_CALL_OUT_CONF;
+			info->pac_call_id = cid;
+			exp_gre(ct, cid, pcid);
+		} else
 			info->cstate = PPTP_CALL_NONE;
-			break;
-		}
-
-		cid = &pptpReq->ocack.callID;
-		pcid = &pptpReq->ocack.peersCallID;
-
-		info->pac_call_id = ntohs(*cid);
-		
-		if (htons(info->pns_call_id) != *pcid) {
-			DEBUGP("%s for unknown callid %u\n",
-				pptp_msg_name[msg], ntohs(*pcid));
-			break;
-		}
-
-		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], 
-			ntohs(*cid), ntohs(*pcid));
-		
-		info->cstate = PPTP_CALL_OUT_CONF;
-
-		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
-				       + sizeof(struct PptpControlHeader)
-				       + ((void *)pcid - (void *)pptpReq);
-			
-		if (exp_gre(ct, seq, *cid, *pcid) != 0)
-			printk("ip_conntrack_pptp: error during exp_gre\n");
 		break;
 
 	case PPTP_IN_CALL_REQUEST:
-		if (reqlen < sizeof(_pptpReq.icack)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server tells us about incoming call request */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
-			break;
-		}
-		pcid = &pptpReq->icack.peersCallID;
-		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
+
+		cid = pptpReq->icreq.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
 		info->cstate = PPTP_CALL_IN_REQ;
-		info->pac_call_id = ntohs(*pcid);
+		info->pac_call_id = cid;
 		break;
 
 	case PPTP_IN_CALL_CONNECT:
-		if (reqlen < sizeof(_pptpReq.iccon)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server tells us about incoming call established */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
-			break;
-		}
-		if (info->cstate != PPTP_CALL_IN_REP
-		    && info->cstate != PPTP_CALL_IN_CONF) {
-			DEBUGP("%s but never sent IN_CALL_REPLY\n",
-				pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
+		if (info->cstate != PPTP_CALL_IN_REP &&
+		    info->cstate != PPTP_CALL_IN_CONF)
+			goto invalid;
 
-		pcid = &pptpReq->iccon.peersCallID;
-		cid = &info->pac_call_id;
+		pcid = pptpReq->iccon.peersCallID;
+		cid = info->pac_call_id;
 
-		if (info->pns_call_id != ntohs(*pcid)) {
-			DEBUGP("%s for unknown CallID %u\n", 
-				pptp_msg_name[msg], ntohs(*pcid));
-			break;
-		}
+		if (info->pns_call_id != pcid)
+			goto invalid;
 
-		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
 		info->cstate = PPTP_CALL_IN_CONF;
 
 		/* we expect a GRE connection from PAC to PNS */
-		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
-				       + sizeof(struct PptpControlHeader)
-				       + ((void *)pcid - (void *)pptpReq);
-			
-		if (exp_gre(ct, seq, *cid, *pcid) != 0)
-			printk("ip_conntrack_pptp: error during exp_gre\n");
-
+		exp_gre(ct, cid, pcid);
 		break;
 
 	case PPTP_CALL_DISCONNECT_NOTIFY:
-		if (reqlen < sizeof(_pptpReq.disc)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* server confirms disconnect */
-		cid = &pptpReq->disc.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+		cid = pptpReq->disc.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
 		info->cstate = PPTP_CALL_NONE;
 
 		/* untrack this call id, unexpect GRE packets */
@@ -513,54 +394,39 @@
 		break;
 
 	case PPTP_WAN_ERROR_NOTIFY:
-		break;
-
 	case PPTP_ECHO_REQUEST:
 	case PPTP_ECHO_REPLY:
 		/* I don't have to explain these ;) */
 		break;
 	default:
-		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
-			? pptp_msg_name[msg]:pptp_msg_name[0], msg);
-		break;
+		goto invalid;
 	}
 
-
 	if (ip_nat_pptp_hook_inbound)
 		return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
 						pptpReq);
-
 	return NF_ACCEPT;
 
+invalid:
+	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
+	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
+	return NF_ACCEPT;
 }
 
 static inline int
 pptp_outbound_pkt(struct sk_buff **pskb,
-		  struct tcphdr *tcph,
-		  unsigned int nexthdr_off,
-		  unsigned int datalen,
+		  struct PptpControlHeader *ctlh,
+		  union pptp_ctrl_union *pptpReq,
+		  unsigned int reqlen,
 		  struct ip_conntrack *ct,
 		  enum ip_conntrack_info ctinfo)
 {
-	struct PptpControlHeader _ctlh, *ctlh;
-	unsigned int reqlen;
-	union pptp_ctrl_union _pptpReq, *pptpReq;
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
 	u_int16_t msg;
-	__be16 *cid, *pcid;
-
-	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
-	if (!ctlh)
-		return NF_ACCEPT;
-	nexthdr_off += sizeof(_ctlh);
-	datalen -= sizeof(_ctlh);
-	
-	reqlen = datalen;
-	if (reqlen > sizeof(*pptpReq))
-		reqlen = sizeof(*pptpReq);
-	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
-	if (!pptpReq)
-		return NF_ACCEPT;
+	__be16 cid = 0, pcid = 0;
 
 	msg = ntohs(ctlh->messageType);
 	DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
@@ -568,10 +434,8 @@
 	switch (msg) {
 	case PPTP_START_SESSION_REQUEST:
 		/* client requests for new control session */
-		if (info->sstate != PPTP_SESSION_NONE) {
-			DEBUGP("%s but we already have one",
-				pptp_msg_name[msg]);
-		}
+		if (info->sstate != PPTP_SESSION_NONE)
+			goto invalid;
 		info->sstate = PPTP_SESSION_REQUESTED;
 		break;
 	case PPTP_STOP_SESSION_REQUEST:
@@ -580,123 +444,115 @@
 		break;
 
 	case PPTP_OUT_CALL_REQUEST:
-		if (reqlen < sizeof(_pptpReq.ocreq)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			/* FIXME: break; */
-		}
-
 		/* client initiating connection to server */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("%s but no session\n",
-				pptp_msg_name[msg]);
-			break;
-		}
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
 		info->cstate = PPTP_CALL_OUT_REQ;
 		/* track PNS call id */
-		cid = &pptpReq->ocreq.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
-		info->pns_call_id = ntohs(*cid);
+		cid = pptpReq->ocreq.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+		info->pns_call_id = cid;
 		break;
 	case PPTP_IN_CALL_REPLY:
-		if (reqlen < sizeof(_pptpReq.icack)) {
-			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
-			break;
-		}
-
 		/* client answers incoming call */
-		if (info->cstate != PPTP_CALL_IN_REQ
-		    && info->cstate != PPTP_CALL_IN_REP) {
-			DEBUGP("%s without incall_req\n", 
-				pptp_msg_name[msg]);
-			break;
-		}
-		if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
+		if (info->cstate != PPTP_CALL_IN_REQ &&
+		    info->cstate != PPTP_CALL_IN_REP)
+			goto invalid;
+
+		cid = pptpReq->icack.callID;
+		pcid = pptpReq->icack.peersCallID;
+		if (info->pac_call_id != pcid)
+			goto invalid;
+		DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
+		       ntohs(cid), ntohs(pcid));
+
+		if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
+			/* part two of the three-way handshake */
+			info->cstate = PPTP_CALL_IN_REP;
+			info->pns_call_id = cid;
+		} else
 			info->cstate = PPTP_CALL_NONE;
-			break;
-		}
-		pcid = &pptpReq->icack.peersCallID;
-		if (info->pac_call_id != ntohs(*pcid)) {
-			DEBUGP("%s for unknown call %u\n", 
-				pptp_msg_name[msg], ntohs(*pcid));
-			break;
-		}
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
-		/* part two of the three-way handshake */
-		info->cstate = PPTP_CALL_IN_REP;
-		info->pns_call_id = ntohs(pptpReq->icack.callID);
 		break;
 
 	case PPTP_CALL_CLEAR_REQUEST:
 		/* client requests hangup of call */
-		if (info->sstate != PPTP_SESSION_CONFIRMED) {
-			DEBUGP("CLEAR_CALL but no session\n");
-			break;
-		}
+		if (info->sstate != PPTP_SESSION_CONFIRMED)
+			goto invalid;
 		/* FUTURE: iterate over all calls and check if
 		 * call ID is valid.  We don't do this without newnat,
 		 * because we only know about last call */
 		info->cstate = PPTP_CALL_CLEAR_REQ;
 		break;
 	case PPTP_SET_LINK_INFO:
-		break;
 	case PPTP_ECHO_REQUEST:
 	case PPTP_ECHO_REPLY:
 		/* I don't have to explain these ;) */
 		break;
 	default:
-		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? 
-			pptp_msg_name[msg]:pptp_msg_name[0], msg);
-		/* unknown: no need to create GRE masq table entry */
-		break;
+		goto invalid;
 	}
-	
+
 	if (ip_nat_pptp_hook_outbound)
 		return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
 						 pptpReq);
+	return NF_ACCEPT;
 
+invalid:
+	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
+	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
+	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
+	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
 	return NF_ACCEPT;
 }
 
+static const unsigned int pptp_msg_size[] = {	/* min. request length, indexed by PPTP message type */
+	[PPTP_START_SESSION_REQUEST]  = sizeof(struct PptpStartSessionRequest),
+	[PPTP_START_SESSION_REPLY]    = sizeof(struct PptpStartSessionReply),
+	[PPTP_STOP_SESSION_REQUEST]   = sizeof(struct PptpStopSessionRequest),
+	[PPTP_STOP_SESSION_REPLY]     = sizeof(struct PptpStopSessionReply),
+	[PPTP_OUT_CALL_REQUEST]       = sizeof(struct PptpOutCallRequest),
+	[PPTP_OUT_CALL_REPLY]	      = sizeof(struct PptpOutCallReply),
+	[PPTP_IN_CALL_REQUEST]	      = sizeof(struct PptpInCallRequest),
+	[PPTP_IN_CALL_REPLY]	      = sizeof(struct PptpInCallReply),
+	[PPTP_IN_CALL_CONNECT]	      = sizeof(struct PptpInCallConnected),
+	[PPTP_CALL_CLEAR_REQUEST]     = sizeof(struct PptpClearCallRequest),
+	[PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
+	[PPTP_WAN_ERROR_NOTIFY]	      = sizeof(struct PptpWanErrorNotify),
+	[PPTP_SET_LINK_INFO]	      = sizeof(struct PptpSetLinkInfo),
+};
 
 /* track caller id inside control connection, call expect_related */
-static int 
+static int
 conntrack_pptp_help(struct sk_buff **pskb,
 		    struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
 
 {
-	struct pptp_pkt_hdr _pptph, *pptph;
-	struct tcphdr _tcph, *tcph;
-	u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
-	u_int32_t datalen;
 	int dir = CTINFO2DIR(ctinfo);
 	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
-	unsigned int nexthdr_off;
-
+	struct tcphdr _tcph, *tcph;
+	struct pptp_pkt_hdr _pptph, *pptph;
+	struct PptpControlHeader _ctlh, *ctlh;
+	union pptp_ctrl_union _pptpReq, *pptpReq;
+	unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
+	unsigned int datalen, reqlen, nexthdr_off;
 	int oldsstate, oldcstate;
 	int ret;
+	u_int16_t msg;
 
 	/* don't do any tracking before tcp handshake complete */
-	if (ctinfo != IP_CT_ESTABLISHED 
+	if (ctinfo != IP_CT_ESTABLISHED
 	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
 		DEBUGP("ctinfo = %u, skipping\n", ctinfo);
 		return NF_ACCEPT;
 	}
-	
+
 	nexthdr_off = (*pskb)->nh.iph->ihl*4;
 	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
 	BUG_ON(!tcph);
 	nexthdr_off += tcph->doff * 4;
  	datalen = tcplen - tcph->doff * 4;
 
-	if (tcph->fin || tcph->rst) {
-		DEBUGP("RST/FIN received, timeouting GRE\n");
-		/* can't do this after real newnat */
-		info->cstate = PPTP_CALL_NONE;
-
-		/* untrack this call id, unexpect GRE packets */
-		pptp_destroy_siblings(ct);
-	}
-
 	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
 	if (!pptph) {
 		DEBUGP("no full PPTP header, can't track\n");
@@ -712,6 +568,23 @@
 		return NF_ACCEPT;
 	}
 
+	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+	if (!ctlh)
+		return NF_ACCEPT;
+	nexthdr_off += sizeof(_ctlh);
+	datalen -= sizeof(_ctlh);
+
+	reqlen = datalen;
+	msg = ntohs(ctlh->messageType);
+	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
+		return NF_ACCEPT;
+	if (reqlen > sizeof(*pptpReq))
+		reqlen = sizeof(*pptpReq);
+
+	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+	if (!pptpReq)
+		return NF_ACCEPT;
+
 	oldsstate = info->sstate;
 	oldcstate = info->cstate;
 
@@ -721,11 +594,11 @@
 	 * established from PNS->PAC.  However, RFC makes no guarantee */
 	if (dir == IP_CT_DIR_ORIGINAL)
 		/* client -> server (PNS -> PAC) */
-		ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+		ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
 					ctinfo);
 	else
 		/* server -> client (PAC -> PNS) */
-		ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+		ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
 				       ctinfo);
 	DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
 		oldsstate, info->sstate, oldcstate, info->cstate);
@@ -735,30 +608,31 @@
 }
 
 /* control protocol helper */
-static struct ip_conntrack_helper pptp = { 
+static struct ip_conntrack_helper pptp = {
 	.list = { NULL, NULL },
-	.name = "pptp", 
+	.name = "pptp",
 	.me = THIS_MODULE,
 	.max_expected = 2,
 	.timeout = 5 * 60,
-	.tuple = { .src = { .ip = 0, 
-		 	    .u = { .tcp = { .port =  
-				    __constant_htons(PPTP_CONTROL_PORT) } } 
-			  }, 
-		   .dst = { .ip = 0, 
+	.tuple = { .src = { .ip = 0,
+		 	    .u = { .tcp = { .port =
+				    __constant_htons(PPTP_CONTROL_PORT) } }
+			  },
+		   .dst = { .ip = 0,
 			    .u = { .all = 0 },
 			    .protonum = IPPROTO_TCP
-			  } 
+			  }
 		 },
-	.mask = { .src = { .ip = 0, 
-			   .u = { .tcp = { .port = __constant_htons(0xffff) } } 
-			 }, 
-		  .dst = { .ip = 0, 
+	.mask = { .src = { .ip = 0,
+			   .u = { .tcp = { .port = __constant_htons(0xffff) } }
+			 },
+		  .dst = { .ip = 0,
 			   .u = { .all = 0 },
-			   .protonum = 0xff 
-		 	 } 
+			   .protonum = 0xff
+		 	 }
 		},
-	.help = conntrack_pptp_help
+	.help = conntrack_pptp_help,
+	.destroy = pptp_destroy_siblings,
 };
 
 extern void ip_ct_proto_gre_fini(void);
@@ -768,7 +642,7 @@
 static int __init ip_conntrack_helper_pptp_init(void)
 {
 	int retcode;
- 
+
 	retcode = ip_ct_proto_gre_init();
 	if (retcode < 0)
 		return retcode;
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index a566a81..3d0b438 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -21,6 +21,7 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
+#include <linux/if_addr.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/route.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 0d4cc92..52eddea 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -329,11 +329,7 @@
 		/* dump everything */
 		events = ~0UL;
 		group = NFNLGRP_CONNTRACK_NEW;
-	} else if (events & (IPCT_STATUS |
-		      IPCT_PROTOINFO |
-		      IPCT_HELPER |
-		      IPCT_HELPINFO |
-		      IPCT_NATINFO)) {
+	} else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
 	} else 
@@ -385,6 +381,10 @@
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
 		goto nfattr_failure;
 
+	if (events & IPCT_MARK
+	    && ctnetlink_dump_mark(skb, ct) < 0)
+		goto nfattr_failure;
+
 	nlh->nlmsg_len = skb->tail - b;
 	nfnetlink_send(skb, 0, group, 0);
 	return NOTIFY_DONE;
@@ -436,6 +436,11 @@
 				cb->args[1] = (unsigned long)ct;
 				goto out;
 			}
+#ifdef CONFIG_IP_NF_CT_ACCT	/* CTRZERO dump: reset counters */
+			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
+						IPCTNL_MSG_CT_GET_CTRZERO)
+				memset(&ct->counters, 0, sizeof(ct->counters));
+#endif /* CONFIG_IP_NF_CT_ACCT */
 		}
 		if (cb->args[1]) {
 			cb->args[1] = 0;
@@ -451,46 +456,6 @@
 	return skb->len;
 }
 
-#ifdef CONFIG_IP_NF_CT_ACCT
-static int
-ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct ip_conntrack *ct = NULL;
-	struct ip_conntrack_tuple_hash *h;
-	struct list_head *i;
-	u_int32_t *id = (u_int32_t *) &cb->args[1];
-
-	DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, 
-			cb->args[0], *id);
-
-	write_lock_bh(&ip_conntrack_lock);
-	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
-			h = (struct ip_conntrack_tuple_hash *) i;
-			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
-				continue;
-			ct = tuplehash_to_ctrack(h);
-			if (ct->id <= *id)
-				continue;
-			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
-		                        	cb->nlh->nlmsg_seq,
-						IPCTNL_MSG_CT_NEW,
-						1, ct) < 0)
-				goto out;
-			*id = ct->id;
-
-			memset(&ct->counters, 0, sizeof(ct->counters));
-		}
-	}
-out:	
-	write_unlock_bh(&ip_conntrack_lock);
-
-	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
-	return skb->len;
-}
-#endif
-
 static const size_t cta_min_ip[CTA_IP_MAX] = {
 	[CTA_IP_V4_SRC-1]	= sizeof(u_int32_t),
 	[CTA_IP_V4_DST-1]	= sizeof(u_int32_t),
@@ -775,22 +740,14 @@
 		if (msg->nfgen_family != AF_INET)
 			return -EAFNOSUPPORT;
 
-		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
-					IPCTNL_MSG_CT_GET_CTRZERO) {
-#ifdef CONFIG_IP_NF_CT_ACCT
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_dump_table_w,
-						ctnetlink_done)) != 0)
-				return -EINVAL;
-#else
+#ifndef CONFIG_IP_NF_CT_ACCT
+		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
 			return -ENOTSUPP;
 #endif
-		} else {
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-		      		                        ctnetlink_dump_table,
-		                                	ctnetlink_done)) != 0)
+		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+	      		                        ctnetlink_dump_table,
+	                                	ctnetlink_done)) != 0)
 			return -EINVAL;
-		}
 
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (rlen > skb->len)
@@ -1253,6 +1210,9 @@
 	} else
 		return NOTIFY_DONE;
 
+	if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+		return NOTIFY_DONE;
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
 	if (!skb)
 		return NOTIFY_DONE;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index f891308..36f2b5e 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -12,7 +12,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned int ip_ct_generic_timeout = 600*HZ;
+unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
 
 static int generic_pkt_to_tuple(const struct sk_buff *skb,
 				unsigned int dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 4ee016c..5fe026f 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -1,15 +1,15 @@
 /*
- * ip_conntrack_proto_gre.c - Version 3.0 
+ * ip_conntrack_proto_gre.c - Version 3.0
  *
  * Connection tracking protocol helper module for GRE.
  *
  * GRE is a generic encapsulation protocol, which is generally not very
  * suited for NAT, as it has no protocol-specific part as port numbers.
  *
- * It has an optional key field, which may help us distinguishing two 
+ * It has an optional key field, which may help us distinguishing two
  * connections between the same two hosts.
  *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
  *
  * PPTP is built on top of a modified version of GRE, and has a mandatory
  * field called "CallID", which serves us for the same purpose as the key
@@ -37,7 +37,6 @@
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
 
-#include <linux/netfilter_ipv4/listhelp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
@@ -62,7 +61,7 @@
 #define DEBUGP(x, args...)
 #define DUMP_TUPLE_GRE(x)
 #endif
-				
+
 /* GRE KEYMAP HANDLING FUNCTIONS */
 static LIST_HEAD(gre_keymap_list);
 
@@ -82,12 +81,14 @@
 	__be16 key = 0;
 
 	read_lock_bh(&ip_ct_gre_lock);
-	km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
-			struct ip_ct_gre_keymap *, t);
-	if (km)
-		key = km->tuple.src.u.gre.key;
+	list_for_each_entry(km, &gre_keymap_list, list) {
+		if (gre_key_cmpfn(km, t)) {
+			key = km->tuple.src.u.gre.key;
+			break;
+		}
+	}
 	read_unlock_bh(&ip_ct_gre_lock);
-	
+
 	DEBUGP("lookup src key 0x%x up key for ", key);
 	DUMP_TUPLE_GRE(t);
 
@@ -99,28 +100,25 @@
 ip_ct_gre_keymap_add(struct ip_conntrack *ct,
 		     struct ip_conntrack_tuple *t, int reply)
 {
-	struct ip_ct_gre_keymap **exist_km, *km, *old;
+	struct ip_ct_gre_keymap **exist_km, *km;
 
 	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
 		DEBUGP("refusing to add GRE keymap to non-pptp session\n");
 		return -1;
 	}
 
-	if (!reply) 
+	if (!reply)
 		exist_km = &ct->help.ct_pptp_info.keymap_orig;
 	else
 		exist_km = &ct->help.ct_pptp_info.keymap_reply;
 
 	if (*exist_km) {
 		/* check whether it's a retransmission */
-		old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
-				struct ip_ct_gre_keymap *, t);
-		if (old == *exist_km) {
-			DEBUGP("retransmission\n");
-			return 0;
+		list_for_each_entry(km, &gre_keymap_list, list) {
+			if (gre_key_cmpfn(km, t) && km == *exist_km)
+				return 0;
 		}
-
-		DEBUGP("trying to override keymap_%s for ct %p\n", 
+		DEBUGP("trying to override keymap_%s for ct %p\n",
 			reply? "reply":"orig", ct);
 		return -EEXIST;
 	}
@@ -136,7 +134,7 @@
 	DUMP_TUPLE_GRE(&km->tuple);
 
 	write_lock_bh(&ip_ct_gre_lock);
-	list_append(&gre_keymap_list, km);
+	list_add_tail(&km->list, &gre_keymap_list);
 	write_unlock_bh(&ip_ct_gre_lock);
 
 	return 0;
@@ -154,7 +152,7 @@
 
 	write_lock_bh(&ip_ct_gre_lock);
 	if (ct->help.ct_pptp_info.keymap_orig) {
-		DEBUGP("removing %p from list\n", 
+		DEBUGP("removing %p from list\n",
 			ct->help.ct_pptp_info.keymap_orig);
 		list_del(&ct->help.ct_pptp_info.keymap_orig->list);
 		kfree(ct->help.ct_pptp_info.keymap_orig);
@@ -222,7 +220,7 @@
 static int gre_print_tuple(struct seq_file *s,
 			   const struct ip_conntrack_tuple *tuple)
 {
-	return seq_printf(s, "srckey=0x%x dstkey=0x%x ", 
+	return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
 			  ntohs(tuple->src.u.gre.key),
 			  ntohs(tuple->dst.u.gre.key));
 }
@@ -252,14 +250,14 @@
 	} else
 		ip_ct_refresh_acct(ct, conntrackinfo, skb,
 				   ct->proto.gre.timeout);
-	
+
 	return NF_ACCEPT;
 }
 
 /* Called when a new connection for this protocol found. */
 static int gre_new(struct ip_conntrack *ct,
 		   const struct sk_buff *skb)
-{ 
+{
 	DEBUGP(": ");
 	DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 
@@ -285,9 +283,9 @@
 }
 
 /* protocol helper struct */
-static struct ip_conntrack_protocol gre = { 
+static struct ip_conntrack_protocol gre = {
 	.proto		 = IPPROTO_GRE,
-	.name		 = "gre", 
+	.name		 = "gre",
 	.pkt_to_tuple	 = gre_pkt_to_tuple,
 	.invert_tuple	 = gre_invert_tuple,
 	.print_tuple	 = gre_print_tuple,
@@ -325,7 +323,7 @@
 	}
 	write_unlock_bh(&ip_ct_gre_lock);
 
-	ip_conntrack_protocol_unregister(&gre); 
+	ip_conntrack_protocol_unregister(&gre);
 }
 
 EXPORT_SYMBOL(ip_ct_gre_keymap_add);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 23f1c50..09c40eb 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -21,7 +21,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned int ip_ct_icmp_timeout = 30*HZ;
+unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 2d3612c..b908a48 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -58,13 +58,13 @@
 #define HOURS * 60 MINS
 #define DAYS  * 24 HOURS
 
-static unsigned int ip_ct_sctp_timeout_closed            =  10 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_wait       =   3 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_echoed     =   3 SECS;
-static unsigned int ip_ct_sctp_timeout_established       =   5 DAYS;
-static unsigned int ip_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
+static unsigned int ip_ct_sctp_timeout_closed __read_mostly           = 10 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly      =  3 SECS;
+static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly    =  3 SECS;
+static unsigned int ip_ct_sctp_timeout_established __read_mostly      =  5 DAYS;
+static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly    = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly    = 300 SECS / 1000;
+static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
 
 static const unsigned int * sctp_timeouts[]
 = { NULL,                                  /* SCTP_CONNTRACK_NONE  */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index fb920e7..03ae9a0 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -48,19 +48,19 @@
 /* "Be conservative in what you do, 
     be liberal in what you accept from others." 
     If it's non-zero, we mark only out of window RST segments as INVALID. */
-int ip_ct_tcp_be_liberal = 0;
+int ip_ct_tcp_be_liberal __read_mostly = 0;
 
 /* When connection is picked up from the middle, how many packets are required
    to pass in each direction when we assume we are in sync - if any side uses
    window scaling, we lost the game. 
    If it is set to zero, we disable picking up already established 
    connections. */
-int ip_ct_tcp_loose = 3;
+int ip_ct_tcp_loose __read_mostly = 3;
 
 /* Max number of the retransmitted packets without receiving an (acceptable) 
    ACK from the destination. If this number is reached, a shorter timer 
    will be started. */
-int ip_ct_tcp_max_retrans = 3;
+int ip_ct_tcp_max_retrans __read_mostly = 3;
 
   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
      closely.  They're more complex. --RR */
@@ -83,19 +83,19 @@
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-unsigned int ip_ct_tcp_timeout_syn_sent =      2 MINS;
-unsigned int ip_ct_tcp_timeout_syn_recv =     60 SECS;
-unsigned int ip_ct_tcp_timeout_established =   5 DAYS;
-unsigned int ip_ct_tcp_timeout_fin_wait =      2 MINS;
-unsigned int ip_ct_tcp_timeout_close_wait =   60 SECS;
-unsigned int ip_ct_tcp_timeout_last_ack =     30 SECS;
-unsigned int ip_ct_tcp_timeout_time_wait =     2 MINS;
-unsigned int ip_ct_tcp_timeout_close =        10 SECS;
+unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
+unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
+unsigned int ip_ct_tcp_timeout_established __read_mostly =   5 DAYS;
+unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
+unsigned int ip_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
+unsigned int ip_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
+unsigned int ip_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
+unsigned int ip_ct_tcp_timeout_close __read_mostly =        10 SECS;
 
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds 
    to ~13-30min depending on RTO. */
-unsigned int ip_ct_tcp_timeout_max_retrans =     5 MINS;
+unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
  
 static const unsigned int * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
@@ -731,13 +731,15 @@
 			if (state->last_dir == dir
 			    && state->last_seq == seq
 			    && state->last_ack == ack
-			    && state->last_end == end)
+			    && state->last_end == end
+			    && state->last_win == win)
 				state->retrans++;
 			else {
 				state->last_dir = dir;
 				state->last_seq = seq;
 				state->last_ack = ack;
 				state->last_end = end;
+				state->last_win = win;
 				state->retrans = 0;
 			}
 		}
@@ -865,8 +867,7 @@
   
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there 
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
 	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 9b2c16b..d0e8a16 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -18,8 +18,8 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 
-unsigned int ip_ct_udp_timeout = 30*HZ;
-unsigned int ip_ct_udp_timeout_stream = 180*HZ;
+unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
+unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
 
 static int udp_pkt_to_tuple(const struct sk_buff *skb,
 			     unsigned int dataoff,
@@ -117,8 +117,7 @@
 
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there 
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
 	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
 	    nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index 4f222d6..2893e9c 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -8,7 +8,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/skbuff.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 7a9fa04..0213575 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -35,7 +35,6 @@
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -534,7 +533,7 @@
 
 /* Sysctl support */
 
-int ip_conntrack_checksum = 1;
+int ip_conntrack_checksum __read_mostly = 1;
 
 #ifdef CONFIG_SYSCTL
 
@@ -563,7 +562,7 @@
 /* From ip_conntrack_proto_icmp.c */
 extern unsigned int ip_ct_icmp_timeout;
 
-/* From ip_conntrack_proto_icmp.c */
+/* From ip_conntrack_proto_generic.c */
 extern unsigned int ip_ct_generic_timeout;
 
 /* Log invalid packets of a given protocol */
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1741d55..71f3e09 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -22,9 +22,6 @@
 #include <linux/udp.h>
 #include <linux/jhash.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -33,7 +30,6 @@
 #include <linux/netfilter_ipv4/ip_nat_core.h>
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -101,18 +97,6 @@
 	write_unlock_bh(&ip_nat_lock);
 }
 
-/* We do checksum mangling, so if they were wrong before they're still
- * wrong.  Also works for incomplete packets (eg. ICMP dest
- * unreachables.) */
-u_int16_t
-ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
-	u_int32_t diffs[] = { oldvalinv, newval };
-	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
-				      oldcheck^0xFFFF));
-}
-EXPORT_SYMBOL(ip_nat_cheat_check);
-
 /* Is this tuple already taken? (not by us) */
 int
 ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
@@ -378,12 +362,12 @@
 	iph = (void *)(*pskb)->data + iphdroff;
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
-		iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
-						iph->check);
+		iph->check = nf_csum_update(~iph->saddr, target->src.ip,
+					    iph->check);
 		iph->saddr = target->src.ip;
 	} else {
-		iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
-						iph->check);
+		iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
+					    iph->check);
 		iph->daddr = target->dst.ip;
 	}
 	return 1;
@@ -423,10 +407,10 @@
 EXPORT_SYMBOL_GPL(ip_nat_packet);
 
 /* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
-				  struct ip_conntrack *ct,
-				  enum ip_nat_manip_type manip,
-				  enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
+				  enum ip_conntrack_info ctinfo,
+				  unsigned int hooknum,
+				  struct sk_buff **pskb)
 {
 	struct {
 		struct icmphdr icmp;
@@ -434,7 +418,9 @@
 	} *inside;
 	struct ip_conntrack_tuple inner, target;
 	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
+	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
 
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -443,12 +429,8 @@
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
-	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
-		hdrlen = (*pskb)->nh.iph->ihl * 4;
-		if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
-						(*pskb)->len - hdrlen, 0)))
-			return 0;
-	}
+	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+		return 0;
 
 	/* Must be RELATED */
 	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
@@ -487,12 +469,14 @@
 		       !manip))
 		return 0;
 
-	/* Reloading "inside" here since manip_pkt inner. */
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-	inside->icmp.checksum = 0;
-	inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
-						       (*pskb)->len - hdrlen,
-						       0));
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+		/* Reloading "inside" here since manip_pkt inner. */
+		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+		inside->icmp.checksum = 0;
+		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
+							       (*pskb)->len - hdrlen,
+							       0));
+	}
 
 	/* Change outer to look the reply to an incoming packet
 	 * (proto 0 means don't invert per-proto part). */
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index cbcaa45..7f6a759 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -27,16 +27,12 @@
 #include <net/tcp.h>
 #include <net/udp.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_protocol.h>
 #include <linux/netfilter_ipv4/ip_nat_core.h>
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -165,7 +161,7 @@
 {
 	struct iphdr *iph;
 	struct tcphdr *tcph;
-	int datalen;
+	int oldlen, datalen;
 
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return 0;
@@ -180,13 +176,22 @@
 	iph = (*pskb)->nh.iph;
 	tcph = (void *)iph + iph->ihl*4;
 
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
 			match_offset, match_len, rep_buffer, rep_len);
 
 	datalen = (*pskb)->len - iph->ihl*4;
-	tcph->check = 0;
-	tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
-				   csum_partial((char *)tcph, datalen, 0));
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+		tcph->check = 0;
+		tcph->check = tcp_v4_check(tcph, datalen,
+					   iph->saddr, iph->daddr,
+					   csum_partial((char *)tcph,
+					   		datalen, 0));
+	} else
+		tcph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   tcph->check, 1);
 
 	if (rep_len != match_len) {
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -221,6 +226,7 @@
 {
 	struct iphdr *iph;
 	struct udphdr *udph;
+	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
 	iph = (*pskb)->nh.iph;
@@ -238,22 +244,32 @@
 
 	iph = (*pskb)->nh.iph;
 	udph = (void *)iph + iph->ihl*4;
+
+	oldlen = (*pskb)->len - iph->ihl*4;
 	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
 			match_offset, match_len, rep_buffer, rep_len);
 
 	/* update the length of the UDP packet */
-	udph->len = htons((*pskb)->len - iph->ihl*4);
+	datalen = (*pskb)->len - iph->ihl*4;
+	udph->len = htons(datalen);
 
 	/* fix udp checksum if udp checksum was previously calculated */
-	if (udph->check) {
-		int datalen = (*pskb)->len - iph->ihl * 4;
+	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+		return 1;
+
+	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		udph->check = 0;
 		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 		                                datalen, IPPROTO_UDP,
 		                                csum_partial((char *)udph,
 		                                             datalen, 0));
-	}
-
+		if (!udph->check)
+			udph->check = -1;
+	} else
+		udph->check = nf_proto_csum_update(*pskb,
+						   htons(oldlen) ^ 0xFFFF,
+						   htons(datalen),
+						   udph->check, 1);
 	return 1;
 }
 EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
@@ -293,11 +309,14 @@
 			ntohl(sack->start_seq), new_start_seq,
 			ntohl(sack->end_seq), new_end_seq);
 
-		tcph->check = 
-			ip_nat_cheat_check(~sack->start_seq, new_start_seq,
-					   ip_nat_cheat_check(~sack->end_seq, 
-						   	      new_end_seq,
-							      tcph->check));
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->start_seq,
+						   new_start_seq,
+						   tcph->check, 0);
+		tcph->check = nf_proto_csum_update(skb,
+						   ~sack->end_seq,
+						   new_end_seq,
+						   tcph->check, 0);
 		sack->start_seq = new_start_seq;
 		sack->end_seq = new_end_seq;
 		sackoff += sizeof(*sack);
@@ -381,10 +400,10 @@
 		newack = ntohl(tcph->ack_seq) - other_way->offset_before;
 	newack = htonl(newack);
 
-	tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
-					 ip_nat_cheat_check(~tcph->ack_seq, 
-					 		    newack, 
-							    tcph->check));
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
+					   tcph->check, 0);
+	tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
+					   tcph->check, 0);
 
 	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 1d14996..2ff5788 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -32,7 +32,7 @@
  *     2005-06-10 - Version 3.0
  *       - kernel >= 2.6.11 version,
  *	   funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- * 
+ *
  */
 
 #include <linux/module.h>
@@ -85,19 +85,17 @@
 		DEBUGP("we are PNS->PAC\n");
 		/* therefore, build tuple for PAC->PNS */
 		t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
-		t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
+		t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
 		t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
-		t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
+		t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
 		t.dst.protonum = IPPROTO_GRE;
 	} else {
 		DEBUGP("we are PAC->PNS\n");
 		/* build tuple for PNS->PAC */
 		t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-		t.src.u.gre.key = 
-			htons(master->nat.help.nat_pptp_info.pns_call_id);
+		t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
 		t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-		t.dst.u.gre.key = 
-			htons(master->nat.help.nat_pptp_info.pac_call_id);
+		t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
 		t.dst.protonum = IPPROTO_GRE;
 	}
 
@@ -149,51 +147,52 @@
 {
 	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg, new_callid;
+	u_int16_t msg;
+	__be16 new_callid;
 	unsigned int cid_off;
 
-	new_callid = htons(ct_pptp_info->pns_call_id);
-	
+	new_callid = ct_pptp_info->pns_call_id;
+
 	switch (msg = ntohs(ctlh->messageType)) {
-		case PPTP_OUT_CALL_REQUEST:
-			cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
-			/* FIXME: ideally we would want to reserve a call ID
-			 * here.  current netfilter NAT core is not able to do
-			 * this :( For now we use TCP source port. This breaks
-			 * multiple calls within one control session */
+	case PPTP_OUT_CALL_REQUEST:
+		cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
+		/* FIXME: ideally we would want to reserve a call ID
+		 * here.  current netfilter NAT core is not able to do
+		 * this :( For now we use TCP source port. This breaks
+		 * multiple calls within one control session */
 
-			/* save original call ID in nat_info */
-			nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+		/* save original call ID in nat_info */
+		nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
 
-			/* don't use tcph->source since we are at a DSTmanip
-			 * hook (e.g. PREROUTING) and pkt is not mangled yet */
-			new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+		/* don't use tcph->source since we are at a DSTmanip
+		 * hook (e.g. PREROUTING) and pkt is not mangled yet */
+		new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
 
-			/* save new call ID in ct info */
-			ct_pptp_info->pns_call_id = ntohs(new_callid);
-			break;
-		case PPTP_IN_CALL_REPLY:
-			cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
-			break;
-		case PPTP_CALL_CLEAR_REQUEST:
-			cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
-			break;
-		default:
-			DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
-			      (msg <= PPTP_MSG_MAX)? 
-			      pptp_msg_name[msg]:pptp_msg_name[0]);
-			/* fall through */
+		/* save new call ID in ct info */
+		ct_pptp_info->pns_call_id = new_callid;
+		break;
+	case PPTP_IN_CALL_REPLY:
+		cid_off = offsetof(union pptp_ctrl_union, icack.callID);
+		break;
+	case PPTP_CALL_CLEAR_REQUEST:
+		cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
+		break;
+	default:
+		DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
+		      (msg <= PPTP_MSG_MAX)?
+		      pptp_msg_name[msg]:pptp_msg_name[0]);
+		/* fall through */
 
-		case PPTP_SET_LINK_INFO:
-			/* only need to NAT in case PAC is behind NAT box */
-		case PPTP_START_SESSION_REQUEST:
-		case PPTP_START_SESSION_REPLY:
-		case PPTP_STOP_SESSION_REQUEST:
-		case PPTP_STOP_SESSION_REPLY:
-		case PPTP_ECHO_REQUEST:
-		case PPTP_ECHO_REPLY:
-			/* no need to alter packet */
-			return NF_ACCEPT;
+	case PPTP_SET_LINK_INFO:
+		/* only need to NAT in case PAC is behind NAT box */
+	case PPTP_START_SESSION_REQUEST:
+	case PPTP_START_SESSION_REPLY:
+	case PPTP_STOP_SESSION_REQUEST:
+	case PPTP_STOP_SESSION_REPLY:
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* no need to alter packet */
+		return NF_ACCEPT;
 	}
 
 	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
@@ -212,80 +211,28 @@
 	return NF_ACCEPT;
 }
 
-static int
+static void
 pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 	     struct ip_conntrack_expect *expect_reply)
 {
-	struct ip_ct_pptp_master *ct_pptp_info = 
-				&expect_orig->master->help.ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info = 
-				&expect_orig->master->nat.help.nat_pptp_info;
-
 	struct ip_conntrack *ct = expect_orig->master;
-
-	struct ip_conntrack_tuple inv_t;
-	struct ip_conntrack_tuple *orig_t, *reply_t;
+	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
+	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
 
 	/* save original PAC call ID in nat_info */
 	nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
 
-	/* alter expectation */
-	orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
 	/* alter expectation for PNS->PAC direction */
-	invert_tuplepr(&inv_t, &expect_orig->tuple);
-	expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id);
-	expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
-	expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+	expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
+	expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
+	expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
 	expect_orig->dir = IP_CT_DIR_ORIGINAL;
-	inv_t.src.ip = reply_t->src.ip;
-	inv_t.dst.ip = reply_t->dst.ip;
-	inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
-	inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
-
-	if (!ip_conntrack_expect_related(expect_orig)) {
-		DEBUGP("successfully registered expect\n");
-	} else {
-		DEBUGP("can't expect_related(expect_orig)\n");
-		return 1;
-	}
 
 	/* alter expectation for PAC->PNS direction */
-	invert_tuplepr(&inv_t, &expect_reply->tuple);
-	expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
-	expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
-	expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+	expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
+	expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
+	expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
 	expect_reply->dir = IP_CT_DIR_REPLY;
-	inv_t.src.ip = orig_t->src.ip;
-	inv_t.dst.ip = orig_t->dst.ip;
-	inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
-	inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
-
-	if (!ip_conntrack_expect_related(expect_reply)) {
-		DEBUGP("successfully registered expect\n");
-	} else {
-		DEBUGP("can't expect_related(expect_reply)\n");
-		ip_conntrack_unexpect_related(expect_orig);
-		return 1;
-	}
-
-	if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
-		DEBUGP("can't register original keymap\n");
-		ip_conntrack_unexpect_related(expect_orig);
-		ip_conntrack_unexpect_related(expect_reply);
-		return 1;
-	}
-
-	if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
-		DEBUGP("can't register reply keymap\n");
-		ip_conntrack_unexpect_related(expect_orig);
-		ip_conntrack_unexpect_related(expect_reply);
-		ip_ct_gre_keymap_destroy(ct);
-		return 1;
-	}
-
-	return 0;
 }
 
 /* inbound packets == from PAC to PNS */
@@ -297,15 +244,15 @@
 		 union pptp_ctrl_union *pptpReq)
 {
 	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg, new_cid = 0, new_pcid;
-	unsigned int pcid_off, cid_off = 0;
+	u_int16_t msg;
+	__be16 new_pcid;
+	unsigned int pcid_off;
 
-	new_pcid = htons(nat_pptp_info->pns_call_id);
+	new_pcid = nat_pptp_info->pns_call_id;
 
 	switch (msg = ntohs(ctlh->messageType)) {
 	case PPTP_OUT_CALL_REPLY:
 		pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
-		cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
 		break;
 	case PPTP_IN_CALL_CONNECT:
 		pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
@@ -324,7 +271,7 @@
 		break;
 
 	default:
-		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? 
+		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
 			pptp_msg_name[msg]:pptp_msg_name[0]);
 		/* fall through */
 
@@ -351,17 +298,6 @@
 				     sizeof(new_pcid), (char *)&new_pcid,
 				     sizeof(new_pcid)) == 0)
 		return NF_DROP;
-
-	if (new_cid) {
-		DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-			ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid));
-		if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-		                             cid_off + sizeof(struct pptp_pkt_hdr) +
-					     sizeof(struct PptpControlHeader),
-					     sizeof(new_cid), (char *)&new_cid,
-					     sizeof(new_cid)) == 0)
-			return NF_DROP;
-	}
 	return NF_ACCEPT;
 }
 
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 38acfdf..bf91f93 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -6,10 +6,10 @@
  * GRE is a generic encapsulation protocol, which is generally not very
  * suited for NAT, as it has no protocol-specific part as port numbers.
  *
- * It has an optional key field, which may help us distinguishing two 
+ * It has an optional key field, which may help us distinguishing two
  * connections between the same two hosts.
  *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
  *
  * PPTP is built on top of a modified version of GRE, and has a mandatory
  * field called "CallID", which serves us for the same purpose as the key
@@ -60,14 +60,14 @@
 }
 
 /* generate unique tuple ... */
-static int 
+static int
 gre_unique_tuple(struct ip_conntrack_tuple *tuple,
 		 const struct ip_nat_range *range,
 		 enum ip_nat_manip_type maniptype,
 		 const struct ip_conntrack *conntrack)
 {
 	static u_int16_t key;
-	u_int16_t *keyptr;
+	__be16 *keyptr;
 	unsigned int min, i, range_size;
 
 	if (maniptype == IP_NAT_MANIP_SRC)
@@ -84,7 +84,7 @@
 		range_size = ntohs(range->max.gre.key) - min + 1;
 	}
 
-	DEBUGP("min = %u, range_size = %u\n", min, range_size); 
+	DEBUGP("min = %u, range_size = %u\n", min, range_size);
 
 	for (i = 0; i < range_size; i++, key++) {
 		*keyptr = htons(min + key % range_size);
@@ -117,7 +117,7 @@
 	greh = (void *)(*pskb)->data + hdroff;
 	pgreh = (struct gre_hdr_pptp *) greh;
 
-	/* we only have destination manip of a packet, since 'source key' 
+	/* we only have destination manip of a packet, since 'source key'
 	 * is not present in the packet itself */
 	if (maniptype == IP_NAT_MANIP_DST) {
 		/* key manipulation is always dest */
@@ -129,15 +129,16 @@
 			}
 			if (greh->csum) {
 				/* FIXME: Never tested this code... */
-				*(gre_csum(greh)) = 
-					ip_nat_cheat_check(~*(gre_key(greh)),
+				*(gre_csum(greh)) =
+					nf_proto_csum_update(*pskb,
+							~*(gre_key(greh)),
 							tuple->dst.u.gre.key,
-							*(gre_csum(greh)));
+							*(gre_csum(greh)), 0);
 			}
 			*(gre_key(greh)) = tuple->dst.u.gre.key;
 			break;
 		case GRE_VERSION_PPTP:
-			DEBUGP("call_id -> 0x%04x\n", 
+			DEBUGP("call_id -> 0x%04x\n",
 				ntohs(tuple->dst.u.gre.key));
 			pgreh->call_id = tuple->dst.u.gre.key;
 			break;
@@ -151,8 +152,8 @@
 }
 
 /* nat helper struct */
-static struct ip_nat_protocol gre = { 
-	.name		= "GRE", 
+static struct ip_nat_protocol gre = {
+	.name		= "GRE",
 	.protonum	= IPPROTO_GRE,
 	.manip_pkt	= gre_manip_pkt,
 	.in_range	= gre_in_range,
@@ -163,7 +164,7 @@
 	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
 #endif
 };
-				  
+
 int __init ip_nat_proto_gre_init(void)
 {
 	return ip_nat_protocol_register(&gre);
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 31a3f4c..ec50cc2 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -66,10 +66,10 @@
 		return 0;
 
 	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-
-	hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
-					    tuple->src.u.icmp.id,
-					    hdr->checksum);
+	hdr->checksum = nf_proto_csum_update(*pskb,
+					     hdr->un.echo.id ^ 0xFFFF,
+					     tuple->src.u.icmp.id,
+					     hdr->checksum, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index a3d1407..72a6307 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -129,10 +129,9 @@
 	if (hdrsize < sizeof(*hdr))
 		return 1;
 
-	hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(oldport ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+	hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
+	hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport,
+					  hdr->check, 0);
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index ec6053f..5da196a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -113,11 +113,16 @@
 		newport = tuple->dst.u.udp.port;
 		portptr = &hdr->dest;
 	}
-	if (hdr->check) /* 0 is a special case meaning no checksum */
-		hdr->check = ip_nat_cheat_check(~oldip, newip,
-					ip_nat_cheat_check(*portptr ^ 0xFFFF,
-							   newport,
-							   hdr->check));
+
+	if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
+		hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
+						  hdr->check, 1);
+		hdr->check = nf_proto_csum_update(*pskb,
+						  *portptr ^ 0xFFFF, newport,
+						  hdr->check, 0);
+		if (!hdr->check)
+			hdr->check = -1;
+	}
 	*portptr = newport;
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 1aba926..7b70383 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -19,14 +19,10 @@
 #include <net/route.h>
 #include <linux/bitops.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_core.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -104,8 +100,7 @@
 				    const struct net_device *out,
 				    unsigned int hooknum,
 				    const struct ipt_target *target,
-				    const void *targinfo,
-				    void *userinfo)
+				    const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
@@ -147,8 +142,7 @@
 				    const struct net_device *out,
 				    unsigned int hooknum,
 				    const struct ipt_target *target,
-				    const void *targinfo,
-				    void *userinfo)
+				    const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
@@ -174,7 +168,6 @@
 			       const void *entry,
 			       const struct ipt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hook_mask)
 {
 	struct ip_nat_multi_range_compat *mr = targinfo;
@@ -191,7 +184,6 @@
 			       const void *entry,
 			       const struct ipt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hook_mask)
 {
 	struct ip_nat_multi_range_compat *mr = targinfo;
@@ -255,7 +247,7 @@
 {
 	int ret;
 
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
+	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
 
 	if (ret == NF_ACCEPT) {
 		if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 17de077..9c577db 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -30,9 +30,6 @@
 #include <net/checksum.h>
 #include <linux/spinlock.h>
 
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
 #include <linux/netfilter_ipv4/ip_nat.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_nat_protocol.h>
@@ -40,7 +37,6 @@
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -110,11 +106,6 @@
 	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
 		       & htons(IP_MF|IP_OFFSET)));
 
-	/* If we had a hardware checksum before, it's now invalid */
-	if ((*pskb)->ip_summed == CHECKSUM_HW)
-		if (skb_checksum_help(*pskb, (out == NULL)))
-			return NF_DROP;
-
 	ct = ip_conntrack_get(*pskb, &ctinfo);
 	/* Can't track?  It's not due to stress, or conntrack would
 	   have dropped it.  Hence it's the user's responsibilty to
@@ -145,8 +136,8 @@
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
 		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
-			if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
-							   CTINFO2DIR(ctinfo)))
+			if (!ip_nat_icmp_reply_translation(ct, ctinfo,
+							   hooknum, pskb))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 198ac36..7edad79 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -52,15 +52,15 @@
 
 typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
 
-static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
 static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
 static unsigned int queue_dropped = 0;
 static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
+static struct sock *ipqnl __read_mostly;
 static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
@@ -208,9 +208,9 @@
 		break;
 	
 	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entry->skb,
-		                               entry->info->outdev == NULL))) {
+		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entry->skb))) {
 			read_unlock_bh(&queue_lock);
 			return NULL;
 		}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 048514f..800067d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -180,8 +180,7 @@
 	  const struct net_device *out,
 	  unsigned int hooknum,
 	  const struct xt_target *target,
-	  const void *targinfo,
-	  void *userinfo)
+	  const void *targinfo)
 {
 	if (net_ratelimit())
 		printk("ip_tables: error: `%s'\n", (char *)targinfo);
@@ -217,8 +216,7 @@
 	     unsigned int hook,
 	     const struct net_device *in,
 	     const struct net_device *out,
-	     struct ipt_table *table,
-	     void *userdata)
+	     struct ipt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	u_int16_t offset;
@@ -308,8 +306,7 @@
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
-								     t->data,
-								     userdata);
+								     t->data);
 
 #ifdef CONFIG_NETFILTER_DEBUG
 				if (((struct ipt_entry *)table_base)->comefrom
@@ -467,8 +464,7 @@
 		return 1;
 
 	if (m->u.kernel.match->destroy)
-		m->u.kernel.match->destroy(m->u.kernel.match, m->data,
-					   m->u.match_size - sizeof(*m));
+		m->u.kernel.match->destroy(m->u.kernel.match, m->data);
 	module_put(m->u.kernel.match->me);
 	return 0;
 }
@@ -521,7 +517,6 @@
 
 	if (m->u.kernel.match->checkentry
 	    && !m->u.kernel.match->checkentry(name, ip, match, m->data,
-					      m->u.match_size - sizeof(*m),
 					      hookmask)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
@@ -578,12 +573,10 @@
 	if (t->u.kernel.target == &ipt_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
-			goto cleanup_matches;
+			goto err;
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
 						      e->comefrom)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
@@ -655,8 +648,7 @@
 	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
 	t = ipt_get_target(e);
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-					    t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
 	module_put(t->u.kernel.target->me);
 	return 0;
 }
@@ -950,73 +942,28 @@
 	return delta;
 }
 
-struct compat_ipt_standard_target
+static void compat_standard_from_user(void *dst, void *src)
 {
-	struct compat_xt_entry_target target;
-	compat_int_t verdict;
-};
+	int v = *(compat_int_t *)src;
 
-struct compat_ipt_standard
+	if (v > 0)
+		v += compat_calc_jump(v);
+	memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, void *src)
 {
-	struct compat_ipt_entry entry;
-	struct compat_ipt_standard_target target;
-};
+	compat_int_t cv = *(int *)src;
 
-#define IPT_ST_LEN		XT_ALIGN(sizeof(struct ipt_standard_target))
-#define IPT_ST_COMPAT_LEN	COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target))
-#define IPT_ST_OFFSET		(IPT_ST_LEN - IPT_ST_COMPAT_LEN)
-
-static int compat_ipt_standard_fn(void *target,
-		void **dstptr, int *size, int convert)
-{
-	struct compat_ipt_standard_target compat_st, *pcompat_st;
-	struct ipt_standard_target st, *pst;
-	int ret;
-
-	ret = 0;
-	switch (convert) {
-		case COMPAT_TO_USER:
-			pst = target;
-			memcpy(&compat_st.target, &pst->target,
-				sizeof(compat_st.target));
-			compat_st.verdict = pst->verdict;
-			if (compat_st.verdict > 0)
-				compat_st.verdict -=
-					compat_calc_jump(compat_st.verdict);
-			compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN;
-			if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN))
-				ret = -EFAULT;
-			*size -= IPT_ST_OFFSET;
-			*dstptr += IPT_ST_COMPAT_LEN;
-			break;
-		case COMPAT_FROM_USER:
-			pcompat_st = target;
-			memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN);
-			st.verdict = pcompat_st->verdict;
-			if (st.verdict > 0)
-				st.verdict += compat_calc_jump(st.verdict);
-			st.target.u.user.target_size = IPT_ST_LEN;
-			memcpy(*dstptr, &st, IPT_ST_LEN);
-			*size += IPT_ST_OFFSET;
-			*dstptr += IPT_ST_LEN;
-			break;
-		case COMPAT_CALC_SIZE:
-			*size += IPT_ST_OFFSET;
-			break;
-		default:
-			ret = -ENOPROTOOPT;
-			break;
-	}
-	return ret;
+	if (cv > 0)
+		cv -= compat_calc_jump(cv);
+	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
 }
 
 static inline int
 compat_calc_match(struct ipt_entry_match *m, int * size)
 {
-	if (m->u.kernel.match->compat)
-		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
-	else
-		xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+	*size += xt_compat_match_offset(m->u.kernel.match);
 	return 0;
 }
 
@@ -1031,10 +978,7 @@
 	entry_offset = (void *)e - base;
 	IPT_MATCH_ITERATE(e, compat_calc_match, &off);
 	t = ipt_get_target(e);
-	if (t->u.kernel.target->compat)
-		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
-	else
-		xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+	off += xt_compat_target_offset(t->u.kernel.target);
 	newinfo->size -= off;
 	ret = compat_add_offset(entry_offset, off);
 	if (ret)
@@ -1420,17 +1364,13 @@
 };
 
 static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
-		void __user **dstptr, compat_uint_t *size)
+		void * __user *dstptr, compat_uint_t *size)
 {
-	if (m->u.kernel.match->compat)
-		return m->u.kernel.match->compat(m, dstptr, size,
-				COMPAT_TO_USER);
-	else
-		return xt_compat_match(m, dstptr, size, COMPAT_TO_USER);
+	return xt_compat_match_to_user(m, dstptr, size);
 }
 
 static int compat_copy_entry_to_user(struct ipt_entry *e,
-		void __user **dstptr, compat_uint_t *size)
+		void * __user *dstptr, compat_uint_t *size)
 {
 	struct ipt_entry_target __user *t;
 	struct compat_ipt_entry __user *ce;
@@ -1450,11 +1390,7 @@
 	if (ret)
 		goto out;
 	t = ipt_get_target(e);
-	if (t->u.kernel.target->compat)
-		ret = t->u.kernel.target->compat(t, dstptr, size,
-				COMPAT_TO_USER);
-	else
-		ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER);
+	ret = xt_compat_target_to_user(t, dstptr, size);
 	if (ret)
 		goto out;
 	ret = -EFAULT;
@@ -1486,11 +1422,7 @@
 		return match ? PTR_ERR(match) : -ENOENT;
 	}
 	m->u.kernel.match = match;
-
-	if (m->u.kernel.match->compat)
-		m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
-	else
-		xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE);
+	*size += xt_compat_match_offset(match);
 
 	(*i)++;
 	return 0;
@@ -1537,7 +1469,7 @@
 	ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
 			e->comefrom, &off, &j);
 	if (ret != 0)
-		goto out;
+		goto cleanup_matches;
 
 	t = ipt_get_target(e);
 	target = try_then_request_module(xt_find_target(AF_INET,
@@ -1547,14 +1479,11 @@
 	if (IS_ERR(target) || !target) {
 		duprintf("check_entry: `%s' not found\n", t->u.user.name);
 		ret = target ? PTR_ERR(target) : -ENOENT;
-		goto out;
+		goto cleanup_matches;
 	}
 	t->u.kernel.target = target;
 
-	if (t->u.kernel.target->compat)
-		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
-	else
-		xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE);
+	off += xt_compat_target_offset(target);
 	*size += off;
 	ret = compat_add_offset(entry_offset, off);
 	if (ret)
@@ -1574,14 +1503,17 @@
 
 	(*i)++;
 	return 0;
+
 out:
+	module_put(t->u.kernel.target->me);
+cleanup_matches:
 	IPT_MATCH_ITERATE(e, cleanup_match, &j);
 	return ret;
 }
 
 static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
 	void **dstptr, compat_uint_t *size, const char *name,
-	const struct ipt_ip *ip, unsigned int hookmask)
+	const struct ipt_ip *ip, unsigned int hookmask, int *i)
 {
 	struct ipt_entry_match *dm;
 	struct ipt_match *match;
@@ -1589,26 +1521,28 @@
 
 	dm = (struct ipt_entry_match *)*dstptr;
 	match = m->u.kernel.match;
-	if (match->compat)
-		match->compat(m, dstptr, size, COMPAT_FROM_USER);
-	else
-		xt_compat_match(m, dstptr, size, COMPAT_FROM_USER);
+	xt_compat_match_from_user(m, dstptr, size);
 
 	ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm),
 			     name, hookmask, ip->proto,
 			     ip->invflags & IPT_INV_PROTO);
 	if (ret)
-		return ret;
+		goto err;
 
 	if (m->u.kernel.match->checkentry
 	    && !m->u.kernel.match->checkentry(name, ip, match, dm->data,
-					      dm->u.match_size - sizeof(*dm),
 					      hookmask)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err;
 	}
+	(*i)++;
 	return 0;
+
+err:
+	module_put(m->u.kernel.match->me);
+	return ret;
 }
 
 static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
@@ -1619,25 +1553,23 @@
 	struct ipt_target *target;
 	struct ipt_entry *de;
 	unsigned int origsize;
-	int ret, h;
+	int ret, h, j;
 
 	ret = 0;
 	origsize = *size;
 	de = (struct ipt_entry *)*dstptr;
 	memcpy(de, e, sizeof(struct ipt_entry));
 
+	j = 0;
 	*dstptr += sizeof(struct compat_ipt_entry);
 	ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
-			name, &de->ip, de->comefrom);
+			name, &de->ip, de->comefrom, &j);
 	if (ret)
-		goto out;
+		goto cleanup_matches;
 	de->target_offset = e->target_offset - (origsize - *size);
 	t = ipt_get_target(e);
 	target = t->u.kernel.target;
-	if (target->compat)
-		target->compat(t, dstptr, size, COMPAT_FROM_USER);
-	else
-		xt_compat_target(t, dstptr, size, COMPAT_FROM_USER);
+	xt_compat_target_from_user(t, dstptr, size);
 
 	de->next_offset = e->next_offset - (origsize - *size);
 	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
@@ -1653,22 +1585,26 @@
 			      name, e->comefrom, e->ip.proto,
 			      e->ip.invflags & IPT_INV_PROTO);
 	if (ret)
-		goto out;
+		goto err;
 
 	ret = -EINVAL;
 	if (t->u.kernel.target == &ipt_standard_target) {
 		if (!standard_check(t, *size))
-			goto out;
+			goto err;
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, de, target,
-				t->data, t->u.target_size - sizeof(*t),
-				de->comefrom)) {
+						      t->data, de->comefrom)) {
 		duprintf("ip_tables: compat: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
-		goto out;
+		goto err;
 	}
 	ret = 0;
-out:
+	return ret;
+
+err:
+	module_put(t->u.kernel.target->me);
+cleanup_matches:
+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
 	return ret;
 }
 
@@ -1989,6 +1925,8 @@
 	return ret;
 }
 
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
 static int
 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 {
@@ -2002,8 +1940,7 @@
 		ret = compat_get_entries(user, len);
 		break;
 	default:
-		duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
-		ret = -EINVAL;
+		ret = do_ipt_get_ctl(sk, cmd, user, len);
 	}
 	return ret;
 }
@@ -2185,7 +2122,6 @@
 	   const void *info,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_icmp *icmpinfo = matchinfo;
@@ -2200,7 +2136,9 @@
 	.targetsize	= sizeof(int),
 	.family		= AF_INET,
 #ifdef CONFIG_COMPAT
-	.compat		= &compat_ipt_standard_fn,
+	.compatsize	= sizeof(compat_int_t),
+	.compat_from_user = compat_standard_from_user,
+	.compat_to_user	= compat_standard_to_user,
 #endif
 };
 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d994c5f..4158966 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -302,8 +302,7 @@
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 	enum ip_conntrack_info ctinfo;
@@ -373,7 +372,6 @@
 	   const void *e_void,
 	   const struct xt_target *target,
            void *targinfo,
-           unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
@@ -450,8 +448,7 @@
 }
 
 /* drop reference count of cluster config when rule is deleted */
-static void destroy(const struct xt_target *target, void *targinfo,
-		    unsigned int targinfosize)
+static void destroy(const struct xt_target *target, void *targinfo)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
deleted file mode 100644
index c8e9712..0000000
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/* iptables module for setting the IPv4 DSCP field, Version 1.8
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as 
- * published by the Free Software Foundation.
- * 
- * See RFC2474 for a description of the DSCP field within the IP Header.
- *
- * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
-*/
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_DSCP.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP modification module");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
-       const struct net_device *in,
-       const struct net_device *out,
-       unsigned int hooknum,
-       const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
-{
-	const struct ipt_DSCP_info *dinfo = targinfo;
-	u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
-
-	if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
-		u_int16_t diffs[2];
-
-		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
-			return NF_DROP;
-
-		diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
-		(*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK)
-			| sh_dscp;
-		diffs[1] = htons((*pskb)->nh.iph->tos);
-		(*pskb)->nh.iph->check
-			= csum_fold(csum_partial((char *)diffs,
-						 sizeof(diffs),
-						 (*pskb)->nh.iph->check
-						 ^ 0xFFFF));
-	}
-	return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
-	   const void *e_void,
-	   const struct xt_target *target,
-           void *targinfo,
-           unsigned int targinfosize,
-           unsigned int hook_mask)
-{
-	const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp;
-
-	if ((dscp > IPT_DSCP_MAX)) {
-		printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
-		return 0;
-	}
-	return 1;
-}
-
-static struct ipt_target ipt_dscp_reg = {
-	.name		= "DSCP",
-	.target		= target,
-	.targetsize	= sizeof(struct ipt_DSCP_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init ipt_dscp_init(void)
-{
-	return ipt_register_target(&ipt_dscp_reg);
-}
-
-static void __exit ipt_dscp_fini(void)
-{
-	ipt_unregister_target(&ipt_dscp_reg);
-}
-
-module_init(ipt_dscp_init);
-module_exit(ipt_dscp_fini);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4adf5c9..23f9c7e 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -27,32 +27,28 @@
 static inline int
 set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
-	if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK)
-	    != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
-		u_int16_t diffs[2];
+	struct iphdr *iph = (*pskb)->nh.iph;
+	u_int16_t oldtos;
 
+	if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return 0;
-
-		diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
-		(*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK;
-		(*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
-		diffs[1] = htons((*pskb)->nh.iph->tos);
-		(*pskb)->nh.iph->check
-			= csum_fold(csum_partial((char *)diffs,
-						 sizeof(diffs),
-						 (*pskb)->nh.iph->check
-						 ^0xFFFF));
+		iph = (*pskb)->nh.iph;
+		oldtos = iph->tos;
+		iph->tos &= ~IPT_ECN_IP_MASK;
+		iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
+		iph->check = nf_csum_update(htons(oldtos) ^ htons(0xFFFF),
+					    htons(iph->tos), iph->check);
 	} 
 	return 1;
 }
 
 /* Return 0 if there was an error. */
 static inline int
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
+set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
 	struct tcphdr _tcph, *tcph;
-	u_int16_t diffs[2];
+	u_int16_t oldval;
 
 	/* Not enought header? */
 	tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
@@ -70,22 +66,16 @@
 		return 0;
 	tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
 
-	if ((*pskb)->ip_summed == CHECKSUM_HW &&
-	    skb_checksum_help(*pskb, inward))
-		return 0;
-
-	diffs[0] = ((u_int16_t *)tcph)[6];
+	oldval = ((u_int16_t *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
 		tcph->ece = einfo->proto.tcp.ece;
 	if (einfo->operation & IPT_ECN_OP_SET_CWR)
 		tcph->cwr = einfo->proto.tcp.cwr;
-	diffs[1] = ((u_int16_t *)tcph)[6];
-	diffs[0] = diffs[0] ^ 0xFFFF;
 
-	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
-		tcph->check = csum_fold(csum_partial((char *)diffs,
-						     sizeof(diffs),
-						     tcph->check^0xFFFF));
+	tcph->check = nf_proto_csum_update((*pskb),
+					   oldval ^ 0xFFFF,
+					   ((u_int16_t *)tcph)[6],
+					   tcph->check, 0);
 	return 1;
 }
 
@@ -95,8 +85,7 @@
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct ipt_ECN_info *einfo = targinfo;
 
@@ -106,7 +95,7 @@
 
 	if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
 	    && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
-		if (!set_ect_tcp(pskb, einfo, (out == NULL)))
+		if (!set_ect_tcp(pskb, einfo))
 			return NF_DROP;
 
 	return IPT_CONTINUE;
@@ -117,7 +106,6 @@
 	   const void *e_void,
 	   const struct xt_target *target,
            void *targinfo,
-           unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b98f7b0..7dc820d 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -416,8 +416,7 @@
 	       const struct net_device *out,
 	       unsigned int hooknum,
 	       const struct xt_target *target,
-	       const void *targinfo,
-	       void *userinfo)
+	       const void *targinfo)
 {
 	const struct ipt_log_info *loginfo = targinfo;
 	struct nf_loginfo li;
@@ -440,7 +439,6 @@
 			      const void *e,
 			      const struct xt_target *target,
 			      void *targinfo,
-			      unsigned int targinfosize,
 			      unsigned int hook_mask)
 {
 	const struct ipt_log_info *loginfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ebd94f2..bc65168 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -42,7 +42,6 @@
 		 const void *e,
 		 const struct xt_target *target,
 		 void *targinfo,
-		 unsigned int targinfosize,
 		 unsigned int hook_mask)
 {
 	const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -64,8 +63,7 @@
 		  const struct net_device *out,
 		  unsigned int hooknum,
 		  const struct xt_target *target,
-		  const void *targinfo,
-		  void *userinfo)
+		  const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 736c4b5..beb2914 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -33,7 +33,6 @@
       const void *e,
       const struct xt_target *target,
       void *targinfo,
-      unsigned int targinfosize,
       unsigned int hook_mask)
 {
 	const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -55,8 +54,7 @@
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f290463..f03d436 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -36,7 +36,6 @@
 	       const void *e,
 	       const struct xt_target *target,
 	       void *targinfo,
-	       unsigned int targinfosize,
 	       unsigned int hook_mask)
 {
 	const struct ip_nat_multi_range_compat *mr = targinfo;
@@ -58,8 +57,7 @@
 		const struct net_device *out,
 		unsigned int hooknum,
 		const struct xt_target *target,
-		const void *targinfo,
-		void *userinfo)
+		const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 269bc20..b81821e 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -90,6 +90,7 @@
 	fl.proto = IPPROTO_TCP;
 	fl.fl_ip_sport = tcph->dest;
 	fl.fl_ip_dport = tcph->source;
+	security_skb_classify_flow(skb, &fl);
 
 	xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
 
@@ -184,6 +185,7 @@
 	tcph->urg_ptr = 0;
 
 	/* Adjust TCP checksum */
+	nskb->ip_summed = CHECKSUM_NONE;
 	tcph->check = 0;
 	tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
 				   nskb->nh.iph->saddr,
@@ -226,8 +228,7 @@
 			   const struct net_device *out,
 			   unsigned int hooknum,
 			   const struct xt_target *target,
-			   const void *targinfo,
-			   void *userinfo)
+			   const void *targinfo)
 {
 	const struct ipt_reject_info *reject = targinfo;
 
@@ -275,7 +276,6 @@
 		 const void *e_void,
 		 const struct xt_target *target,
 		 void *targinfo,
-		 unsigned int targinfosize,
 		 unsigned int hook_mask)
 {
  	const struct ipt_reject_info *rejinfo = targinfo;
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 7169b09..efbcb11 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -52,7 +52,6 @@
 	      const void *e,
 	      const struct xt_target *target,
 	      void *targinfo,
-	      unsigned int targinfosize,
 	      unsigned int hook_mask)
 {
 	unsigned int count, countess, rangeip, index = 0;
@@ -116,8 +115,7 @@
 }
 
 static void 
-same_destroy(const struct xt_target *target, void *targinfo,
-		unsigned int targinfosize)
+same_destroy(const struct xt_target *target, void *targinfo)
 {
 	struct ipt_same_info *mr = targinfo;
 
@@ -133,8 +131,7 @@
 		const struct net_device *out,
 		unsigned int hooknum,
 		const struct xt_target *target,
-		const void *targinfo,
-		void *userinfo)
+		const void *targinfo)
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index ef2fe5b..4246c43 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -21,26 +21,14 @@
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables TCP MSS modification module");
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static u_int16_t
-cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
-	u_int32_t diffs[] = { oldvalinv, newval };
-	return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
-                                      oldcheck^0xFFFF));
-}
-
 static inline unsigned int
 optlen(const u_int8_t *opt, unsigned int offset)
 {
 	/* Beware zero-length options: make finite progress */
-	if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1;
-	else return opt[offset+1];
+	if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
+		return 1;
+	else
+		return opt[offset+1];
 }
 
 static unsigned int
@@ -49,8 +37,7 @@
 		  const struct net_device *out,
 		  unsigned int hooknum,
 		  const struct xt_target *target,
-		  const void *targinfo,
-		  void *userinfo)
+		  const void *targinfo)
 {
 	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
 	struct tcphdr *tcph;
@@ -62,13 +49,8 @@
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return NF_DROP;
 
-	if ((*pskb)->ip_summed == CHECKSUM_HW &&
-	    skb_checksum_help(*pskb, out == NULL))
-		return NF_DROP;
-
 	iph = (*pskb)->nh.iph;
 	tcplen = (*pskb)->len - iph->ihl*4;
-
 	tcph = (void *)iph + iph->ihl*4;
 
 	/* Since it passed flags test in tcp match, we know it is is
@@ -84,54 +66,41 @@
 		return NF_DROP;
 	}
 
-	if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
-		if(!(*pskb)->dst) {
+	if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
+		if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) +
+					     sizeof(struct tcphdr)) {
 			if (net_ratelimit())
-				printk(KERN_ERR
-			       		"ipt_tcpmss_target: no dst?! can't determine path-MTU\n");
+				printk(KERN_ERR "ipt_tcpmss_target: "
+				       "unknown or invalid path-MTU (%d)\n",
+				       dst_mtu((*pskb)->dst));
 			return NF_DROP; /* or IPT_CONTINUE ?? */
 		}
 
-		if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
-			if (net_ratelimit())
-				printk(KERN_ERR
-		       			"ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst));
-			return NF_DROP; /* or IPT_CONTINUE ?? */
-		}
-
-		newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
+		newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) -
+						 sizeof(struct tcphdr);
 	} else
 		newmss = tcpmssinfo->mss;
 
  	opt = (u_int8_t *)tcph;
-	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){
-		if ((opt[i] == TCPOPT_MSS) &&
-		    ((tcph->doff*4 - i) >= TCPOLEN_MSS) &&
-		    (opt[i+1] == TCPOLEN_MSS)) {
+	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
+		if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
+		    opt[i+1] == TCPOLEN_MSS) {
 			u_int16_t oldmss;
 
 			oldmss = (opt[i+2] << 8) | opt[i+3];
 
-			if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
-				(oldmss <= newmss))
-					return IPT_CONTINUE;
+			if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
+			    oldmss <= newmss)
+				return IPT_CONTINUE;
 
 			opt[i+2] = (newmss & 0xff00) >> 8;
 			opt[i+3] = (newmss & 0x00ff);
 
-			tcph->check = cheat_check(htons(oldmss)^0xFFFF,
-						  htons(newmss),
-						  tcph->check);
-
-			DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
-			       "->%u.%u.%u.%u:%hu changed TCP MSS option"
-			       " (from %u to %u)\n", 
-			       NIPQUAD((*pskb)->nh.iph->saddr),
-			       ntohs(tcph->source),
-			       NIPQUAD((*pskb)->nh.iph->daddr),
-			       ntohs(tcph->dest),
-			       oldmss, newmss);
-			goto retmodified;
+			tcph->check = nf_proto_csum_update(*pskb,
+							   htons(oldmss)^0xFFFF,
+							   htons(newmss),
+							   tcph->check, 0);
+			return IPT_CONTINUE;
 		}
 	}
 
@@ -143,13 +112,8 @@
 
 		newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
 					 TCPOLEN_MSS, GFP_ATOMIC);
-		if (!newskb) {
-			if (net_ratelimit())
-				printk(KERN_ERR "ipt_tcpmss_target:"
-				       " unable to allocate larger skb\n");
+		if (!newskb)
 			return NF_DROP;
-		}
-
 		kfree_skb(*pskb);
 		*pskb = newskb;
 		iph = (*pskb)->nh.iph;
@@ -161,36 +125,29 @@
  	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
 	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
 
-	tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF,
-				  htons(tcplen + TCPOLEN_MSS), tcph->check);
-	tcplen += TCPOLEN_MSS;
-
+	tcph->check = nf_proto_csum_update(*pskb,
+					   htons(tcplen) ^ 0xFFFF,
+				           htons(tcplen + TCPOLEN_MSS),
+					   tcph->check, 1);
 	opt[0] = TCPOPT_MSS;
 	opt[1] = TCPOLEN_MSS;
 	opt[2] = (newmss & 0xff00) >> 8;
 	opt[3] = (newmss & 0x00ff);
 
-	tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt),
+					   tcph->check, 0);
 
 	oldval = ((u_int16_t *)tcph)[6];
 	tcph->doff += TCPOLEN_MSS/4;
-	tcph->check = cheat_check(oldval ^ 0xFFFF,
-				  ((u_int16_t *)tcph)[6], tcph->check);
+	tcph->check = nf_proto_csum_update(*pskb,
+					   oldval ^ 0xFFFF,
+					   ((u_int16_t *)tcph)[6],
+					   tcph->check, 0);
 
 	newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
-	iph->check = cheat_check(iph->tot_len ^ 0xFFFF,
-				 newtotlen, iph->check);
+	iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF,
+				    newtotlen, iph->check);
 	iph->tot_len = newtotlen;
-
-	DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
-	       "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n",
-	       NIPQUAD((*pskb)->nh.iph->saddr),
-	       ntohs(tcph->source),
-	       NIPQUAD((*pskb)->nh.iph->daddr),
-	       ntohs(tcph->dest),
-	       newmss);
-
- retmodified:
 	return IPT_CONTINUE;
 }
 
@@ -200,9 +157,9 @@
 {
 	const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
 
-	if (strcmp(m->u.kernel.match->name, "tcp") == 0
-	    && (tcpinfo->flg_cmp & TH_SYN)
-	    && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
+	if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
+	    tcpinfo->flg_cmp & TH_SYN &&
+	    !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
 		return 1;
 
 	return 0;
@@ -214,17 +171,17 @@
 		      const void *e_void,
 		      const struct xt_target *target,
 		      void *targinfo,
-		      unsigned int targinfosize,
 		      unsigned int hook_mask)
 {
 	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
 	const struct ipt_entry *e = e_void;
 
-	if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && 
-			((hook_mask & ~((1 << NF_IP_FORWARD)
-			   	| (1 << NF_IP_LOCAL_OUT)
-			   	| (1 << NF_IP_POST_ROUTING))) != 0)) {
-		printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
+	if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU &&
+	    (hook_mask & ~((1 << NF_IP_FORWARD) |
+			   (1 << NF_IP_LOCAL_OUT) |
+			   (1 << NF_IP_POST_ROUTING))) != 0) {
+		printk("TCPMSS: path-MTU clamping only supported in "
+		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return 0;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 1c7a5ca..471a4c4 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -26,27 +26,20 @@
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct ipt_tos_target_info *tosinfo = targinfo;
+	struct iphdr *iph = (*pskb)->nh.iph;
+	u_int16_t oldtos;
 
-	if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
-		u_int16_t diffs[2];
-
+	if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return NF_DROP;
-
-		diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
-		(*pskb)->nh.iph->tos
-			= ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK)
-			| tosinfo->tos;
-		diffs[1] = htons((*pskb)->nh.iph->tos);
-		(*pskb)->nh.iph->check
-			= csum_fold(csum_partial((char *)diffs,
-						 sizeof(diffs),
-						 (*pskb)->nh.iph->check
-						 ^0xFFFF));
+		iph = (*pskb)->nh.iph;
+		oldtos = iph->tos;
+		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
+		iph->check = nf_csum_update(htons(oldtos) ^ htons(0xFFFF),
+					    htons(iph->tos), iph->check);
 	}
 	return IPT_CONTINUE;
 }
@@ -56,7 +49,6 @@
 	   const void *e_void,
 	   const struct xt_target *target,
            void *targinfo,
-           unsigned int targinfosize,
            unsigned int hook_mask)
 {
 	const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index f48892a..96e79cc 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -23,11 +23,10 @@
 ipt_ttl_target(struct sk_buff **pskb,
 	       const struct net_device *in, const struct net_device *out,
 	       unsigned int hooknum, const struct xt_target *target,
-	       const void *targinfo, void *userinfo)
+	       const void *targinfo)
 {
 	struct iphdr *iph;
 	const struct ipt_TTL_info *info = targinfo;
-	u_int16_t diffs[2];
 	int new_ttl;
 
 	if (!skb_make_writable(pskb, (*pskb)->len))
@@ -55,12 +54,10 @@
 	}
 
 	if (new_ttl != iph->ttl) {
-		diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF;
+		iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF,
+					    ntohs(new_ttl << 8),
+					    iph->check);
 		iph->ttl = new_ttl;
-		diffs[1] = htons(((unsigned)iph->ttl) << 8);
-		iph->check = csum_fold(csum_partial((char *)diffs,
-						    sizeof(diffs),
-						    iph->check^0xFFFF));
 	}
 
 	return IPT_CONTINUE;
@@ -70,7 +67,6 @@
 		const void *e,
 		const struct xt_target *target,
 		void *targinfo,
-		unsigned int targinfosize,
 		unsigned int hook_mask)
 {
 	struct ipt_TTL_info *info = targinfo;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d46fd67..2b104ea 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -308,7 +308,7 @@
 				    const struct net_device *out,
 				    unsigned int hooknum,
 				    const struct xt_target *target,
-				    const void *targinfo, void *userinfo)
+				    const void *targinfo)
 {
 	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
 
@@ -346,7 +346,6 @@
 			       const void *e,
 			       const struct xt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hookmask)
 {
 	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 2927135..1798f86 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -74,7 +74,6 @@
 	   const void *ip_void,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_ah *ahinfo = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
deleted file mode 100644
index 4717759..0000000
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/* IP tables module for matching the value of the IPv4 DSCP field
- *
- * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_dscp.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP matching module");
-MODULE_LICENSE("GPL");
-
-static int match(const struct sk_buff *skb,
-		 const struct net_device *in, const struct net_device *out,
-		 const struct xt_match *match, const void *matchinfo,
-		 int offset, unsigned int protoff, int *hotdrop)
-{
-	const struct ipt_dscp_info *info = matchinfo;
-	const struct iphdr *iph = skb->nh.iph;
-
-	u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
-	return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
-}
-
-static struct ipt_match dscp_match = {
-	.name		= "dscp",
-	.match		= match,
-	.matchsize	= sizeof(struct ipt_dscp_info),
-	.me		= THIS_MODULE,
-};
-
-static int __init ipt_dscp_init(void)
-{
-	return ipt_register_match(&dscp_match);
-}
-
-static void __exit ipt_dscp_fini(void)
-{
-	ipt_unregister_match(&dscp_match);
-
-}
-
-module_init(ipt_dscp_init);
-module_exit(ipt_dscp_fini);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b282504..dafbdec 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -88,8 +88,7 @@
 
 static int checkentry(const char *tablename, const void *ip_void,
 		      const struct xt_match *match,
-		      void *matchinfo, unsigned int matchsize,
-		      unsigned int hook_mask)
+		      void *matchinfo, unsigned int hook_mask)
 {
 	const struct ipt_ecn_info *info = matchinfo;
 	const struct ipt_ip *ip = ip_void;
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 3bd2368..4f73a61 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -478,7 +478,6 @@
 		     const void *inf,
 		     const struct xt_match *match,
 		     void *matchinfo,
-		     unsigned int matchsize,
 		     unsigned int hook_mask)
 {
 	struct ipt_hashlimit_info *r = matchinfo;
@@ -529,18 +528,46 @@
 }
 
 static void
-hashlimit_destroy(const struct xt_match *match, void *matchinfo,
-		  unsigned int matchsize)
+hashlimit_destroy(const struct xt_match *match, void *matchinfo)
 {
 	struct ipt_hashlimit_info *r = matchinfo;
 
 	htable_put(r->hinfo);
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ipt_hashlimit_info {
+	char name[IFNAMSIZ];
+	struct hashlimit_cfg cfg;
+	compat_uptr_t hinfo;
+	compat_uptr_t master;
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+	int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+
+	memcpy(dst, src, off);
+	memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off);
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	int off = offsetof(struct compat_ipt_hashlimit_info, hinfo);
+
+	return copy_to_user(dst, src, off) ? -EFAULT : 0;
+}
+#endif
+
 static struct ipt_match ipt_hashlimit = {
 	.name		= "hashlimit",
 	.match		= hashlimit_match,
 	.matchsize	= sizeof(struct ipt_hashlimit_info),
+#ifdef CONFIG_COMPAT
+	.compatsize	= sizeof(struct compat_ipt_hashlimit_info),
+	.compat_from_user = compat_from_user,
+	.compat_to_user	= compat_to_user,
+#endif
 	.checkentry	= hashlimit_checkentry,
 	.destroy	= hashlimit_destroy,
 	.me		= THIS_MODULE
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
index 5ac6ac0..78c336f1 100644
--- a/net/ipv4/netfilter/ipt_owner.c
+++ b/net/ipv4/netfilter/ipt_owner.c
@@ -56,7 +56,6 @@
            const void *ip,
 	   const struct xt_match *match,
            void *matchinfo,
-           unsigned int matchsize,
            unsigned int hook_mask)
 {
 	const struct ipt_owner_info *info = matchinfo;
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 61a2139..32ae8d7 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -35,14 +35,20 @@
 static unsigned int ip_pkt_list_tot = 20;
 static unsigned int ip_list_hash_size = 0;
 static unsigned int ip_list_perms = 0644;
+static unsigned int ip_list_uid = 0;
+static unsigned int ip_list_gid = 0;
 module_param(ip_list_tot, uint, 0400);
 module_param(ip_pkt_list_tot, uint, 0400);
 module_param(ip_list_hash_size, uint, 0400);
 module_param(ip_list_perms, uint, 0400);
+module_param(ip_list_uid, uint, 0400);
+module_param(ip_list_gid, uint, 0400);
 MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
 MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
 MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
 MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files");
 
 
 struct recent_entry {
@@ -232,7 +238,7 @@
 static int
 ipt_recent_checkentry(const char *tablename, const void *ip,
 		      const struct xt_match *match, void *matchinfo,
-		      unsigned int matchsize, unsigned int hook_mask)
+		      unsigned int hook_mask)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
@@ -274,6 +280,8 @@
 		goto out;
 	}
 	t->proc->proc_fops = &recent_fops;
+	t->proc->uid       = ip_list_uid;
+	t->proc->gid       = ip_list_gid;
 	t->proc->data      = t;
 #endif
 	spin_lock_bh(&recent_lock);
@@ -286,8 +294,7 @@
 }
 
 static void
-ipt_recent_destroy(const struct xt_match *match, void *matchinfo,
-		   unsigned int matchsize)
+ipt_recent_destroy(const struct xt_match *match, void *matchinfo)
 {
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 7f41748..e2e7dd8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -90,7 +90,7 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static unsigned int
@@ -108,7 +108,7 @@
 		return NF_ACCEPT;
 	}
 
-	return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ipt_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 4e7998b..79336cb 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -119,7 +119,7 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ipt_do_table(pskb, hook, in, out, &packet_mangler);
 }
 
 static unsigned int
@@ -148,7 +148,7 @@
 	daddr = (*pskb)->nh.iph->daddr;
 	tos = (*pskb)->nh.iph->tos;
 
-	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
 	    && ((*pskb)->nh.iph->saddr != saddr
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 7912cce..bcbeb4a 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -95,7 +95,7 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL);
+	return ipt_do_table(pskb, hook, in, out, &packet_raw);
 }
 
 /* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 663a73e..790f00d 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -25,7 +25,7 @@
 #include <net/netfilter/nf_conntrack_protocol.h>
 #include <net/netfilter/nf_conntrack_core.h>
 
-unsigned long nf_ct_icmp_timeout = 30*HZ;
+unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d61e2a9..9c6cbe3 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -173,6 +173,8 @@
 	SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
 	SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
 	SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+	SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+	SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 62b2762..0e935b4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -38,8 +38,7 @@
  *		as published by the Free Software Foundation; either version
  *		2 of the License, or (at your option) any later version.
  */
- 
-#include <linux/config.h> 
+
 #include <linux/types.h>
 #include <asm/atomic.h>
 #include <asm/byteorder.h>
@@ -484,6 +483,7 @@
 		if (!inet->hdrincl)
 			raw_probe_proto_opt(&fl, msg);
 
+		security_sk_classify_flow(sk, &fl);
 		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
 	}
 	if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b873cbc..20ffe8e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2639,51 +2639,54 @@
 {
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtmsg *r;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	struct nlmsghdr *nlh;
 	struct rta_cacheinfo ci;
-#ifdef CONFIG_IP_MROUTE
-	struct rtattr *eptr;
-#endif
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
-	r = NLMSG_DATA(nlh);
+
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	r = nlmsg_data(nlh);
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
 	r->rtm_tos	= rt->fl.fl4_tos;
 	r->rtm_table	= RT_TABLE_MAIN;
+	NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
 	r->rtm_type	= rt->rt_type;
 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
 	r->rtm_protocol = RTPROT_UNSPEC;
 	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
 	if (rt->rt_flags & RTCF_NOTIFY)
 		r->rtm_flags |= RTM_F_NOTIFY;
-	RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+
+	NLA_PUT_U32(skb, RTA_DST, rt->rt_dst);
+
 	if (rt->fl.fl4_src) {
 		r->rtm_src_len = 32;
-		RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
+		NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src);
 	}
 	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
 #ifdef CONFIG_NET_CLS_ROUTE
 	if (rt->u.dst.tclassid)
-		RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
+		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
-		__u32 alg = rt->rt_multipath_alg;
-
-		RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
-	}
+	if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
+		NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
 #endif
 	if (rt->fl.iif)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+		NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst);
 	else if (rt->rt_src != rt->fl.fl4_src)
-		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
+		NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src);
+
 	if (rt->rt_dst != rt->rt_gateway)
-		RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+		NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway);
+
 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	ci.rta_lastuse	= jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	ci.rta_used	= rt->u.dst.__use;
 	ci.rta_clntref	= atomic_read(&rt->u.dst.__refcnt);
@@ -2700,10 +2703,7 @@
 			ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
 		}
 	}
-#ifdef CONFIG_IP_MROUTE
-	eptr = (struct rtattr*)skb->tail;
-#endif
-	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+
 	if (rt->fl.iif) {
 #ifdef CONFIG_IP_MROUTE
 		u32 dst = rt->rt_dst;
@@ -2715,41 +2715,46 @@
 				if (!nowait) {
 					if (err == 0)
 						return 0;
-					goto nlmsg_failure;
+					goto nla_put_failure;
 				} else {
 					if (err == -EMSGSIZE)
-						goto nlmsg_failure;
-					((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err;
+						goto nla_put_failure;
+					ci.rta_error = err;
 				}
 			}
 		} else
 #endif
-			RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
 int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtattr **rta = arg;
-	struct rtmsg *rtm = NLMSG_DATA(nlh);
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
 	struct rtable *rt = NULL;
-	u32 dst = 0;
-	u32 src = 0;
-	int iif = 0;
-	int err = -ENOBUFS;
+	u32 dst, src, iif;
+	int err;
 	struct sk_buff *skb;
 
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
+	if (err < 0)
+		goto errout;
+
+	rtm = nlmsg_data(nlh);
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb)
-		goto out;
+	if (skb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
 
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
@@ -2760,62 +2765,61 @@
 	skb->nh.iph->protocol = IPPROTO_ICMP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
-	if (rta[RTA_SRC - 1])
-		memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
-	if (rta[RTA_DST - 1])
-		memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
-	if (rta[RTA_IIF - 1])
-		memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));
+	src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0;
+	dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0;
+	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
 
 	if (iif) {
-		struct net_device *dev = __dev_get_by_index(iif);
-		err = -ENODEV;
-		if (!dev)
-			goto out_free;
+		struct net_device *dev;
+
+		dev = __dev_get_by_index(iif);
+		if (dev == NULL) {
+			err = -ENODEV;
+			goto errout_free;
+		}
+
 		skb->protocol	= htons(ETH_P_IP);
 		skb->dev	= dev;
 		local_bh_disable();
 		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
 		local_bh_enable();
-		rt = (struct rtable*)skb->dst;
-		if (!err && rt->u.dst.error)
+
+		rt = (struct rtable*) skb->dst;
+		if (err == 0 && rt->u.dst.error)
 			err = -rt->u.dst.error;
 	} else {
-		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
-							 .saddr = src,
-							 .tos = rtm->rtm_tos } } };
-		int oif = 0;
-		if (rta[RTA_OIF - 1])
-			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
-		fl.oif = oif;
+		struct flowi fl = {
+			.nl_u = {
+				.ip4_u = {
+					.daddr = dst,
+					.saddr = src,
+					.tos = rtm->rtm_tos,
+				},
+			},
+			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
+		};
 		err = ip_route_output_key(&rt, &fl);
 	}
+
 	if (err)
-		goto out_free;
+		goto errout_free;
 
 	skb->dst = &rt->u.dst;
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-
 	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
 				RTM_NEWROUTE, 0, 0);
-	if (!err)
-		goto out_free;
-	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
-	}
+	if (err <= 0)
+		goto errout_free;
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:	return err;
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout:
+	return err;
 
-out_free:
+errout_free:
 	kfree_skb(skb);
-	goto out;
+	goto errout;
 }
 
 int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
@@ -3143,13 +3147,9 @@
 	}
 #endif
 
-	ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
-						     sizeof(struct rtable),
-						     0, SLAB_HWCACHE_ALIGN,
-						     NULL, NULL);
-
-	if (!ipv4_dst_ops.kmem_cachep)
-		panic("IP: failed to allocate ip_dst_cache\n");
+	ipv4_dst_ops.kmem_cachep =
+		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	rt_hash_table = (struct rt_hash_bucket *)
 		alloc_large_system_hash("IP route cache",
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e20be33..661e0a4 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -214,6 +214,10 @@
 	if (!req)
 		goto out;
 
+	if (security_inet_conn_request(sk, skb, req)) {
+		reqsk_free(req);
+		goto out;
+	}
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
 	treq->rcv_isn		= htonl(skb->h.th->seq) - 1;
@@ -259,6 +263,7 @@
 				    .uli_u = { .ports =
 					       { .sport = skb->h.th->dest,
 						 .dport = skb->h.th->source } } };
+		security_req_classify_flow(req, &fl);
 		if (ip_route_output_key(&rt, &fl)) {
 			reqsk_free(req);
 			goto out; 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 70cea9d..19b2071 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -17,6 +17,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp.h>
+#include <net/cipso_ipv4.h>
 
 /* From af_inet.c */
 extern int sysctl_ip_nonlocal_bind;
@@ -697,6 +698,40 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+#ifdef CONFIG_NETLABEL
+	{
+		.ctl_name	= NET_CIPSOV4_CACHE_ENABLE,
+		.procname	= "cipso_cache_enable",
+		.data		= &cipso_v4_cache_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CIPSOV4_CACHE_BUCKET_SIZE,
+		.procname	= "cipso_cache_bucket_size",
+		.data		= &cipso_v4_cache_bucketsize,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CIPSOV4_RBM_OPTFMT,
+		.procname	= "cipso_rbm_optfmt",
+		.data		= &cipso_v4_rbm_optfmt,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= NET_CIPSOV4_RBM_STRICTVALID,
+		.procname	= "cipso_rbm_strictvalid",
+		.data		= &cipso_v4_rbm_strictvalid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif /* CONFIG_NETLABEL */
 	{ .ctl_name = 0 }
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 934396b..66e9a72 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -268,7 +268,7 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
-int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
 
@@ -568,7 +568,7 @@
 		skb->truesize += copy;
 		sk->sk_wmem_queued += copy;
 		sk->sk_forward_alloc -= copy;
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		tp->write_seq += copy;
 		TCP_SKB_CB(skb)->end_seq += copy;
 		skb_shinfo(skb)->gso_segs = 0;
@@ -723,7 +723,7 @@
 				 * Check whether we can use HW checksum.
 				 */
 				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
-					skb->ip_summed = CHECKSUM_HW;
+					skb->ip_summed = CHECKSUM_PARTIAL;
 
 				skb_entail(sk, tp, skb);
 				copy = size_goal;
@@ -955,8 +955,11 @@
 		     * receive buffer and there was a small segment
 		     * in queue.
 		     */
-		    (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
-		     !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
+		    (copied > 0 &&
+		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
+		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
+		       !icsk->icsk_ack.pingpong)) &&
+		      !atomic_read(&sk->sk_rmem_alloc)))
 			time_to_ack = 1;
 	}
 
@@ -2205,7 +2208,7 @@
 		th->fin = th->psh = 0;
 
 		th->check = ~csum_fold(th->check + delta);
-		if (skb->ip_summed != CHECKSUM_HW)
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
 			th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 							   skb->csum));
 
@@ -2219,7 +2222,7 @@
 
 	delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
 	th->check = ~csum_fold(th->check + delta);
-	if (skb->ip_summed != CHECKSUM_HW)
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		th->check = csum_fold(csum_partial(skb->h.raw, thlen,
 						   skb->csum));
 
@@ -2254,9 +2257,7 @@
 	tcp_hashinfo.bind_bucket_cachep =
 		kmem_cache_create("tcp_bind_bucket",
 				  sizeof(struct inet_bind_bucket), 0,
-				  SLAB_HWCACHE_ALIGN, NULL, NULL);
-	if (!tcp_hashinfo.bind_bucket_cachep)
-		panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
 	/* Size and allocate the main established and bind bucket
 	 * hash tables.
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b0134ab..5730333 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -231,7 +231,7 @@
 
 static int __init bictcp_register(void)
 {
-	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&bictcp);
 }
 
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 2be2798..a60ef38 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -358,7 +358,7 @@
 
 static int __init cubictcp_register(void)
 {
-	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
 
 	/* Precompute a bunch of the scaling factors that are used per-packet
 	 * based on SRTT of 100ms
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index fa3e1aa..c4fc811 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -189,7 +189,7 @@
 
 static int __init hstcp_register(void)
 {
-	BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_highspeed);
 }
 
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 6edfe5e..682e7d5 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -286,7 +286,7 @@
 
 static int __init htcp_register(void)
 {
-	BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
 	BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
 	return tcp_register_congestion_control(&htcp);
 }
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 7406e0c..59e691d 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -170,7 +170,7 @@
 
 static int __init hybla_register(void)
 {
-	BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_hybla);
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 159fa3f..b3def0d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,24 +72,24 @@
 #include <asm/unaligned.h>
 #include <net/netdma.h>
 
-int sysctl_tcp_timestamps = 1;
-int sysctl_tcp_window_scaling = 1;
-int sysctl_tcp_sack = 1;
-int sysctl_tcp_fack = 1;
-int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
-int sysctl_tcp_ecn;
-int sysctl_tcp_dsack = 1;
-int sysctl_tcp_app_win = 31;
-int sysctl_tcp_adv_win_scale = 2;
+int sysctl_tcp_timestamps __read_mostly = 1;
+int sysctl_tcp_window_scaling __read_mostly = 1;
+int sysctl_tcp_sack __read_mostly = 1;
+int sysctl_tcp_fack __read_mostly = 1;
+int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_ecn __read_mostly;
+int sysctl_tcp_dsack __read_mostly = 1;
+int sysctl_tcp_app_win __read_mostly = 31;
+int sysctl_tcp_adv_win_scale __read_mostly = 2;
 
-int sysctl_tcp_stdurg;
-int sysctl_tcp_rfc1337;
-int sysctl_tcp_max_orphans = NR_FILE;
-int sysctl_tcp_frto;
-int sysctl_tcp_nometrics_save;
+int sysctl_tcp_stdurg __read_mostly;
+int sysctl_tcp_rfc1337 __read_mostly;
+int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
+int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_nometrics_save __read_mostly;
 
-int sysctl_tcp_moderate_rcvbuf = 1;
-int sysctl_tcp_abc;
+int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
+int sysctl_tcp_abc __read_mostly;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
@@ -127,7 +127,7 @@
 	/* skb->len may jitter because of SACKs, even if peer
 	 * sends good full-sized frames.
 	 */
-	len = skb->len;
+	len = skb_shinfo(skb)->gso_size ?: skb->len;
 	if (len >= icsk->icsk_ack.rcv_mss) {
 		icsk->icsk_ack.rcv_mss = len;
 	} else {
@@ -156,6 +156,8 @@
 				return;
 			}
 		}
+		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
+			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
 		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
 	}
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4b04c3e..39b1798 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -78,8 +78,8 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
-int sysctl_tcp_tw_reuse;
-int sysctl_tcp_low_latency;
+int sysctl_tcp_tw_reuse __read_mostly;
+int sysctl_tcp_low_latency __read_mostly;
 
 /* Check TCP sequence numbers in ICMP packets. */
 #define ICMP_MIN_LENGTH 8
@@ -484,7 +484,7 @@
 	struct inet_sock *inet = inet_sk(sk);
 	struct tcphdr *th = skb->h.th;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
 		skb->csum = offsetof(struct tcphdr, check);
 	} else {
@@ -509,7 +509,7 @@
 	th->check = 0;
 	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
 	skb->csum = offsetof(struct tcphdr, check);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
 }
 
@@ -798,6 +798,9 @@
 
 	tcp_openreq_init(req, &tmp_opt, skb);
 
+	if (security_inet_conn_request(sk, skb, req))
+		goto drop_and_free;
+
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
@@ -948,9 +951,9 @@
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
 
-	nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
-					th->source, skb->nh.iph->daddr,
-					ntohs(th->dest), inet_iif(skb));
+	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
+				      th->source, skb->nh.iph->daddr,
+				      th->dest, inet_iif(skb));
 
 	if (nsk) {
 		if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -970,7 +973,7 @@
 
 static int tcp_v4_checksum_init(struct sk_buff *skb)
 {
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
 				  skb->nh.iph->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1087,7 +1090,7 @@
 	TCP_SKB_CB(skb)->sacked	 = 0;
 
 	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
-			   skb->nh.iph->daddr, ntohs(th->dest),
+			   skb->nh.iph->daddr, th->dest,
 			   inet_iif(skb));
 
 	if (!sk)
@@ -1101,7 +1104,7 @@
 		goto discard_and_relse;
 	nf_reset(skb);
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
 	skb->dev = NULL;
@@ -1165,7 +1168,7 @@
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
 							skb->nh.iph->daddr,
-							ntohs(th->dest),
+							th->dest,
 							inet_iif(skb));
 		if (sk2) {
 			inet_twsk_deschedule((struct inet_timewait_sock *)sk,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 48f28d6..308fb7e 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -35,7 +35,6 @@
  * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <net/tcp.h>
 
@@ -328,7 +327,7 @@
 
 static int __init tcp_lp_register(void)
 {
-	BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_lp);
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 624e2b2..0163d98 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -34,8 +34,8 @@
 #define SYNC_INIT 1
 #endif
 
-int sysctl_tcp_syncookies = SYNC_INIT; 
-int sysctl_tcp_abort_on_overflow;
+int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
+int sysctl_tcp_abort_on_overflow __read_mostly;
 
 struct inet_timewait_death_row tcp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4f3ffe..061edfa 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -43,24 +43,24 @@
 #include <linux/smp_lock.h>
 
 /* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse = 1;
+int sysctl_tcp_retrans_collapse __read_mostly = 1;
 
 /* People can turn this on to  work with those rare, broken TCPs that
  * interpret the window field as a signed quantity.
  */
-int sysctl_tcp_workaround_signed_windows = 0;
+int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
 
 /* This limits the percentage of the congestion window which we
  * will allow a single TSO frame to consume.  Building TSO frames
  * which are too large can cause TCP streams to be bursty.
  */
-int sysctl_tcp_tso_win_divisor = 3;
+int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-int sysctl_tcp_mtu_probing = 0;
-int sysctl_tcp_base_mss = 512;
+int sysctl_tcp_mtu_probing __read_mostly = 0;
+int sysctl_tcp_base_mss __read_mostly = 512;
 
 /* By default, RFC2861 behavior.  */
-int sysctl_tcp_slow_start_after_idle = 1;
+int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
 static void update_send_head(struct sock *sk, struct tcp_sock *tp,
 			     struct sk_buff *skb)
@@ -577,7 +577,7 @@
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
-	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
+	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Copy and checksum data tail into the new buffer. */
 		buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
 						       nsize, 0);
@@ -586,7 +586,7 @@
 
 		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
 	} else {
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb_split(skb, buff, len);
 	}
 
@@ -689,7 +689,7 @@
 		__pskb_trim_head(skb, len - skb_headlen(skb));
 
 	TCP_SKB_CB(skb)->seq += len;
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 
 	skb->truesize	     -= len;
 	sk->sk_wmem_queued   -= len;
@@ -1062,7 +1062,7 @@
 	/* This packet was never sent out yet, so no SACK bits. */
 	TCP_SKB_CB(buff)->sacked = 0;
 
-	buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
 
 	/* Fix up tso_factor for both original and new SKB.  */
@@ -1206,8 +1206,7 @@
 	TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
-	if (skb->ip_summed == CHECKSUM_HW)
-		nskb->ip_summed = CHECKSUM_HW;
+	nskb->ip_summed = skb->ip_summed;
 
 	len = 0;
 	while (len < probe_size) {
@@ -1231,7 +1230,7 @@
 			                           ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
-				if (skb->ip_summed != CHECKSUM_HW)
+				if (skb->ip_summed != CHECKSUM_PARTIAL)
 					skb->csum = csum_partial(skb->data, skb->len, 0);
 			} else {
 				__pskb_trim_head(skb, copy);
@@ -1572,10 +1571,9 @@
 
 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
 
-		if (next_skb->ip_summed == CHECKSUM_HW)
-			skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = next_skb->ip_summed;
 
-		if (skb->ip_summed != CHECKSUM_HW)
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
 			skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
 
 		/* Update sequence range on original skb. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7c1bde3..fb09ade 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -23,14 +23,14 @@
 #include <linux/module.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 
-int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 
-int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
-int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
-int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
-int sysctl_tcp_retries1 = TCP_RETR1;
-int sysctl_tcp_retries2 = TCP_RETR2;
-int sysctl_tcp_orphan_retries;
+int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
+int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
+int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
+int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
+int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
+int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
+int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
+int sysctl_tcp_orphan_retries __read_mostly;
 
 static void tcp_write_timer(unsigned long);
 static void tcp_delack_timer(unsigned long);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 490360b..a3b7aa0 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -370,7 +370,7 @@
 
 static int __init tcp_vegas_register(void)
 {
-	BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);
 	tcp_register_congestion_control(&tcp_vegas);
 	return 0;
 }
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 11b42a7..ce57bf3 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -9,7 +9,6 @@
  * 	See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
  */
 
-#include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -213,7 +212,7 @@
 
 static int __init tcp_veno_register(void)
 {
-	BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
 	tcp_register_congestion_control(&tcp_veno);
 	return 0;
 }
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 5446312..4f42a86 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -289,7 +289,7 @@
 
 static int __init tcp_westwood_register(void)
 {
-	BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_westwood);
 }
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f136cec..77e265d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -118,14 +118,33 @@
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
-/* Shared by v4/v6 udp. */
-int udp_port_rover;
+static int udp_port_rover;
 
-static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+static inline int udp_lport_inuse(u16 num)
+{
+	struct sock *sk;
+	struct hlist_node *node;
+
+	sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
+		if (inet_sk(sk)->num == num)
+			return 1;
+	return 0;
+}
+
+/**
+ *  udp_get_port  -  common port lookup for IPv4 and IPv6
+ *
+ *  @sk:          socket struct in question
+ *  @snum:        port number to look up
+ *  @saddr_cmp:   AF-dependent comparison of bound local IP addresses
+ */
+int udp_get_port(struct sock *sk, unsigned short snum,
+		 int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2))
 {
 	struct hlist_node *node;
+	struct hlist_head *head;
 	struct sock *sk2;
-	struct inet_sock *inet = inet_sk(sk);
+	int    error = 1;
 
 	write_lock_bh(&udp_hash_lock);
 	if (snum == 0) {
@@ -137,11 +156,10 @@
 		best_size_so_far = 32767;
 		best = result = udp_port_rover;
 		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-			struct hlist_head *list;
 			int size;
 
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-			if (hlist_empty(list)) {
+			head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+			if (hlist_empty(head)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
 						((result - sysctl_local_port_range[0]) &
@@ -149,12 +167,13 @@
 				goto gotit;
 			}
 			size = 0;
-			sk_for_each(sk2, node, list)
+			/* Pick the shortest chain; stop counting early. */
+			sk_for_each(sk2, node, head)
 				if (++size >= best_size_so_far)
 					goto next;
 			best_size_so_far = size;
 			best = result;
 		next:;
 		}
 		result = best;
 		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
@@ -170,38 +187,44 @@
 gotit:
 		udp_port_rover = snum = result;
 	} else {
-		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
-			struct inet_sock *inet2 = inet_sk(sk2);
+		head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
 
-			if (inet2->num == snum &&
-			    sk2 != sk &&
-			    !ipv6_only_sock(sk2) &&
-			    (!sk2->sk_bound_dev_if ||
-			     !sk->sk_bound_dev_if ||
-			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (!inet2->rcv_saddr ||
-			     !inet->rcv_saddr ||
-			     inet2->rcv_saddr == inet->rcv_saddr) &&
-			    (!sk2->sk_reuse || !sk->sk_reuse))
+		sk_for_each(sk2, node, head)
+			if (inet_sk(sk2)->num == snum                        &&
+			    sk2 != sk                                        &&
+			    (!sk2->sk_reuse        || !sk->sk_reuse)         &&
+			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+			     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+			    (*saddr_cmp)(sk, sk2)                              )
 				goto fail;
-		}
 	}
-	inet->num = snum;
+	inet_sk(sk)->num = snum;
 	if (sk_unhashed(sk)) {
-		struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
-
-		sk_add_node(sk, h);
+		head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+		sk_add_node(sk, head);
 		sock_prot_inc_use(sk->sk_prot);
 	}
-	write_unlock_bh(&udp_hash_lock);
-	return 0;
-
+	error = 0;
 fail:
 	write_unlock_bh(&udp_hash_lock);
-	return 1;
+	return error;
 }
 
+static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+{
+	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
+
+	return 	( !ipv6_only_sock(sk2)  &&
+		  (!inet1->rcv_saddr || !inet2->rcv_saddr ||
+		   inet1->rcv_saddr == inet2->rcv_saddr      ));
+}
+
+static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+	return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
+}
+
+
 static void udp_v4_hash(struct sock *sk)
 {
 	BUG();
@@ -429,7 +452,7 @@
 		/*
 		 * Only one fragment on the socket.
 		 */
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			skb->csum = offsetof(struct udphdr, check);
 			uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst,
 					up->len, IPPROTO_UDP, 0);
@@ -448,7 +471,7 @@
 		 * fragments on the socket so that all csums of sk_buffs
 		 * should be together.
 		 */
-		if (skb->ip_summed == CHECKSUM_HW) {
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			int offset = (unsigned char *)uh - skb->data;
 			skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
@@ -603,6 +626,7 @@
 				    .uli_u = { .ports =
 					       { .sport = inet->sport,
 						 .dport = dport } } };
+		security_sk_classify_flow(sk, &fl);
 		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
 		if (err)
 			goto out;
@@ -661,6 +685,16 @@
 		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
 		return len;
 	}
+	/*
+	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
+	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
+	 * we don't have a good statistic (IpOutDiscards but it can be too many
+	 * things).  We could add another new stat but at least for now that
+	 * seems like overkill.
+	 */
+	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+	}
 	return err;
 
 do_confirm:
@@ -980,6 +1014,7 @@
 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
+	int rc;
 
 	/*
 	 *	Charge it to the socket, dropping if the queue is full.
@@ -1026,7 +1061,10 @@
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	if (sock_queue_rcv_skb(sk,skb)<0) {
+	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
 		UDP_INC_STATS_BH(UDP_MIB_INERRORS);
 		kfree_skb(skb);
 		return -1;
@@ -1087,7 +1125,7 @@
 {
 	if (uh->check == 0) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else if (skb->ip_summed == CHECKSUM_HW) {
+	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
@@ -1581,7 +1619,7 @@
 EXPORT_SYMBOL(udp_hash);
 EXPORT_SYMBOL(udp_hash_lock);
 EXPORT_SYMBOL(udp_ioctl);
-EXPORT_SYMBOL(udp_port_rover);
+EXPORT_SYMBOL(udp_get_port);
 EXPORT_SYMBOL(udp_prot);
 EXPORT_SYMBOL(udp_sendmsg);
 EXPORT_SYMBOL(udp_poll);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 817ed84..040e847 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -106,7 +106,7 @@
 		if (x->mode->input(x, skb))
 			goto drop;
 
-		if (x->props.mode) {
+		if (x->props.mode == XFRM_MODE_TUNNEL) {
 			decaps = 1;
 			break;
 		}
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index a9e6b3d..92676b7 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -21,9 +21,8 @@
  * On exit, skb->h will be set to the start of the payload to be processed
  * by x->type->output and skb->nh will be set to the top IP header.
  */
-static int xfrm4_transport_output(struct sk_buff *skb)
+static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct xfrm_state *x;
 	struct iphdr *iph;
 	int ihl;
 
@@ -33,7 +32,6 @@
 	ihl = iph->ihl * 4;
 	skb->h.raw += ihl;
 
-	x = skb->dst->xfrm;
 	skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
 	return 0;
 }
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 13cafbe..e23c21d 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -33,10 +33,9 @@
  * On exit, skb->h will be set to the start of the payload to be processed
  * by x->type->output and skb->nh will be set to the top IP header.
  */
-static int xfrm4_tunnel_output(struct sk_buff *skb)
+static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
 	struct iphdr *iph, *top_iph;
 	int flags;
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d16f863..04403fb0 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -48,13 +48,13 @@
 	struct xfrm_state *x = dst->xfrm;
 	int err;
 	
-	if (skb->ip_summed == CHECKSUM_HW) {
-		err = skb_checksum_help(skb, 0);
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
 		if (err)
 			goto error_nolock;
 	}
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = xfrm4_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
@@ -66,7 +66,7 @@
 		if (err)
 			goto error;
 
-		err = x->mode->output(skb);
+		err = x->mode->output(x, skb);
 		if (err)
 			goto error;
 
@@ -85,7 +85,7 @@
 		}
 		dst = skb->dst;
 		x = dst->xfrm;
-	} while (x && !x->props.mode);
+	} while (x && (x->props.mode != XFRM_MODE_TUNNEL));
 
 	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
 	err = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 8f50eae..eabcd27 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -21,6 +21,25 @@
 	return __ip_route_output_key((struct rtable**)dst, fl);
 }
 
+static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+	struct rtable *rt;
+	struct flowi fl_tunnel = {
+		.nl_u = {
+			.ip4_u = {
+				.daddr = daddr->a4,
+			},
+		},
+	};
+
+	if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {
+		saddr->a4 = rt->rt_src;
+		dst_release(&rt->u.dst);
+		return 0;
+	}
+	return -EHOSTUNREACH;
+}
+
 static struct dst_entry *
 __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 {
@@ -33,7 +52,7 @@
 		    xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
 	    	    xdst->u.rt.fl.fl4_src == fl->fl4_src &&
 	    	    xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
-		    xfrm_bundle_ok(xdst, fl, AF_INET)) {
+		    xfrm_bundle_ok(xdst, fl, AF_INET, 0)) {
 			dst_clone(dst);
 			break;
 		}
@@ -93,10 +112,11 @@
 
 		xdst = (struct xfrm_dst *)dst1;
 		xdst->route = &rt->u.dst;
+		xdst->genid = xfrm[i]->genid;
 
 		dst1->next = dst_prev;
 		dst_prev = dst1;
-		if (xfrm[i]->props.mode) {
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			remote = xfrm[i]->id.daddr.a4;
 			local  = xfrm[i]->props.saddr.a4;
 			tunnel = 1;
@@ -135,6 +155,7 @@
 		dst_prev->flags	       |= DST_HOST;
 		dst_prev->lastuse	= jiffies;
 		dst_prev->header_len	= header_len;
+		dst_prev->nfheader_len	= 0;
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
@@ -296,6 +317,7 @@
 	.family = 		AF_INET,
 	.dst_ops =		&xfrm4_dst_ops,
 	.dst_lookup =		xfrm4_dst_lookup,
+	.get_saddr =		xfrm4_get_saddr,
 	.find_bundle = 		__xfrm4_find_bundle,
 	.bundle_create =	__xfrm4_bundle_create,
 	.decode_session =	_decode_session4,
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 81e1751..fe20344 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -42,99 +42,15 @@
 	x->props.saddr = tmpl->saddr;
 	if (x->props.saddr.a4 == 0)
 		x->props.saddr.a4 = saddr->a4;
-	if (tmpl->mode && x->props.saddr.a4 == 0) {
-		struct rtable *rt;
-	        struct flowi fl_tunnel = {
-        	        .nl_u = {
-        			.ip4_u = {
-					.daddr = x->id.daddr.a4,
-				}
-			}
-		};
-		if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
-		                     &fl_tunnel, AF_INET)) {
-			x->props.saddr.a4 = rt->rt_src;
-			dst_release(&rt->u.dst);
-		}
-	}
 	x->props.mode = tmpl->mode;
 	x->props.reqid = tmpl->reqid;
 	x->props.family = AF_INET;
 }
 
-static struct xfrm_state *
-__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
-{
-	unsigned h = __xfrm4_spi_hash(daddr, spi, proto);
-	struct xfrm_state *x;
-
-	list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) {
-		if (x->props.family == AF_INET &&
-		    spi == x->id.spi &&
-		    daddr->a4 == x->id.daddr.a4 &&
-		    proto == x->id.proto) {
-			xfrm_state_hold(x);
-			return x;
-		}
-	}
-	return NULL;
-}
-
-static struct xfrm_state *
-__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, 
-		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
-		 int create)
-{
-	struct xfrm_state *x, *x0;
-	unsigned h = __xfrm4_dst_hash(daddr);
-
-	x0 = NULL;
-
-	list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) {
-		if (x->props.family == AF_INET &&
-		    daddr->a4 == x->id.daddr.a4 &&
-		    mode == x->props.mode &&
-		    proto == x->id.proto &&
-		    saddr->a4 == x->props.saddr.a4 &&
-		    reqid == x->props.reqid &&
-		    x->km.state == XFRM_STATE_ACQ &&
-		    !x->id.spi) {
-			    x0 = x;
-			    break;
-		    }
-	}
-	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
-		x0->sel.daddr.a4 = daddr->a4;
-		x0->sel.saddr.a4 = saddr->a4;
-		x0->sel.prefixlen_d = 32;
-		x0->sel.prefixlen_s = 32;
-		x0->props.saddr.a4 = saddr->a4;
-		x0->km.state = XFRM_STATE_ACQ;
-		x0->id.daddr.a4 = daddr->a4;
-		x0->id.proto = proto;
-		x0->props.family = AF_INET;
-		x0->props.mode = mode;
-		x0->props.reqid = reqid;
-		x0->props.family = AF_INET;
-		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-		xfrm_state_hold(x0);
-		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
-		add_timer(&x0->timer);
-		xfrm_state_hold(x0);
-		list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
-		wake_up(&km_waitq);
-	}
-	if (x0)
-		xfrm_state_hold(x0);
-	return x0;
-}
-
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.init_flags		= xfrm4_init_flags,
 	.init_tempsel		= __xfrm4_init_tempsel,
-	.state_lookup		= __xfrm4_state_lookup,
-	.find_acq		= __xfrm4_find_acq,
 };
 
 void __init xfrm4_state_init(void)
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index f8ceaa1..f110af5 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -28,7 +28,7 @@
 
 static int ipip_init_state(struct xfrm_state *x)
 {
-	if (!x->props.mode)
+	if (x->props.mode != XFRM_MODE_TUNNEL)
 		return -EINVAL;
 
 	if (x->encap)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e923d4d..a2d211d 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -77,6 +77,7 @@
 	select CRYPTO
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
+	select CRYPTO_CBC
 	select CRYPTO_SHA1
 	select CRYPTO_DES
 	---help---
@@ -97,6 +98,15 @@
 
 	  If unsure, say Y.
 
+config IPV6_MIP6
+	bool "IPv6: Mobility (EXPERIMENTAL)"
+	depends on IPV6 && EXPERIMENTAL
+	select XFRM
+	---help---
+	  Support for IPv6 Mobility described in RFC 3775.
+
+	  If unsure, say N.
+
 config INET6_XFRM_TUNNEL
 	tristate
 	select INET6_TUNNEL
@@ -126,6 +136,13 @@
 
 	  If unsure, say Y.
 
+config INET6_XFRM_MODE_ROUTEOPTIMIZATION
+	tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
+	depends on IPV6 && EXPERIMENTAL
+	select XFRM
+	---help---
+	  Support for MIPv6 route optimization mode.
+
 config IPV6_TUNNEL
 	tristate "IPv6: IPv6-in-IPv6 tunnel"
 	select INET6_TUNNEL
@@ -135,3 +152,31 @@
 
 	  If unsure, say N.
 
+config IPV6_SUBTREES
+	bool "IPv6: source address based routing"
+	depends on IPV6 && EXPERIMENTAL
+	---help---
+	  Enable routing by source address or prefix.
+
+	  The destination address is still the primary routing key, so mixing
+	  normal and source prefix specific routes in the same routing table
+	  may sometimes lead to unintended routing behavior.  This can be
+	  avoided by defining different routing tables for the normal and
+	  source prefix specific routes.
+
+	  If unsure, say N.
+
+config IPV6_MULTIPLE_TABLES
+	bool "IPv6: Multiple Routing Tables"
+	depends on IPV6 && EXPERIMENTAL
+	select FIB_RULES
+	---help---
+	  Support multiple routing tables.
+
+config IPV6_ROUTE_FWMARK
+	bool "IPv6: use netfilter MARK value as routing key"
+	depends on IPV6_MULTIPLE_TABLES && NETFILTER
+	---help---
+	  If you say Y here, you will be able to specify different routes for
+	  packets with different mark values (see iptables(8), MARK target).
+
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 386e0a6..0213c66 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -13,6 +13,9 @@
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
+ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
+ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+
 ipv6-objs += $(ipv6-y)
 
 obj-$(CONFIG_INET6_AH) += ah6.o
@@ -22,6 +25,7 @@
 obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
 obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
 obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
+obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
 
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c7852b3..c186763 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -48,6 +48,7 @@
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/if_arcnet.h>
 #include <linux/if_infiniband.h>
@@ -72,6 +73,7 @@
 #include <net/addrconf.h>
 #include <net/tcp.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
 
@@ -117,9 +119,6 @@
 static struct inet6_ifaddr		*inet6_addr_lst[IN6_ADDR_HSIZE];
 static DEFINE_RWLOCK(addrconf_hash_lock);
 
-/* Protects inet6 devices */
-DEFINE_RWLOCK(addrconf_lock);
-
 static void addrconf_verify(unsigned long);
 
 static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
@@ -144,7 +143,7 @@
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
 
-struct ipv6_devconf ipv6_devconf = {
+struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -173,9 +172,10 @@
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.proxy_ndp		= 0,
 };
 
-static struct ipv6_devconf ipv6_devconf_dflt = {
+static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.forwarding		= 0,
 	.hop_limit		= IPV6_DEFAULT_HOPLIMIT,
 	.mtu6			= IPV6_MIN_MTU,
@@ -203,6 +203,7 @@
 	.accept_ra_rt_info_max_plen = 0,
 #endif
 #endif
+	.proxy_ndp		= 0,
 };
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -314,6 +315,12 @@
 
 /* Nobody refers to this device, we may destroy it. */
 
+static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
+{
+	struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
+	kfree(idev);
+}
+
 void in6_dev_finish_destroy(struct inet6_dev *idev)
 {
 	struct net_device *dev = idev->dev;
@@ -328,7 +335,7 @@
 		return;
 	}
 	snmp6_free_dev(idev);
-	kfree(idev);
+	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
 }
 
 static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
@@ -404,9 +411,8 @@
 	if (netif_carrier_ok(dev))
 		ndev->if_flags |= IF_READY;
 
-	write_lock_bh(&addrconf_lock);
-	dev->ip6_ptr = ndev;
-	write_unlock_bh(&addrconf_lock);
+	/* protected by rtnl_lock */
+	rcu_assign_pointer(dev->ip6_ptr, ndev);
 
 	ipv6_mc_init_dev(ndev);
 	ndev->tstamp = jiffies;
@@ -470,7 +476,7 @@
 
 	read_lock(&dev_base_lock);
 	for (dev=dev_base; dev; dev=dev->next) {
-		read_lock(&addrconf_lock);
+		rcu_read_lock();
 		idev = __in6_dev_get(dev);
 		if (idev) {
 			int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
@@ -478,7 +484,7 @@
 			if (changed)
 				dev_forward_change(idev);
 		}
-		read_unlock(&addrconf_lock);
+		rcu_read_unlock();
 	}
 	read_unlock(&dev_base_lock);
 }
@@ -539,7 +545,7 @@
 	int hash;
 	int err = 0;
 
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	if (idev->dead) {
 		err = -ENODEV;			/*XXX*/
 		goto out2;
@@ -608,7 +614,7 @@
 	in6_ifa_hold(ifa);
 	write_unlock(&idev->lock);
 out2:
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 
 	if (likely(err == 0))
 		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
@@ -734,7 +740,7 @@
 
 		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
 			if (onlink == 0) {
-				ip6_del_rt(rt, NULL, NULL, NULL);
+				ip6_del_rt(rt);
 				rt = NULL;
 			} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
 				rt->rt6i_expires = expires;
@@ -911,7 +917,7 @@
 	memset(&hiscore, 0, sizeof(hiscore));
 
 	read_lock(&dev_base_lock);
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 
 	for (dev = dev_base; dev; dev=dev->next) {
 		struct inet6_dev *idev;
@@ -1032,9 +1038,27 @@
 					continue;
 			}
 
-			/* Rule 4: Prefer home address -- not implemented yet */
+			/* Rule 4: Prefer home address */
+#ifdef CONFIG_IPV6_MIP6
+			if (hiscore.rule < 4) {
+				if (ifa_result->flags & IFA_F_HOMEADDRESS)
+					hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
+				hiscore.rule++;
+			}
+			if (ifa->flags & IFA_F_HOMEADDRESS) {
+				score.attrs |= IPV6_SADDR_SCORE_HOA;
+				if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) {
+					score.rule = 4;
+					goto record_it;
+				}
+			} else {
+				if (hiscore.attrs & IPV6_SADDR_SCORE_HOA)
+					continue;
+			}
+#else
 			if (hiscore.rule < 4)
 				hiscore.rule++;
+#endif
 
 			/* Rule 5: Prefer outgoing interface */
 			if (hiscore.rule < 5) {
@@ -1123,7 +1147,7 @@
 		}
 		read_unlock_bh(&idev->lock);
 	}
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	read_unlock(&dev_base_lock);
 
 	if (!ifa_result)
@@ -1147,7 +1171,7 @@
 	struct inet6_dev *idev;
 	int err = -EADDRNOTAVAIL;
 
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 	if ((idev = __in6_dev_get(dev)) != NULL) {
 		struct inet6_ifaddr *ifp;
 
@@ -1161,7 +1185,7 @@
 		}
 		read_unlock_bh(&idev->lock);
 	}
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	return err;
 }
 
@@ -1462,7 +1486,7 @@
 	struct inet6_dev *idev = (struct inet6_dev *) data;
 	unsigned long expires;
 
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	write_lock_bh(&idev->lock);
 
 	if (idev->dead)
@@ -1486,7 +1510,7 @@
 
 out:
 	write_unlock_bh(&idev->lock);
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 	in6_dev_put(idev);
 }
 
@@ -1507,59 +1531,56 @@
 addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		      unsigned long expires, u32 flags)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table = RT6_TABLE_PREFIX,
+		.fc_metric = IP6_RT_PRIO_ADDRCONF,
+		.fc_ifindex = dev->ifindex,
+		.fc_expires = expires,
+		.fc_dst_len = plen,
+		.fc_flags = RTF_UP | flags,
+	};
 
-	memset(&rtmsg, 0, sizeof(rtmsg));
-	ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx);
-	rtmsg.rtmsg_dst_len = plen;
-	rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
-	rtmsg.rtmsg_ifindex = dev->ifindex;
-	rtmsg.rtmsg_info = expires;
-	rtmsg.rtmsg_flags = RTF_UP|flags;
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+	ipv6_addr_copy(&cfg.fc_dst, pfx);
 
 	/* Prevent useless cloning on PtP SIT.
 	   This thing is done here expecting that the whole
 	   class of non-broadcast devices need not cloning.
 	 */
-	if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
-		rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
+	if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
+		cfg.fc_flags |= RTF_NONEXTHOP;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&cfg);
 }
 
 /* Create "default" multicast route to the interface */
 
 static void addrconf_add_mroute(struct net_device *dev)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table = RT6_TABLE_LOCAL,
+		.fc_metric = IP6_RT_PRIO_ADDRCONF,
+		.fc_ifindex = dev->ifindex,
+		.fc_dst_len = 8,
+		.fc_flags = RTF_UP,
+	};
 
-	memset(&rtmsg, 0, sizeof(rtmsg));
-	ipv6_addr_set(&rtmsg.rtmsg_dst,
-		      htonl(0xFF000000), 0, 0, 0);
-	rtmsg.rtmsg_dst_len = 8;
-	rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
-	rtmsg.rtmsg_ifindex = dev->ifindex;
-	rtmsg.rtmsg_flags = RTF_UP;
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
+
+	ip6_route_add(&cfg);
 }
 
 static void sit_route_add(struct net_device *dev)
 {
-	struct in6_rtmsg rtmsg;
-
-	memset(&rtmsg, 0, sizeof(rtmsg));
-
-	rtmsg.rtmsg_type	= RTMSG_NEWROUTE;
-	rtmsg.rtmsg_metric	= IP6_RT_PRIO_ADDRCONF;
+	struct fib6_config cfg = {
+		.fc_table = RT6_TABLE_MAIN,
+		.fc_metric = IP6_RT_PRIO_ADDRCONF,
+		.fc_ifindex = dev->ifindex,
+		.fc_dst_len = 96,
+		.fc_flags = RTF_UP | RTF_NONEXTHOP,
+	};
 
 	/* prefix length - 96 bits "::d.d.d.d" */
-	rtmsg.rtmsg_dst_len	= 96;
-	rtmsg.rtmsg_flags	= RTF_UP|RTF_NONEXTHOP;
-	rtmsg.rtmsg_ifindex	= dev->ifindex;
-
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&cfg);
 }
 
 static void addrconf_add_lroute(struct net_device *dev)
@@ -1660,7 +1681,7 @@
 		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
 			if (rt->rt6i_flags&RTF_EXPIRES) {
 				if (valid_lft == 0) {
-					ip6_del_rt(rt, NULL, NULL, NULL);
+					ip6_del_rt(rt);
 					rt = NULL;
 				} else {
 					rt->rt6i_expires = jiffies + rt_expires;
@@ -1870,12 +1891,11 @@
  *	Manual configuration of address on an interface
  */
 static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
-			  __u32 prefered_lft, __u32 valid_lft)
+			  __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft)
 {
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
 	struct net_device *dev;
-	__u8 ifa_flags = 0;
 	int scope;
 
 	ASSERT_RTNL();
@@ -1887,9 +1907,6 @@
 	if ((dev = __dev_get_by_index(ifindex)) == NULL)
 		return -ENODEV;
 	
-	if (!(dev->flags&IFF_UP))
-		return -ENETDOWN;
-
 	if ((idev = addrconf_add_dev(dev)) == NULL)
 		return -ENOBUFS;
 
@@ -1971,7 +1988,7 @@
 
 	rtnl_lock();
 	err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen,
-			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+			     IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 	rtnl_unlock();
 	return err;
 }
@@ -2344,10 +2361,10 @@
 	           Do not dev_put!
 	 */
 	if (how == 1) {
-		write_lock_bh(&addrconf_lock);
-		dev->ip6_ptr = NULL;
 		idev->dead = 1;
-		write_unlock_bh(&addrconf_lock);
+
+		/* protected by rtnl_lock */
+		rcu_assign_pointer(dev->ip6_ptr, NULL);
 
 		/* Step 1.5: remove snmp6 entry */
 		snmp6_unregister_dev(idev);
@@ -2514,7 +2531,8 @@
 	spin_lock_bh(&ifp->lock);
 
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
-	    !(ifp->flags&IFA_F_TENTATIVE)) {
+	    !(ifp->flags&IFA_F_TENTATIVE) ||
+	    ifp->flags & IFA_F_NODAD) {
 		ifp->flags &= ~IFA_F_TENTATIVE;
 		spin_unlock_bh(&ifp->lock);
 		read_unlock_bh(&idev->lock);
@@ -2759,6 +2777,26 @@
 }
 #endif	/* CONFIG_PROC_FS */
 
+#ifdef CONFIG_IPV6_MIP6
+/* Check if address is a home address configured on any interface. */
+int ipv6_chk_home_addr(struct in6_addr *addr)
+{
+	int ret = 0;
+	struct inet6_ifaddr * ifp;
+	u8 hash = ipv6_addr_hash(addr);
+	read_lock_bh(&addrconf_hash_lock);
+	for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
+		if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
+		    (ifp->flags & IFA_F_HOMEADDRESS)) {
+			ret = 1;
+			break;
+		}
+	}
+	read_unlock_bh(&addrconf_hash_lock);
+	return ret;
+}
+#endif
+
 /*
  *	Periodic address status verification
  */
@@ -2869,66 +2907,68 @@
 	spin_unlock_bh(&addrconf_verify_lock);
 }
 
+static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
+{
+	struct in6_addr *pfx = NULL;
+
+	if (addr)
+		pfx = nla_data(addr);
+
+	if (local) {
+		if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
+			pfx = NULL;
+		else
+			pfx = nla_data(local);
+	}
+
+	return pfx;
+}
+
+static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = {
+	[IFA_ADDRESS]		= { .len = sizeof(struct in6_addr) },
+	[IFA_LOCAL]		= { .len = sizeof(struct in6_addr) },
+	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
+};
+
 static int
 inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct rtattr **rta = arg;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct ifaddrmsg *ifm;
+	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *pfx;
+	int err;
 
-	pfx = NULL;
-	if (rta[IFA_ADDRESS-1]) {
-		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
-	}
-	if (rta[IFA_LOCAL-1]) {
-		if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) ||
-		    (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
-	}
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+	if (err < 0)
+		return err;
+
+	ifm = nlmsg_data(nlh);
+	pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
 	if (pfx == NULL)
 		return -EINVAL;
 
 	return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
 }
 
-static int
-inet6_addr_modify(int ifindex, struct in6_addr *pfx,
-		  __u32 prefered_lft, __u32 valid_lft)
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
+			     u32 prefered_lft, u32 valid_lft)
 {
-	struct inet6_ifaddr *ifp = NULL;
-	struct net_device *dev;
-	int ifa_flags = 0;
-
-	if ((dev = __dev_get_by_index(ifindex)) == NULL)
-		return -ENODEV;
-
-	if (!(dev->flags&IFF_UP))
-		return -ENETDOWN;
-
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
-	ifp = ipv6_get_ifaddr(pfx, dev, 1);
-	if (ifp == NULL)
-		return -ENOENT;
-
 	if (valid_lft == INFINITY_LIFE_TIME)
-		ifa_flags = IFA_F_PERMANENT;
+		ifa_flags |= IFA_F_PERMANENT;
 	else if (valid_lft >= 0x7FFFFFFF/HZ)
 		valid_lft = 0x7FFFFFFF/HZ;
 
 	if (prefered_lft == 0)
-		ifa_flags = IFA_F_DEPRECATED;
+		ifa_flags |= IFA_F_DEPRECATED;
 	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
 		 (prefered_lft != INFINITY_LIFE_TIME))
 		prefered_lft = 0x7FFFFFFF/HZ;
 
 	spin_lock_bh(&ifp->lock);
-	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags;
-
+	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
 	ifp->tstamp = jiffies;
 	ifp->valid_lft = valid_lft;
 	ifp->prefered_lft = prefered_lft;
@@ -2936,7 +2976,6 @@
 	spin_unlock_bh(&ifp->lock);
 	if (!(ifp->flags&IFA_F_TENTATIVE))
 		ipv6_ifa_notify(0, ifp);
-	in6_ifa_put(ifp);
 
 	addrconf_verify(0);
 
@@ -2946,172 +2985,189 @@
 static int
 inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct rtattr  **rta = arg;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct ifaddrmsg *ifm;
+	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *pfx;
-	__u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME;
+	struct inet6_ifaddr *ifa;
+	struct net_device *dev;
+	u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
+	u8 ifa_flags;
+	int err;
 
-	pfx = NULL;
-	if (rta[IFA_ADDRESS-1]) {
-		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
-	}
-	if (rta[IFA_LOCAL-1]) {
-		if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) ||
-		    (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))))
-			return -EINVAL;
-		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
-	}
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+	if (err < 0)
+		return err;
+
+	ifm = nlmsg_data(nlh);
+	pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
 	if (pfx == NULL)
 		return -EINVAL;
 
-	if (rta[IFA_CACHEINFO-1]) {
+	if (tb[IFA_CACHEINFO]) {
 		struct ifa_cacheinfo *ci;
-		if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci))
-			return -EINVAL;
-		ci = RTA_DATA(rta[IFA_CACHEINFO-1]);
+
+		ci = nla_data(tb[IFA_CACHEINFO]);
 		valid_lft = ci->ifa_valid;
-		prefered_lft = ci->ifa_prefered;
+		preferred_lft = ci->ifa_prefered;
+	} else {
+		preferred_lft = INFINITY_LIFE_TIME;
+		valid_lft = INFINITY_LIFE_TIME;
 	}
 
-	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-		int ret;
-		ret = inet6_addr_modify(ifm->ifa_index, pfx,
-					prefered_lft, valid_lft);
-		if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE))
-			return ret;
+	dev =  __dev_get_by_index(ifm->ifa_index);
+	if (dev == NULL)
+		return -ENODEV;
+
+	/* We ignore other flags so far. */
+	ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
+
+	ifa = ipv6_get_ifaddr(pfx, dev, 1);
+	if (ifa == NULL) {
+		/*
+		 * It would be best to check for !NLM_F_CREATE here but
+		 * userspace already relies on not having to provide this.
+		 */
+		return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
+				      ifa_flags, preferred_lft, valid_lft);
 	}
 
-	return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
-			      prefered_lft, valid_lft);
+	if (nlh->nlmsg_flags & NLM_F_EXCL ||
+	    !(nlh->nlmsg_flags & NLM_F_REPLACE))
+		err = -EEXIST;
+	else
+		err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
 
+	in6_ifa_put(ifa);
+
+	return err;
 }
 
-/* Maximum length of ifa_cacheinfo attributes */
-#define INET6_IFADDR_RTA_SPACE \
-		RTA_SPACE(16) /* IFA_ADDRESS */ + \
-		RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */
+static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
+			  u8 scope, int ifindex)
+{
+	struct ifaddrmsg *ifm;
+
+	ifm = nlmsg_data(nlh);
+	ifm->ifa_family = AF_INET6;
+	ifm->ifa_prefixlen = prefixlen;
+	ifm->ifa_flags = flags;
+	ifm->ifa_scope = scope;
+	ifm->ifa_index = ifindex;
+}
+
+static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+			 unsigned long tstamp, u32 preferred, u32 valid)
+{
+	struct ifa_cacheinfo ci;
+
+	ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
+			+ TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
+			+ TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	ci.ifa_prefered = preferred;
+	ci.ifa_valid = valid;
+
+	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+}
+
+static inline int rt_scope(int ifa_scope)
+{
+	if (ifa_scope & IFA_HOST)
+		return RT_SCOPE_HOST;
+	else if (ifa_scope & IFA_LINK)
+		return RT_SCOPE_LINK;
+	else if (ifa_scope & IFA_SITE)
+		return RT_SCOPE_SITE;
+	else
+		return RT_SCOPE_UNIVERSE;
+}
+
+static inline int inet6_ifaddr_msgsize(void)
+{
+	return nlmsg_total_size(sizeof(struct ifaddrmsg) +
+				nla_total_size(16) +
+				nla_total_size(sizeof(struct ifa_cacheinfo)) +
+				128);
+}
 
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
-	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	struct ifa_cacheinfo ci;
-	unsigned char	 *b = skb->tail;
+	u32 preferred, valid;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
-	ifm->ifa_family = AF_INET6;
-	ifm->ifa_prefixlen = ifa->prefix_len;
-	ifm->ifa_flags = ifa->flags;
-	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
-	if (ifa->scope&IFA_HOST)
-		ifm->ifa_scope = RT_SCOPE_HOST;
-	else if (ifa->scope&IFA_LINK)
-		ifm->ifa_scope = RT_SCOPE_LINK;
-	else if (ifa->scope&IFA_SITE)
-		ifm->ifa_scope = RT_SCOPE_SITE;
-	ifm->ifa_index = ifa->idev->dev->ifindex;
-	RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
+		      ifa->idev->dev->ifindex);
+
 	if (!(ifa->flags&IFA_F_PERMANENT)) {
-		ci.ifa_prefered = ifa->prefered_lft;
-		ci.ifa_valid = ifa->valid_lft;
-		if (ci.ifa_prefered != INFINITY_LIFE_TIME) {
+		preferred = ifa->prefered_lft;
+		valid = ifa->valid_lft;
+		if (preferred != INFINITY_LIFE_TIME) {
 			long tval = (jiffies - ifa->tstamp)/HZ;
-			ci.ifa_prefered -= tval;
-			if (ci.ifa_valid != INFINITY_LIFE_TIME)
-				ci.ifa_valid -= tval;
+			preferred -= tval;
+			if (valid != INFINITY_LIFE_TIME)
+				valid -= tval;
 		}
 	} else {
-		ci.ifa_prefered = INFINITY_LIFE_TIME;
-		ci.ifa_valid = INFINITY_LIFE_TIME;
+		preferred = INFINITY_LIFE_TIME;
+		valid = INFINITY_LIFE_TIME;
 	}
-	ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
+	    put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+		return nlmsg_cancel(skb, nlh);
+
+	return nlmsg_end(skb, nlh);
 }
 
 static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 				u32 pid, u32 seq, int event, u16 flags)
 {
-	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	struct ifa_cacheinfo ci;
-	unsigned char	 *b = skb->tail;
+	u8 scope = RT_SCOPE_UNIVERSE;
+	int ifindex = ifmca->idev->dev->ifindex;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
-	ifm->ifa_family = AF_INET6;	
-	ifm->ifa_prefixlen = 128;
-	ifm->ifa_flags = IFA_F_PERMANENT;
-	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
-	if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE)
-		ifm->ifa_scope = RT_SCOPE_SITE;
-	ifm->ifa_index = ifmca->idev->dev->ifindex;
-	RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
-	ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.ifa_prefered = INFINITY_LIFE_TIME;
-	ci.ifa_valid = INFINITY_LIFE_TIME;
-	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+		scope = RT_SCOPE_SITE;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+	if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
+	    put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
+		return nlmsg_cancel(skb, nlh);
+
+	return nlmsg_end(skb, nlh);
 }
 
 static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 				u32 pid, u32 seq, int event, unsigned int flags)
 {
-	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
-	struct ifa_cacheinfo ci;
-	unsigned char	 *b = skb->tail;
+	u8 scope = RT_SCOPE_UNIVERSE;
+	int ifindex = ifaca->aca_idev->dev->ifindex;
 
-	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
-	ifm = NLMSG_DATA(nlh);
-	ifm->ifa_family = AF_INET6;	
-	ifm->ifa_prefixlen = 128;
-	ifm->ifa_flags = IFA_F_PERMANENT;
-	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
-	if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE)
-		ifm->ifa_scope = RT_SCOPE_SITE;
-	ifm->ifa_index = ifaca->aca_idev->dev->ifindex;
-	RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
-	ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ
-		    * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ
-		    * 100 / HZ);
-	ci.ifa_prefered = INFINITY_LIFE_TIME;
-	ci.ifa_valid = INFINITY_LIFE_TIME;
-	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
+		scope = RT_SCOPE_SITE;
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
+	if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
+	    put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+			  INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0)
+		return nlmsg_cancel(skb, nlh);
+
+	return nlmsg_end(skb, nlh);
 }
 
 enum addr_type_t
@@ -3222,79 +3278,74 @@
 	return inet6_dump_addr(skb, cb, type);
 }
 
-static int inet6_rtm_getaddr(struct sk_buff *in_skb,
-		struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+			     void *arg)
 {
-	struct rtattr **rta = arg;
-	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+	struct ifaddrmsg *ifm;
+	struct nlattr *tb[IFA_MAX+1];
 	struct in6_addr *addr = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_ifaddr *ifa;
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE);
 	int err;
 
-	if (rta[IFA_ADDRESS-1]) {
-		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr))
-			return -EINVAL;
-		addr = RTA_DATA(rta[IFA_ADDRESS-1]);
-	}
-	if (rta[IFA_LOCAL-1]) {
-		if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) ||
-		    (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr))))
-			return -EINVAL;
-		addr = RTA_DATA(rta[IFA_LOCAL-1]);
-	}
-	if (addr == NULL)
-		return -EINVAL;
+	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+	if (err < 0)
+		goto errout;
 
+	addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
+	if (addr == NULL) {
+		err = -EINVAL;
+		goto errout;
+	}
+
+	ifm = nlmsg_data(nlh);
 	if (ifm->ifa_index)
 		dev = __dev_get_by_index(ifm->ifa_index);
 
-	if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL)
-		return -EADDRNOTAVAIL;
-
-	if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) {
-		err = -ENOBUFS;
-		goto out;
+	if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
+		err = -EADDRNOTAVAIL;
+		goto errout;
 	}
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+	if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) {
+		err = -ENOBUFS;
+		goto errout_ifa;
+	}
+
 	err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
 				nlh->nlmsg_seq, RTM_NEWADDR, 0);
 	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
+		kfree_skb(skb);
+		goto errout_ifa;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout_ifa:
 	in6_ifa_put(ifa);
+errout:
 	return err;
-out_free:
-	kfree_skb(skb);
-	goto out;
 }
 
 static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE);
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
 }
 
 static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
@@ -3329,6 +3380,7 @@
 	array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
 #endif
 #endif
+	array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
 }
 
 /* Maximum length of ifinfomsg attributes */
@@ -3435,20 +3487,23 @@
 void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE);
+	int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE;
+	int err = -ENOBUFS;
 	
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_IFINFO, err);
 }
 
 /* Maximum length of prefix_cacheinfo attributes */
@@ -3500,20 +3555,23 @@
 			 struct prefix_info *pinfo)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE);
+	int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE;
+	int err = -ENOBUFS;
 
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS);
-		return;
-	}
-	if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
+	skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC);
+
+	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
 }
 
 static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
@@ -3528,6 +3586,9 @@
 	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet6_rtm_delroute, },
 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet6_rtm_getroute,
 				      .dumpit	= inet6_dump_fib, },
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	[RTM_GETRULE  - RTM_BASE] = { .dumpit   = fib6_rules_dump,   },
+#endif
 };
 
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -3536,7 +3597,7 @@
 
 	switch (event) {
 	case RTM_NEWADDR:
-		ip6_ins_rt(ifp->rt, NULL, NULL, NULL);
+		ip6_ins_rt(ifp->rt);
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
 		break;
@@ -3545,7 +3606,7 @@
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		dst_hold(&ifp->rt->u.dst);
-		if (ip6_del_rt(ifp->rt, NULL, NULL, NULL))
+		if (ip6_del_rt(ifp->rt))
 			dst_free(&ifp->rt->u.dst);
 		break;
 	}
@@ -3553,10 +3614,10 @@
 
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
-	read_lock_bh(&addrconf_lock);
+	rcu_read_lock_bh();
 	if (likely(ifp->idev->dead == 0))
 		__ipv6_ifa_notify(event, ifp);
-	read_unlock_bh(&addrconf_lock);
+	rcu_read_unlock_bh();
 }
 
 #ifdef CONFIG_SYSCTL
@@ -3653,7 +3714,7 @@
 	ctl_table addrconf_conf_dir[2];
 	ctl_table addrconf_proto_dir[2];
 	ctl_table addrconf_root_dir[2];
-} addrconf_sysctl = {
+} addrconf_sysctl __read_mostly = {
 	.sysctl_header = NULL,
 	.addrconf_vars = {
         	{
@@ -3843,6 +3904,14 @@
 #endif
 #endif
 		{
+			.ctl_name	=	NET_IPV6_PROXY_NDP,
+			.procname	=	"proxy_ndp",
+			.data		=	&ipv6_devconf.proxy_ndp,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	&proc_dointvec,
+		},
+		{
 			.ctl_name	=	0,	/* sentinel */
 		}
 	},
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ac85e9c..bf6e8af 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -59,6 +59,9 @@
 #ifdef CONFIG_IPV6_TUNNEL
 #include <net/ip6_tunnel.h>
 #endif
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -67,7 +70,7 @@
 MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
 MODULE_LICENSE("GPL");
 
-int sysctl_ipv6_bindv6only;
+int sysctl_ipv6_bindv6only __read_mostly;
 
 /* The inetsw table contains everything that inet_create needs to
  * build a new socket.
@@ -637,6 +640,7 @@
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet->dport;
 		fl.fl_ip_sport = inet->sport;
+		security_sk_classify_flow(sk, &fl);
 
 		if (np->opt && np->opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
@@ -658,7 +662,7 @@
 			return err;
 		}
 
-		__ip6_dst_store(sk, dst, NULL);
+		__ip6_dst_store(sk, dst, NULL, NULL);
 	}
 
 	return 0;
@@ -757,6 +761,8 @@
         struct list_head *r;
 	int err;
 
+	BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
+
 #ifdef MODULE
 #if 0 /* FIXME --RR */
 	if (!mod_member_present(&__this_module, can_unload))
@@ -766,11 +772,6 @@
 #endif
 #endif
 
-	if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) {
-		printk(KERN_CRIT "inet6_proto_init: size fault\n");
-		return -EINVAL;
-	}
-
 	err = proto_register(&tcpv6_prot, 1);
 	if (err)
 		goto out;
@@ -856,6 +857,9 @@
 	ipv6_frag_init();
 	ipv6_nodata_init();
 	ipv6_destopt_init();
+#ifdef CONFIG_IPV6_MIP6
+	mip6_init();
+#endif
 
 	/* Init v6 transport protocols. */
 	udpv6_init();
@@ -919,6 +923,9 @@
  	tcp6_proc_exit();
  	raw6_proc_exit();
 #endif
+#ifdef CONFIG_IPV6_MIP6
+	mip6_fini();
+#endif
 	/* Cleanup code parts. */
 	sit_cleanup();
 	ip6_flowlabel_cleanup();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 9d4831b..b0d83e8 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -74,6 +74,66 @@
 	return 0;
 }
 
+#ifdef CONFIG_IPV6_MIP6
+/**
+ *	ipv6_rearrange_destopt - rearrange IPv6 destination options header
+ *	@iph: IPv6 header
+ *	@destopt: destination options header
+ */
+static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt)
+{
+	u8 *opt = (u8 *)destopt;
+	int len = ipv6_optlen(destopt);
+	int off = 0;
+	int optlen = 0;
+
+	off += 2;
+	len -= 2;
+
+	while (len > 0) {
+
+		switch (opt[off]) {
+
+		case IPV6_TLV_PAD0:
+			optlen = 1;
+			break;
+		default:
+			if (len < 2)
+				goto bad;
+			optlen = opt[off+1]+2;
+			if (len < optlen)
+				goto bad;
+
+			/* Rearrange the source address in @iph and the
+			 * addresses in home address option for final source.
+			 * See 11.3.2 of RFC 3775 for details.
+			 */
+			if (opt[off] == IPV6_TLV_HAO) {
+				struct in6_addr final_addr;
+				struct ipv6_destopt_hao *hao;
+
+				hao = (struct ipv6_destopt_hao *)&opt[off];
+				if (hao->length != sizeof(hao->addr)) {
+					if (net_ratelimit())
+						printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
+					goto bad;
+				}
+				ipv6_addr_copy(&final_addr, &hao->addr);
+				ipv6_addr_copy(&hao->addr, &iph->saddr);
+				ipv6_addr_copy(&iph->saddr, &final_addr);
+			}
+			break;
+		}
+
+		off += optlen;
+		len -= optlen;
+	}
+	/* Note: ok if len == 0 */
+bad:
+	return;
+}
+#endif
+
 /**
  *	ipv6_rearrange_rthdr - rearrange IPv6 routing header
  *	@iph: IPv6 header
@@ -113,7 +173,7 @@
 	ipv6_addr_copy(&iph->daddr, &final_addr);
 }
 
-static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
+static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
 {
 	union {
 		struct ipv6hdr *iph;
@@ -128,8 +188,12 @@
 
 	while (exthdr.raw < end) {
 		switch (nexthdr) {
-		case NEXTHDR_HOP:
 		case NEXTHDR_DEST:
+#ifdef CONFIG_IPV6_MIP6
+			if (dir == XFRM_POLICY_OUT)
+				ipv6_rearrange_destopt(iph, exthdr.opth);
+#endif
+		case NEXTHDR_HOP:
 			if (!zero_out_mutable_opts(exthdr.opth)) {
 				LIMIT_NETDEBUG(
 					KERN_WARNING "overrun %sopts\n",
@@ -164,6 +228,9 @@
 	u8 nexthdr;
 	char tmp_base[8];
 	struct {
+#ifdef CONFIG_IPV6_MIP6
+		struct in6_addr saddr;
+#endif
 		struct in6_addr daddr;
 		char hdrs[0];
 	} *tmp_ext;
@@ -188,10 +255,15 @@
 			err = -ENOMEM;
 			goto error;
 		}
+#ifdef CONFIG_IPV6_MIP6
+		memcpy(tmp_ext, &top_iph->saddr, extlen);
+#else
 		memcpy(tmp_ext, &top_iph->daddr, extlen);
+#endif
 		err = ipv6_clear_mutable_options(top_iph,
 						 extlen - sizeof(*tmp_ext) +
-						 sizeof(*top_iph));
+						 sizeof(*top_iph),
+						 XFRM_POLICY_OUT);
 		if (err)
 			goto error_free_iph;
 	}
@@ -213,13 +285,20 @@
 	ah->spi = x->id.spi;
 	ah->seq_no = htonl(++x->replay.oseq);
 	xfrm_aevent_doreplay(x);
-	ahp->icv(ahp, skb, ah->auth_data);
+	err = ah_mac_digest(ahp, skb, ah->auth_data);
+	if (err)
+		goto error_free_iph;
+	memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
 
 	err = 0;
 
 	memcpy(top_iph, tmp_base, sizeof(tmp_base));
 	if (tmp_ext) {
+#ifdef CONFIG_IPV6_MIP6
+		memcpy(&top_iph->saddr, tmp_ext, extlen);
+#else
 		memcpy(&top_iph->daddr, tmp_ext, extlen);
+#endif
 error_free_iph:
 		kfree(tmp_ext);
 	}
@@ -251,6 +330,7 @@
 	u16 hdr_len;
 	u16 ah_hlen;
 	int nexthdr;
+	int err = -EINVAL;
 
 	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
 		goto out;
@@ -278,7 +358,7 @@
 	if (!tmp_hdr)
 		goto out;
 	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
-	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len))
+	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
 		goto free_out;
 	skb->nh.ipv6h->priority    = 0;
 	skb->nh.ipv6h->flow_lbl[0] = 0;
@@ -292,8 +372,11 @@
 		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
 		memset(ah->auth_data, 0, ahp->icv_trunc_len);
 		skb_push(skb, hdr_len);
-		ahp->icv(ahp, skb, ah->auth_data);
-		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+		err = ah_mac_digest(ahp, skb, ah->auth_data);
+		if (err)
+			goto free_out;
+		err = -EINVAL;
+		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) {
 			LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n");
 			x->stats.integrity_failed++;
 			goto free_out;
@@ -310,7 +393,7 @@
 free_out:
 	kfree(tmp_hdr);
 out:
-	return -EINVAL;
+	return err;
 }
 
 static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 
@@ -338,6 +421,7 @@
 {
 	struct ah_data *ahp = NULL;
 	struct xfrm_algo_desc *aalg_desc;
+	struct crypto_hash *tfm;
 
 	if (!x->aalg)
 		goto error;
@@ -355,24 +439,27 @@
 
 	ahp->key = x->aalg->alg_key;
 	ahp->key_len = (x->aalg->alg_key_len+7)/8;
-	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-	if (!ahp->tfm)
+	tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
 		goto error;
-	ahp->icv = ah_hmac_digest;
+
+	ahp->tfm = tfm;
+	if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len))
+		goto error;
 	
 	/*
 	 * Lookup the algorithm description maintained by xfrm_algo,
 	 * verify crypto transform properties, and store information
 	 * we need for AH processing.  This lookup cannot fail here
-	 * after a successful crypto_alloc_tfm().
+	 * after a successful crypto_alloc_hash().
 	 */
 	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 	BUG_ON(!aalg_desc);
 
 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-	    crypto_tfm_alg_digestsize(ahp->tfm)) {
+	    crypto_hash_digestsize(tfm)) {
 		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
-		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+		       x->aalg->alg_name, crypto_hash_digestsize(tfm),
 		       aalg_desc->uinfo.auth.icv_fullbits/8);
 		goto error;
 	}
@@ -387,7 +474,7 @@
 		goto error;
 	
 	x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct ipv6hdr);
 	x->data = ahp;
 
@@ -396,7 +483,7 @@
 error:
 	if (ahp) {
 		kfree(ahp->work_icv);
-		crypto_free_tfm(ahp->tfm);
+		crypto_free_hash(ahp->tfm);
 		kfree(ahp);
 	}
 	return -EINVAL;
@@ -411,7 +498,7 @@
 
 	kfree(ahp->work_icv);
 	ahp->work_icv = NULL;
-	crypto_free_tfm(ahp->tfm);
+	crypto_free_hash(ahp->tfm);
 	ahp->tfm = NULL;
 	kfree(ahp);
 }
@@ -424,7 +511,8 @@
 	.init_state	= ah6_init_state,
 	.destructor	= ah6_destroy,
 	.input		= ah6_input,
-	.output		= ah6_output
+	.output		= ah6_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct inet6_protocol ah6_protocol = {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f6881d7..a960476 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -56,7 +56,7 @@
 	int	onlink;
 
 	onlink = 0;
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 	idev = __in6_dev_get(dev);
 	if (idev) {
 		read_lock_bh(&idev->lock);
@@ -68,7 +68,7 @@
 		}
 		read_unlock_bh(&idev->lock);
 	}
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 	return onlink;
 }
 
@@ -335,7 +335,7 @@
 	write_unlock_bh(&idev->lock);
 
 	dst_hold(&rt->u.dst);
-	if (ip6_ins_rt(rt, NULL, NULL, NULL))
+	if (ip6_ins_rt(rt))
 		dst_release(&rt->u.dst);
 
 	addrconf_join_solict(dev, &aca->aca_addr);
@@ -378,7 +378,7 @@
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
 	dst_hold(&aca->aca_rt->u.dst);
-	if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL))
+	if (ip6_del_rt(aca->aca_rt))
 		dst_free(&aca->aca_rt->u.dst);
 	else
 		dst_release(&aca->aca_rt->u.dst);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 3b55b4c..7206747 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -156,6 +156,8 @@
 	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
 		fl.oif = np->mcast_oif;
 
+	security_sk_classify_flow(sk, &fl);
+
 	if (flowlabel) {
 		if (flowlabel->opt && flowlabel->opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
@@ -191,7 +193,12 @@
 
 	ip6_dst_store(sk, dst,
 		      ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
-		      &np->daddr : NULL);
+		      &np->daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+		      ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
+		      &np->saddr :
+#endif
+		      NULL);
 
 	sk->sk_state = TCP_ESTABLISHED;
 out:
@@ -641,10 +648,13 @@
 
 			rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
 
-			/*
-			 *	TYPE 0
-			 */
-			if (rthdr->type) {
+			switch (rthdr->type) {
+			case IPV6_SRCRT_TYPE_0:
+#ifdef CONFIG_IPV6_MIP6
+			case IPV6_SRCRT_TYPE_2:
+#endif
+				break;
+			default:
 				err = -EINVAL;
 				goto exit_f;
 			}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a278d5e..e78680a 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -24,6 +24,7 @@
  * 	This file is derived from net/ipv4/esp.c
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -44,7 +45,8 @@
 	int hdr_len;
 	struct ipv6hdr *top_iph;
 	struct ipv6_esp_hdr *esph;
-	struct crypto_tfm *tfm;
+	struct crypto_blkcipher *tfm;
+	struct blkcipher_desc desc;
 	struct esp_data *esp;
 	struct sk_buff *trailer;
 	int blksize;
@@ -67,7 +69,9 @@
 
 	alen = esp->auth.icv_trunc_len;
 	tfm = esp->conf.tfm;
-	blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
+	desc.tfm = tfm;
+	desc.flags = 0;
+	blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	clen = ALIGN(clen + 2, blksize);
 	if (esp->conf.padlen)
 		clen = ALIGN(clen, esp->conf.padlen);
@@ -95,8 +99,13 @@
 	esph->seq_no = htonl(++x->replay.oseq);
 	xfrm_aevent_doreplay(x);
 
-	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+	if (esp->conf.ivlen) {
+		if (unlikely(!esp->conf.ivinitted)) {
+			get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+			esp->conf.ivinitted = 1;
+		}
+		crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
+	}
 
 	do {
 		struct scatterlist *sg = &esp->sgbuf[0];
@@ -107,24 +116,25 @@
 				goto error;
 		}
 		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
-		crypto_cipher_encrypt(tfm, sg, sg, clen);
+		err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
 		if (unlikely(sg != &esp->sgbuf[0]))
 			kfree(sg);
 	} while (0);
 
+	if (unlikely(err))
+		goto error;
+
 	if (esp->conf.ivlen) {
-		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
-		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+		memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
+		crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
 	}
 
 	if (esp->auth.icv_full_len) {
-		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
-			sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
-		pskb_put(skb, trailer, alen);
+		err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data,
+				     sizeof(*esph) + esp->conf.ivlen + clen);
+		memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
 	}
 
-	err = 0;
-
 error:
 	return err;
 }
@@ -134,8 +144,10 @@
 	struct ipv6hdr *iph;
 	struct ipv6_esp_hdr *esph;
 	struct esp_data *esp = x->data;
+	struct crypto_blkcipher *tfm = esp->conf.tfm;
+	struct blkcipher_desc desc = { .tfm = tfm };
 	struct sk_buff *trailer;
-	int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	int alen = esp->auth.icv_trunc_len;
 	int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
 
@@ -155,15 +167,16 @@
 
 	/* If integrity check is required, do this. */
         if (esp->auth.icv_full_len) {
-		u8 sum[esp->auth.icv_full_len];
-		u8 sum1[alen];
+		u8 sum[alen];
 
-		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
+		ret = esp_mac_digest(esp, skb, 0, skb->len - alen);
+		if (ret)
+			goto out;
 
-		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
+		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
 			BUG();
 
-		if (unlikely(memcmp(sum, sum1, alen))) {
+		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
 			x->stats.integrity_failed++;
 			ret = -EINVAL;
 			goto out;
@@ -182,7 +195,7 @@
 
 	/* Get ivec. This can be wrong, check against another impls. */
 	if (esp->conf.ivlen)
-		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+		crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
 
         {
 		u8 nexthdr[2];
@@ -197,9 +210,11 @@
 			}
 		}
 		skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen);
-		crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
+		ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
 		if (unlikely(sg != &esp->sgbuf[0]))
 			kfree(sg);
+		if (unlikely(ret))
+			goto out;
 
 		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
 			BUG();
@@ -225,9 +240,9 @@
 static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
-	u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
+	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		mtu = ALIGN(mtu + 2, blksize);
 	} else {
 		/* The worst case. */
@@ -266,11 +281,11 @@
 	if (!esp)
 		return;
 
-	crypto_free_tfm(esp->conf.tfm);
+	crypto_free_blkcipher(esp->conf.tfm);
 	esp->conf.tfm = NULL;
 	kfree(esp->conf.ivec);
 	esp->conf.ivec = NULL;
-	crypto_free_tfm(esp->auth.tfm);
+	crypto_free_hash(esp->auth.tfm);
 	esp->auth.tfm = NULL;
 	kfree(esp->auth.work_icv);
 	esp->auth.work_icv = NULL;
@@ -280,6 +295,7 @@
 static int esp6_init_state(struct xfrm_state *x)
 {
 	struct esp_data *esp = NULL;
+	struct crypto_blkcipher *tfm;
 
 	/* null auth and encryption can have zero length keys */
 	if (x->aalg) {
@@ -298,24 +314,29 @@
 
 	if (x->aalg) {
 		struct xfrm_algo_desc *aalg_desc;
+		struct crypto_hash *hash;
 
 		esp->auth.key = x->aalg->alg_key;
 		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
-		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
-		if (esp->auth.tfm == NULL)
+		hash = crypto_alloc_hash(x->aalg->alg_name, 0,
+					 CRYPTO_ALG_ASYNC);
+		if (IS_ERR(hash))
 			goto error;
-		esp->auth.icv = esp_hmac_digest;
+
+		esp->auth.tfm = hash;
+		if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len))
+			goto error;
  
 		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 		BUG_ON(!aalg_desc);
  
 		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-			crypto_tfm_alg_digestsize(esp->auth.tfm)) {
-				printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
-					x->aalg->alg_name,
-					crypto_tfm_alg_digestsize(esp->auth.tfm),
-					aalg_desc->uinfo.auth.icv_fullbits/8);
-				goto error;
+		    crypto_hash_digestsize(hash)) {
+			NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+				 x->aalg->alg_name,
+				 crypto_hash_digestsize(hash),
+				 aalg_desc->uinfo.auth.icv_fullbits/8);
+			goto error;
 		}
  
 		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
@@ -327,24 +348,22 @@
 	}
 	esp->conf.key = x->ealg->alg_key;
 	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
-	if (x->props.ealgo == SADB_EALG_NULL)
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
-	else
-		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
-	if (esp->conf.tfm == NULL)
+	tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
 		goto error;
-	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+	esp->conf.tfm = tfm;
+	esp->conf.ivlen = crypto_blkcipher_ivsize(tfm);
 	esp->conf.padlen = 0;
 	if (esp->conf.ivlen) {
 		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
 		if (unlikely(esp->conf.ivec == NULL))
 			goto error;
-		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
+		esp->conf.ivinitted = 0;
 	}
-	if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+	if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len))
 		goto error;
 	x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct ipv6hdr);
 	x->data = esp;
 	return 0;
@@ -365,7 +384,8 @@
 	.destructor	= esp6_destroy,
 	.get_max_size	= esp6_get_max_size,
 	.input		= esp6_input,
-	.output		= esp6_output
+	.output		= esp6_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct inet6_protocol esp6_protocol = {
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 86dac10..88c96b1 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -43,9 +43,54 @@
 #include <net/ndisc.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/xfrm.h>
+#endif
 
 #include <asm/uaccess.h>
 
+int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
+{
+	int packet_len = skb->tail - skb->nh.raw;
+	struct ipv6_opt_hdr *hdr;
+	int len;
+
+	if (offset + 2 > packet_len)
+		goto bad;
+	hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	len = ((hdr->hdrlen + 1) << 3);
+
+	if (offset + len > packet_len)
+		goto bad;
+
+	offset += 2;
+	len -= 2;
+
+	while (len > 0) {
+		int opttype = skb->nh.raw[offset];
+		int optlen;
+
+		if (opttype == type)
+			return offset;
+
+		switch (opttype) {
+		case IPV6_TLV_PAD0:
+			optlen = 1;
+			break;
+		default:
+			if (len < 2 ||
+			    (optlen = skb->nh.raw[offset + 1] + 2) > len)
+				goto bad;
+			break;
+		}
+		offset += optlen;
+		len -= optlen;
+	}
+	/* not_found */
+ bad:
+	return -1;
+}
+
 /*
  *	Parsing tlv encoded headers.
  *
@@ -56,7 +101,7 @@
 
 struct tlvtype_proc {
 	int	type;
-	int	(*func)(struct sk_buff *skb, int offset);
+	int	(*func)(struct sk_buff **skbp, int offset);
 };
 
 /*********************
@@ -65,8 +110,10 @@
 
 /* An unknown option is detected, decide what to do */
 
-static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 {
+	struct sk_buff *skb = *skbp;
+
 	switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
 		return 1;
@@ -91,8 +138,9 @@
 
 /* Parse tlv encoded option header (hop-by-hop or destination) */
 
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 {
+	struct sk_buff *skb = *skbp;
 	struct tlvtype_proc *curr;
 	int off = skb->h.raw - skb->nh.raw;
 	int len = ((skb->h.raw[1]+1)<<3);
@@ -122,13 +170,13 @@
 					/* type specific length/alignment 
 					   checks will be performed in the 
 					   func(). */
-					if (curr->func(skb, off) == 0)
+					if (curr->func(skbp, off) == 0)
 						return 0;
 					break;
 				}
 			}
 			if (curr->type < 0) {
-				if (ip6_tlvopt_unknown(skb, off) == 0)
+				if (ip6_tlvopt_unknown(skbp, off) == 0)
 					return 0;
 			}
 			break;
@@ -147,8 +195,85 @@
   Destination options header.
  *****************************/
 
+#ifdef CONFIG_IPV6_MIP6
+static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
+{
+	struct sk_buff *skb = *skbp;
+	struct ipv6_destopt_hao *hao;
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw;
+	struct in6_addr tmp_addr;
+	int ret;
+
+	if (opt->dsthao) {
+		LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n");
+		goto discard;
+	}
+	opt->dsthao = opt->dst1;
+	opt->dst1 = 0;
+
+	hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
+
+	if (hao->length != 16) {
+		LIMIT_NETDEBUG(
+			KERN_DEBUG "hao invalid option length = %d\n", hao->length);
+		goto discard;
+	}
+
+	if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
+		LIMIT_NETDEBUG(
+			KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr));
+		goto discard;
+	}
+
+	ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
+			       (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
+	if (unlikely(ret < 0))
+		goto discard;
+
+	if (skb_cloned(skb)) {
+		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+		struct inet6_skb_parm *opt2;
+
+		if (skb2 == NULL)
+			goto discard;
+
+		opt2 = IP6CB(skb2);
+		memcpy(opt2, opt, sizeof(*opt2));
+
+		kfree_skb(skb);
+
+		/* update all variables used below to refer to the copied skbuff */
+		*skbp = skb = skb2;
+		hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
+		ipv6h = (struct ipv6hdr *)skb2->nh.raw;
+	}
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
+	ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
+	ipv6_addr_copy(&hao->addr, &tmp_addr);
+
+	if (skb->tstamp.off_sec == 0)
+		__net_timestamp(skb);
+
+	return 1;
+
+ discard:
+	kfree_skb(skb);
+	return 0;
+}
+#endif
+
 static struct tlvtype_proc tlvprocdestopt_lst[] = {
-	/* No destination options are defined now */
+#ifdef CONFIG_IPV6_MIP6
+	{
+		.type	= IPV6_TLV_HAO,
+		.func	= ipv6_dest_hao,
+	},
+#endif
 	{-1,			NULL}
 };
 
@@ -156,6 +281,9 @@
 {
 	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
+#ifdef CONFIG_IPV6_MIP6
+	__u16 dstbuf;
+#endif
 
 	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
 	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
@@ -166,10 +294,19 @@
 
 	opt->lastopt = skb->h.raw - skb->nh.raw;
 	opt->dst1 = skb->h.raw - skb->nh.raw;
+#ifdef CONFIG_IPV6_MIP6
+	dstbuf = opt->dst1;
+#endif
 
-	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
+	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
+		skb = *skbp;
 		skb->h.raw += ((skb->h.raw[1]+1)<<3);
+		opt = IP6CB(skb);
+#ifdef CONFIG_IPV6_MIP6
+		opt->nhoff = dstbuf;
+#else
 		opt->nhoff = opt->dst1;
+#endif
 		return 1;
 	}
 
@@ -219,7 +356,7 @@
 {
 	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
-	struct in6_addr *addr;
+	struct in6_addr *addr = NULL;
 	struct in6_addr daddr;
 	int n, i;
 
@@ -244,6 +381,23 @@
 
 looped_back:
 	if (hdr->segments_left == 0) {
+		switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+		case IPV6_SRCRT_TYPE_2:
+			/* Silently discard a type 2 header unless it was
+			 * already processed by this node
+			 */
+			if (!addr) {
+				IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+				kfree_skb(skb);
+				return -1;
+			}
+			break;
+#endif
+		default:
+			break;
+		}
+
 		opt->lastopt = skb->h.raw - skb->nh.raw;
 		opt->srcrt = skb->h.raw - skb->nh.raw;
 		skb->h.raw += (hdr->hdrlen + 1) << 3;
@@ -253,17 +407,29 @@
 		return 1;
 	}
 
-	if (hdr->type != IPV6_SRCRT_TYPE_0) {
+	switch (hdr->type) {
+	case IPV6_SRCRT_TYPE_0:
+		if (hdr->hdrlen & 0x01) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+			return -1;
+		}
+		break;
+#ifdef CONFIG_IPV6_MIP6
+	case IPV6_SRCRT_TYPE_2:
+		/* Silently discard invalid RTH type 2 */
+		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		break;
+#endif
+	default:
 		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
 		return -1;
 	}
-	
-	if (hdr->hdrlen & 0x01) {
-		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
-		return -1;
-	}
 
 	/*
 	 *	This is the routing header forwarding algorithm from
@@ -294,7 +460,7 @@
 		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
 	}
 
-	if (skb->ip_summed == CHECKSUM_HW)
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 
 	i = n - --hdr->segments_left;
@@ -303,6 +469,27 @@
 	addr = rthdr->addr;
 	addr += i - 1;
 
+	switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+	case IPV6_SRCRT_TYPE_2:
+		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
+				     (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+				     IPPROTO_ROUTING) < 0) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		if (!ipv6_chk_home_addr(addr)) {
+			IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+			kfree_skb(skb);
+			return -1;
+		}
+		break;
+#endif
+	default:
+		break;
+	}
+
 	if (ipv6_addr_is_multicast(addr)) {
 		IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
 		kfree_skb(skb);
@@ -421,8 +608,10 @@
 
 /* Router Alert as of RFC 2711 */
 
-static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
+static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 {
+	struct sk_buff *skb = *skbp;
+
 	if (skb->nh.raw[optoff+1] == 2) {
 		IP6CB(skb)->ra = optoff;
 		return 1;
@@ -435,8 +624,9 @@
 
 /* Jumbo payload */
 
-static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
+static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
 {
+	struct sk_buff *skb = *skbp;
 	u32 pkt_len;
 
 	if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
@@ -485,8 +675,9 @@
 	{ -1, }
 };
 
-int ipv6_parse_hopopts(struct sk_buff *skb)
+int ipv6_parse_hopopts(struct sk_buff **skbp)
 {
+	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	/*
@@ -502,8 +693,10 @@
 	}
 
 	opt->hop = sizeof(struct ipv6hdr);
-	if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
+	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
+		skb = *skbp;
 		skb->h.raw += (skb->h.raw[1]+1)<<3;
+		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
 	}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
new file mode 100644
index 0000000..34f5bfa
--- /dev/null
+++ b/net/ipv6/fib6_rules.c
@@ -0,0 +1,305 @@
+/*
+ * net/ipv6/fib6_rules.c	IPv6 Routing Policy Rules
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Authors
+ *	Thomas Graf		<tgraf@suug.ch>
+ *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
+ */
+
+#include <linux/config.h>
+#include <linux/netdevice.h>
+
+#include <net/fib_rules.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netlink.h>
+
+struct fib6_rule
+{
+	struct fib_rule		common;
+	struct rt6key		src;
+	struct rt6key		dst;
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	u32			fwmark;
+	u32			fwmask;
+#endif
+	u8			tclass;
+};
+
+static struct fib_rules_ops fib6_rules_ops;
+
+static struct fib6_rule main_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0x7FFE,
+		.action =	FR_ACT_TO_TBL,
+		.table =	RT6_TABLE_MAIN,
+	},
+};
+
+static struct fib6_rule local_rule = {
+	.common = {
+		.refcnt =	ATOMIC_INIT(2),
+		.pref =		0,
+		.action =	FR_ACT_TO_TBL,
+		.table =	RT6_TABLE_LOCAL,
+		.flags =	FIB_RULE_PERMANENT,
+	},
+};
+
+static LIST_HEAD(fib6_rules);
+
+struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
+				   pol_lookup_t lookup)
+{
+	struct fib_lookup_arg arg = {
+		.lookup_ptr = lookup,
+	};
+
+	fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg);
+	if (arg.rule)
+		fib_rule_put(arg.rule);
+
+	if (arg.result)
+		return (struct dst_entry *) arg.result;
+
+	dst_hold(&ip6_null_entry.u.dst);
+	return &ip6_null_entry.u.dst;
+}
+
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
+{
+	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
+	pol_lookup_t lookup = arg->lookup_ptr;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		rt = &ip6_null_entry;
+		goto discard_pkt;
+	default:
+	case FR_ACT_BLACKHOLE:
+		rt = &ip6_blk_hole_entry;
+		goto discard_pkt;
+	case FR_ACT_PROHIBIT:
+		rt = &ip6_prohibit_entry;
+		goto discard_pkt;
+	}
+
+	table = fib6_get_table(rule->table);
+	if (table)
+		rt = lookup(table, flp, flags);
+
+	if (rt != &ip6_null_entry)
+		goto out;
+	dst_release(&rt->u.dst);
+	rt = NULL;
+	goto out;
+
+discard_pkt:
+	dst_hold(&rt->u.dst);
+out:
+	arg->result = rt;
+	return rt == NULL ? -EAGAIN : 0;
+}
+
+
+static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+	struct fib6_rule *r = (struct fib6_rule *) rule;
+
+	if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
+		return 0;
+
+	if ((flags & RT6_LOOKUP_F_HAS_SADDR) &&
+	    !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
+		return 0;
+
+	if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff))
+		return 0;
+
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask)
+		return 0;
+#endif
+
+	return 1;
+}
+
+static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
+	[FRA_IFNAME]	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+	[FRA_PRIORITY]	= { .type = NLA_U32 },
+	[FRA_SRC]	= { .len = sizeof(struct in6_addr) },
+	[FRA_DST]	= { .len = sizeof(struct in6_addr) },
+	[FRA_FWMARK]	= { .type = NLA_U32 },
+	[FRA_FWMASK]	= { .type = NLA_U32 },
+	[FRA_TABLE]	= { .type = NLA_U32 },
+};
+
+static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+			       struct nlattr **tb)
+{
+	int err = -EINVAL;
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	if (frh->src_len > 128 || frh->dst_len > 128 ||
+	    (frh->tos & ~IPV6_FLOWINFO_MASK))
+		goto errout;
+
+	if (rule->action == FR_ACT_TO_TBL) {
+		if (rule->table == RT6_TABLE_UNSPEC)
+			goto errout;
+
+		if (fib6_new_table(rule->table) == NULL) {
+			err = -ENOBUFS;
+			goto errout;
+		}
+	}
+
+	if (tb[FRA_SRC])
+		nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
+			   sizeof(struct in6_addr));
+
+	if (tb[FRA_DST])
+		nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
+			   sizeof(struct in6_addr));
+
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	if (tb[FRA_FWMARK]) {
+		rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]);
+		if (rule6->fwmark) {
+			/*
+			 * if the mark value is non-zero,
+			 * all bits are compared by default
+			 * unless a mask is explicitly specified.
+			 */
+			rule6->fwmask = 0xFFFFFFFF;
+		}
+	}
+
+	if (tb[FRA_FWMASK])
+		rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]);
+#endif
+
+	rule6->src.plen = frh->src_len;
+	rule6->dst.plen = frh->dst_len;
+	rule6->tclass = frh->tos;
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			     struct nlattr **tb)
+{
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	if (frh->src_len && (rule6->src.plen != frh->src_len))
+		return 0;
+
+	if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
+		return 0;
+
+	if (frh->tos && (rule6->tclass != frh->tos))
+		return 0;
+
+	if (tb[FRA_SRC] &&
+	    nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
+		return 0;
+
+	if (tb[FRA_DST] &&
+	    nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
+		return 0;
+
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK])))
+		return 0;
+
+	if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK])))
+		return 0;
+#endif
+
+	return 1;
+}
+
+static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			  struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+{
+	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
+
+	frh->family = AF_INET6;
+	frh->dst_len = rule6->dst.plen;
+	frh->src_len = rule6->src.plen;
+	frh->tos = rule6->tclass;
+
+	if (rule6->dst.plen)
+		NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr),
+			&rule6->dst.addr);
+
+	if (rule6->src.plen)
+		NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr),
+			&rule6->src.addr);
+
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+	if (rule6->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark);
+
+	if (rule6->fwmask || rule6->fwmark)
+		NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask);
+#endif
+
+	return 0;
+
+nla_put_failure:
+	return -ENOBUFS;
+}
+
+int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return fib_rules_dump(skb, cb, AF_INET6);
+}
+
+static u32 fib6_rule_default_pref(void)
+{
+	return 0x3FFF;
+}
+
+static struct fib_rules_ops fib6_rules_ops = {
+	.family			= AF_INET6,
+	.rule_size		= sizeof(struct fib6_rule),
+	.action			= fib6_rule_action,
+	.match			= fib6_rule_match,
+	.configure		= fib6_rule_configure,
+	.compare		= fib6_rule_compare,
+	.fill			= fib6_rule_fill,
+	.default_pref		= fib6_rule_default_pref,
+	.nlgroup		= RTNLGRP_IPV6_RULE,
+	.policy			= fib6_rule_policy,
+	.rules_list		= &fib6_rules,
+	.owner			= THIS_MODULE,
+};
+
+void __init fib6_rules_init(void)
+{
+	list_add_tail(&local_rule.common.list, &fib6_rules);
+	list_add_tail(&main_rule.common.list, &fib6_rules);
+
+	fib_rules_register(&fib6_rules_ops);
+}
+
+void fib6_rules_cleanup(void)
+{
+	fib_rules_unregister(&fib6_rules_ops);
+}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 356a8a7..4ec8760 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -151,7 +151,7 @@
 	return 0;
 }
 
-static int sysctl_icmpv6_time = 1*HZ; 
+static int sysctl_icmpv6_time __read_mostly = 1*HZ;
 
 /* 
  * Check the ICMP output rate limit 
@@ -273,6 +273,29 @@
 	return 0;
 }
 
+#ifdef CONFIG_IPV6_MIP6
+static void mip6_addr_swap(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct ipv6_destopt_hao *hao;
+	struct in6_addr tmp;
+	int off;
+
+	if (opt->dsthao) {
+		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+		if (likely(off >= 0)) {
+			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+			ipv6_addr_copy(&tmp, &iph->saddr);
+			ipv6_addr_copy(&iph->saddr, &hao->addr);
+			ipv6_addr_copy(&hao->addr, &tmp);
+		}
+	}
+}
+#else
+static inline void mip6_addr_swap(struct sk_buff *skb) {}
+#endif
+
 /*
  *	Send an ICMP message in response to a packet in error
  */
@@ -350,6 +373,8 @@
 		return;
 	}
 
+	mip6_addr_swap(skb);
+
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_ICMPV6;
 	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
@@ -358,6 +383,7 @@
 	fl.oif = iif;
 	fl.fl_icmp_type = type;
 	fl.fl_icmp_code = code;
+	security_skb_classify_flow(skb, &fl);
 
 	if (icmpv6_xmit_lock())
 		return;
@@ -472,6 +498,7 @@
 		ipv6_addr_copy(&fl.fl6_src, saddr);
 	fl.oif = skb->dev->ifindex;
 	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
+	security_skb_classify_flow(skb, &fl);
 
 	if (icmpv6_xmit_lock())
 		return;
@@ -604,7 +631,7 @@
 
 	/* Perform checksum. */
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
 				     skb->csum))
 			break;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index bf49107..827f41d 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -157,6 +157,7 @@
 	fl.oif = sk->sk_bound_dev_if;
 	fl.fl_ip_sport = inet->sport;
 	fl.fl_ip_dport = inet->dport;
+	security_sk_classify_flow(sk, &fl);
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
@@ -185,7 +186,7 @@
 			return err;
 		}
 
-		__ip6_dst_store(sk, dst, NULL);
+		__ip6_dst_store(sk, dst, NULL, NULL);
 	}
 
 	skb->dst = dst_clone(dst);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7642212..8fcae7a 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -18,6 +18,7 @@
  * 	Yuji SEKIYA @USAGI:	Support default route on router node;
  * 				remove ip6_null_entry from the top of
  * 				routing table.
+ * 	Ville Nuorvala:		Fixed routing subtrees.
  */
 #include <linux/errno.h>
 #include <linux/types.h>
@@ -26,6 +27,7 @@
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/init.h>
+#include <linux/list.h>
 
 #ifdef 	CONFIG_PROC_FS
 #include <linux/proc_fs.h>
@@ -68,19 +70,19 @@
 	void *arg;
 };
 
-DEFINE_RWLOCK(fib6_walker_lock);
-
+static DEFINE_RWLOCK(fib6_walker_lock);
 
 #ifdef CONFIG_IPV6_SUBTREES
 #define FWS_INIT FWS_S
-#define SUBTREE(fn) ((fn)->subtree)
 #else
 #define FWS_INIT FWS_L
-#define SUBTREE(fn) NULL
 #endif
 
 static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
+static struct rt6_info * fib6_find_prefix(struct fib6_node *fn);
 static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
+static int fib6_walk(struct fib6_walker_t *w);
+static int fib6_walk_continue(struct fib6_walker_t *w);
 
 /*
  *	A routing update causes an increase of the serial number on the
@@ -93,13 +95,31 @@
 
 static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);
 
-struct fib6_walker_t fib6_walker_list = {
+static struct fib6_walker_t fib6_walker_list = {
 	.prev	= &fib6_walker_list,
 	.next	= &fib6_walker_list, 
 };
 
 #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
 
+static inline void fib6_walker_link(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	w->next = fib6_walker_list.next;
+	w->prev = &fib6_walker_list;
+	w->next->prev = w;
+	w->prev->next = w;
+	write_unlock_bh(&fib6_walker_lock);
+}
+
+static inline void fib6_walker_unlink(struct fib6_walker_t *w)
+{
+	write_lock_bh(&fib6_walker_lock);
+	w->next->prev = w->prev;
+	w->prev->next = w->next;
+	w->prev = w->next = w;
+	write_unlock_bh(&fib6_walker_lock);
+}
 static __inline__ u32 fib6_new_sernum(void)
 {
 	u32 n = ++rt_sernum;
@@ -147,6 +167,253 @@
 		dst_free(&rt->u.dst);
 }
 
+static struct fib6_table fib6_main_tbl = {
+	.tb6_id		= RT6_TABLE_MAIN,
+	.tb6_lock	= RW_LOCK_UNLOCKED,
+	.tb6_root	= {
+		.leaf		= &ip6_null_entry,
+		.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+	},
+};
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#define FIB_TABLE_HASHSZ 256
+#else
+#define FIB_TABLE_HASHSZ 1
+#endif
+static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+
+static void fib6_link_table(struct fib6_table *tb)
+{
+	unsigned int h;
+
+	h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);
+
+	/*
+	 * No protection necessary: this is the only list mutation
+	 * operation, and tables never disappear once they exist.
+	 */
+	hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
+}
+
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+static struct fib6_table fib6_local_tbl = {
+	.tb6_id		= RT6_TABLE_LOCAL,
+	.tb6_lock	= RW_LOCK_UNLOCKED,
+	.tb6_root 	= {
+		.leaf		= &ip6_null_entry,
+		.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+	},
+};
+
+static struct fib6_table *fib6_alloc_table(u32 id)
+{
+	struct fib6_table *table;
+
+	table = kzalloc(sizeof(*table), GFP_ATOMIC);
+	if (table != NULL) {
+		table->tb6_id = id;
+		table->tb6_lock = RW_LOCK_UNLOCKED;
+		table->tb6_root.leaf = &ip6_null_entry;
+		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
+	}
+
+	return table;
+}
+
+struct fib6_table *fib6_new_table(u32 id)
+{
+	struct fib6_table *tb;
+
+	if (id == 0)
+		id = RT6_TABLE_MAIN;
+	tb = fib6_get_table(id);
+	if (tb)
+		return tb;
+
+	tb = fib6_alloc_table(id);
+	if (tb != NULL)
+		fib6_link_table(tb);
+
+	return tb;
+}
+
+struct fib6_table *fib6_get_table(u32 id)
+{
+	struct fib6_table *tb;
+	struct hlist_node *node;
+	unsigned int h;
+
+	if (id == 0)
+		id = RT6_TABLE_MAIN;
+	h = id & (FIB_TABLE_HASHSZ - 1);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) {
+		if (tb->tb6_id == id) {
+			rcu_read_unlock();
+			return tb;
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+static void __init fib6_tables_init(void)
+{
+	fib6_link_table(&fib6_main_tbl);
+	fib6_link_table(&fib6_local_tbl);
+}
+
+#else
+
+struct fib6_table *fib6_new_table(u32 id)
+{
+	return fib6_get_table(id);
+}
+
+struct fib6_table *fib6_get_table(u32 id)
+{
+	return &fib6_main_tbl;
+}
+
+struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
+				   pol_lookup_t lookup)
+{
+	return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
+}
+
+static void __init fib6_tables_init(void)
+{
+	fib6_link_table(&fib6_main_tbl);
+}
+
+#endif
+
+static int fib6_dump_node(struct fib6_walker_t *w)
+{
+	int res;
+	struct rt6_info *rt;
+
+	for (rt = w->leaf; rt; rt = rt->u.next) {
+		res = rt6_dump_route(rt, w->args);
+		if (res < 0) {
+			/* Frame is full, suspend walking */
+			w->leaf = rt;
+			return 1;
+		}
+		BUG_TRAP(res!=0);
+	}
+	w->leaf = NULL;
+	return 0;
+}
+
+static void fib6_dump_end(struct netlink_callback *cb)
+{
+	struct fib6_walker_t *w = (void*)cb->args[2];
+
+	if (w) {
+		cb->args[2] = 0;
+		kfree(w);
+	}
+	cb->done = (void*)cb->args[3];
+	cb->args[1] = 3;
+}
+
+static int fib6_dump_done(struct netlink_callback *cb)
+{
+	fib6_dump_end(cb);
+	return cb->done ? cb->done(cb) : 0;
+}
+
+static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
+			   struct netlink_callback *cb)
+{
+	struct fib6_walker_t *w;
+	int res;
+
+	w = (void *)cb->args[2];
+	w->root = &table->tb6_root;
+
+	if (cb->args[4] == 0) {
+		read_lock_bh(&table->tb6_lock);
+		res = fib6_walk(w);
+		read_unlock_bh(&table->tb6_lock);
+		if (res > 0)
+			cb->args[4] = 1;
+	} else {
+		read_lock_bh(&table->tb6_lock);
+		res = fib6_walk_continue(w);
+		read_unlock_bh(&table->tb6_lock);
+		if (res != 0) {
+			if (res < 0)
+				fib6_walker_unlink(w);
+			goto end;
+		}
+		fib6_walker_unlink(w);
+		cb->args[4] = 0;
+	}
+end:
+	return res;
+}
+
+int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	unsigned int h, s_h;
+	unsigned int e = 0, s_e;
+	struct rt6_rtnl_dump_arg arg;
+	struct fib6_walker_t *w;
+	struct fib6_table *tb;
+	struct hlist_node *node;
+	int res = 0;
+
+	s_h = cb->args[0];
+	s_e = cb->args[1];
+
+	w = (void *)cb->args[2];
+	if (w == NULL) {
+		/* New dump:
+		 *
+		 * 1. hook callback destructor.
+		 */
+		cb->args[3] = (long)cb->done;
+		cb->done = fib6_dump_done;
+
+		/*
+		 * 2. allocate and initialize walker.
+		 */
+		w = kzalloc(sizeof(*w), GFP_ATOMIC);
+		if (w == NULL)
+			return -ENOMEM;
+		w->func = fib6_dump_node;
+		cb->args[2] = (long)w;
+	}
+
+	arg.skb = skb;
+	arg.cb = cb;
+	w->args = &arg;
+
+	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
+		e = 0;
+		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) {
+			if (e < s_e)
+				goto next;
+			res = fib6_dump_table(tb, skb, cb);
+			if (res != 0)
+				goto out;
+next:
+			e++;
+		}
+	}
+out:
+	cb->args[1] = e;
+	cb->args[0] = h;
+
+	res = res < 0 ? res : skb->len;
+	if (res <= 0)
+		fib6_dump_end(cb);
+	return res;
+}
 
 /*
  *	Routing Table
@@ -343,7 +610,7 @@
  */
 
 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
-		struct nlmsghdr *nlh,  struct netlink_skb_parms *req)
+			    struct nl_info *info)
 {
 	struct rt6_info *iter = NULL;
 	struct rt6_info **ins;
@@ -398,7 +665,7 @@
 	*ins = rt;
 	rt->rt6i_node = fn;
 	atomic_inc(&rt->rt6i_ref);
-	inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
+	inet6_rt_notify(RTM_NEWROUTE, rt, info);
 	rt6_stats.fib_rt_entries++;
 
 	if ((fn->fn_flags & RTN_RTINFO) == 0) {
@@ -428,10 +695,9 @@
  *	with source addr info in sub-trees
  */
 
-int fib6_add(struct fib6_node *root, struct rt6_info *rt, 
-		struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
 {
-	struct fib6_node *fn;
+	struct fib6_node *fn, *pn = NULL;
 	int err = -ENOMEM;
 
 	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
@@ -440,6 +706,8 @@
 	if (fn == NULL)
 		goto out;
 
+	pn = fn;
+
 #ifdef CONFIG_IPV6_SUBTREES
 	if (rt->rt6i_src.plen) {
 		struct fib6_node *sn;
@@ -485,10 +753,6 @@
 			/* Now link new subtree to main tree */
 			sfn->parent = fn;
 			fn->subtree = sfn;
-			if (fn->leaf == NULL) {
-				fn->leaf = rt;
-				atomic_inc(&rt->rt6i_ref);
-			}
 		} else {
 			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
 					sizeof(struct in6_addr), rt->rt6i_src.plen,
@@ -498,21 +762,42 @@
 				goto st_failure;
 		}
 
+		if (fn->leaf == NULL) {
+			fn->leaf = rt;
+			atomic_inc(&rt->rt6i_ref);
+		}
 		fn = sn;
 	}
 #endif
 
-	err = fib6_add_rt2node(fn, rt, nlh, req);
+	err = fib6_add_rt2node(fn, rt, info);
 
 	if (err == 0) {
 		fib6_start_gc(rt);
 		if (!(rt->rt6i_flags&RTF_CACHE))
-			fib6_prune_clones(fn, rt);
+			fib6_prune_clones(pn, rt);
 	}
 
 out:
-	if (err)
+	if (err) {
+#ifdef CONFIG_IPV6_SUBTREES
+		/*
+		 * If fib6_add_1 has cleared the old leaf pointer in the
+		 * super-tree leaf node we have to find a new one for it.
+		 */
+		if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
+			pn->leaf = fib6_find_prefix(pn);
+#if RT6_DEBUG >= 2
+			if (!pn->leaf) {
+				BUG_TRAP(pn->leaf != NULL);
+				pn->leaf = &ip6_null_entry;
+			}
+#endif
+			atomic_inc(&pn->leaf->rt6i_ref);
+		}
+#endif
 		dst_free(&rt->u.dst);
+	}
 	return err;
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -543,6 +828,9 @@
 	struct fib6_node *fn;
 	int dir;
 
+	if (unlikely(args->offset == 0))
+		return NULL;
+
 	/*
 	 *	Descend on a tree
 	 */
@@ -564,33 +852,26 @@
 		break;
 	}
 
-	while ((fn->fn_flags & RTN_ROOT) == 0) {
-#ifdef CONFIG_IPV6_SUBTREES
-		if (fn->subtree) {
-			struct fib6_node *st;
-			struct lookup_args *narg;
-
-			narg = args + 1;
-
-			if (narg->addr) {
-				st = fib6_lookup_1(fn->subtree, narg);
-
-				if (st && !(st->fn_flags & RTN_ROOT))
-					return st;
-			}
-		}
-#endif
-
-		if (fn->fn_flags & RTN_RTINFO) {
+	while(fn) {
+		if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
 			struct rt6key *key;
 
 			key = (struct rt6key *) ((u8 *) fn->leaf +
 						 args->offset);
 
-			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
-				return fn;
+			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
+#ifdef CONFIG_IPV6_SUBTREES
+				if (fn->subtree)
+					fn = fib6_lookup_1(fn->subtree, args + 1);
+#endif
+				if (!fn || fn->fn_flags & RTN_RTINFO)
+					return fn;
+			}
 		}
 
+		if (fn->fn_flags & RTN_ROOT)
+			break;
+
 		fn = fn->parent;
 	}
 
@@ -600,18 +881,24 @@
 struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
 			       struct in6_addr *saddr)
 {
-	struct lookup_args args[2];
 	struct fib6_node *fn;
-
-	args[0].offset = offsetof(struct rt6_info, rt6i_dst);
-	args[0].addr = daddr;
-
+	struct lookup_args args[] = {
+		{
+			.offset = offsetof(struct rt6_info, rt6i_dst),
+			.addr = daddr,
+		},
 #ifdef CONFIG_IPV6_SUBTREES
-	args[1].offset = offsetof(struct rt6_info, rt6i_src);
-	args[1].addr = saddr;
+		{
+			.offset = offsetof(struct rt6_info, rt6i_src),
+			.addr = saddr,
+		},
 #endif
+		{
+			.offset = 0,	/* sentinel */
+		}
+	};
 
-	fn = fib6_lookup_1(root, args);
+	fn = fib6_lookup_1(root, daddr ? args : args + 1);
 
 	if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
 		fn = root;
@@ -667,10 +954,8 @@
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src_len) {
 		BUG_TRAP(saddr!=NULL);
-		if (fn == NULL)
-			fn = fn->subtree;
-		if (fn)
-			fn = fib6_locate_1(fn, saddr, src_len,
+		if (fn && fn->subtree)
+			fn = fib6_locate_1(fn->subtree, saddr, src_len,
 					   offsetof(struct rt6_info, rt6i_src));
 	}
 #endif
@@ -699,7 +984,7 @@
 		if(fn->right)
 			return fn->right->leaf;
 
-		fn = SUBTREE(fn);
+		fn = FIB6_SUBTREE(fn);
 	}
 	return NULL;
 }
@@ -730,7 +1015,7 @@
 		if (fn->right) child = fn->right, children |= 1;
 		if (fn->left) child = fn->left, children |= 2;
 
-		if (children == 3 || SUBTREE(fn) 
+		if (children == 3 || FIB6_SUBTREE(fn)
 #ifdef CONFIG_IPV6_SUBTREES
 		    /* Subtree root (i.e. fn) may have one child */
 		    || (children && fn->fn_flags&RTN_ROOT)
@@ -749,9 +1034,9 @@
 
 		pn = fn->parent;
 #ifdef CONFIG_IPV6_SUBTREES
-		if (SUBTREE(pn) == fn) {
+		if (FIB6_SUBTREE(pn) == fn) {
 			BUG_TRAP(fn->fn_flags&RTN_ROOT);
-			SUBTREE(pn) = NULL;
+			FIB6_SUBTREE(pn) = NULL;
 			nstate = FWS_L;
 		} else {
 			BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
@@ -799,7 +1084,7 @@
 		read_unlock(&fib6_walker_lock);
 
 		node_free(fn);
-		if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
+		if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn))
 			return pn;
 
 		rt6_release(pn->leaf);
@@ -809,7 +1094,7 @@
 }
 
 static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
-    struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+			   struct nl_info *info)
 {
 	struct fib6_walker_t *w;
 	struct rt6_info *rt = *rtp;
@@ -865,11 +1150,11 @@
 		if (atomic_read(&rt->rt6i_ref) != 1) BUG();
 	}
 
-	inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
+	inet6_rt_notify(RTM_DELROUTE, rt, info);
 	rt6_release(rt);
 }
 
-int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int fib6_del(struct rt6_info *rt, struct nl_info *info)
 {
 	struct fib6_node *fn = rt->rt6i_node;
 	struct rt6_info **rtp;
@@ -885,8 +1170,18 @@
 
 	BUG_TRAP(fn->fn_flags&RTN_RTINFO);
 
-	if (!(rt->rt6i_flags&RTF_CACHE))
-		fib6_prune_clones(fn, rt);
+	if (!(rt->rt6i_flags&RTF_CACHE)) {
+		struct fib6_node *pn = fn;
+#ifdef CONFIG_IPV6_SUBTREES
+		/* clones of this route might be in another subtree */
+		if (rt->rt6i_src.plen) {
+			while (!(pn->fn_flags&RTN_ROOT))
+				pn = pn->parent;
+			pn = pn->parent;
+		}
+#endif
+		fib6_prune_clones(pn, rt);
+	}
 
 	/*
 	 *	Walk the leaf entries looking for ourself
@@ -894,7 +1189,7 @@
 
 	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
 		if (*rtp == rt) {
-			fib6_del_route(fn, rtp, nlh, _rtattr, req);
+			fib6_del_route(fn, rtp, info);
 			return 0;
 		}
 	}
@@ -925,7 +1220,7 @@
  *	<0  -> walk is terminated by an error.
  */
 
-int fib6_walk_continue(struct fib6_walker_t *w)
+static int fib6_walk_continue(struct fib6_walker_t *w)
 {
 	struct fib6_node *fn, *pn;
 
@@ -942,8 +1237,8 @@
 		switch (w->state) {
 #ifdef CONFIG_IPV6_SUBTREES
 		case FWS_S:
-			if (SUBTREE(fn)) {
-				w->node = SUBTREE(fn);
+			if (FIB6_SUBTREE(fn)) {
+				w->node = FIB6_SUBTREE(fn);
 				continue;
 			}
 			w->state = FWS_L;
@@ -977,7 +1272,7 @@
 			pn = fn->parent;
 			w->node = pn;
 #ifdef CONFIG_IPV6_SUBTREES
-			if (SUBTREE(pn) == fn) {
+			if (FIB6_SUBTREE(pn) == fn) {
 				BUG_TRAP(fn->fn_flags&RTN_ROOT);
 				w->state = FWS_L;
 				continue;
@@ -999,7 +1294,7 @@
 	}
 }
 
-int fib6_walk(struct fib6_walker_t *w)
+static int fib6_walk(struct fib6_walker_t *w)
 {
 	int res;
 
@@ -1023,7 +1318,7 @@
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
-			res = fib6_del(rt, NULL, NULL, NULL);
+			res = fib6_del(rt, NULL);
 			if (res) {
 #if RT6_DEBUG >= 2
 				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
@@ -1049,9 +1344,9 @@
  *	ignoring pure split nodes) will be scanned.
  */
 
-void fib6_clean_tree(struct fib6_node *root,
-		     int (*func)(struct rt6_info *, void *arg),
-		     int prune, void *arg)
+static void fib6_clean_tree(struct fib6_node *root,
+			    int (*func)(struct rt6_info *, void *arg),
+			    int prune, void *arg)
 {
 	struct fib6_cleaner_t c;
 
@@ -1064,6 +1359,25 @@
 	fib6_walk(&c.w);
 }
 
+void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
+		    int prune, void *arg)
+{
+	struct fib6_table *table;
+	struct hlist_node *node;
+	unsigned int h;
+
+	rcu_read_lock();
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		hlist_for_each_entry_rcu(table, node, &fib_table_hash[h],
+					 tb6_hlist) {
+			write_lock_bh(&table->tb6_lock);
+			fib6_clean_tree(&table->tb6_root, func, prune, arg);
+			write_unlock_bh(&table->tb6_lock);
+		}
+	}
+	rcu_read_unlock();
+}
+
 static int fib6_prune_clone(struct rt6_info *rt, void *arg)
 {
 	if (rt->rt6i_flags & RTF_CACHE) {
@@ -1142,11 +1456,8 @@
 	}
 	gc_args.more = 0;
 
-
-	write_lock_bh(&rt6_lock);
 	ndisc_dst_gc(&gc_args.more);
-	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
-	write_unlock_bh(&rt6_lock);
+	fib6_clean_all(fib6_age, 0, NULL);
 
 	if (gc_args.more)
 		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
@@ -1161,10 +1472,10 @@
 {
 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
 					   sizeof(struct fib6_node),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL, NULL);
-	if (!fib6_node_kmem)
-		panic("cannot create fib6_nodes cache");
+
+	fib6_tables_init();
 }
 
 void fib6_gc_cleanup(void)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 25c2a9e..6b8e6d7 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -111,7 +111,7 @@
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
-		if (ipv6_parse_hopopts(skb) < 0) {
+		if (ipv6_parse_hopopts(&skb) < 0) {
 			IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
 			return 0;
 		}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4fb47a2..6671691 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -308,6 +308,56 @@
 	return 0;
 }
 
+static int ip6_forward_proxy_check(struct sk_buff *skb)
+{
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	u8 nexthdr = hdr->nexthdr;
+	int offset;
+
+	if (ipv6_ext_hdr(nexthdr)) {
+		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
+		if (offset < 0)
+			return 0;
+	} else
+		offset = sizeof(struct ipv6hdr);
+
+	if (nexthdr == IPPROTO_ICMPV6) {
+		struct icmp6hdr *icmp6;
+
+		if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data))
+			return 0;
+
+		icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset);
+
+		switch (icmp6->icmp6_type) {
+		case NDISC_ROUTER_SOLICITATION:
+		case NDISC_ROUTER_ADVERTISEMENT:
+		case NDISC_NEIGHBOUR_SOLICITATION:
+		case NDISC_NEIGHBOUR_ADVERTISEMENT:
+		case NDISC_REDIRECT:
+			/* A unicast neighbor discovery message destined to
+			 * the proxied address needs a local reaction: return 1
+			 * so that it is handed to the input function.
+			 */
+			return 1;
+		default:
+			break;
+		}
+	}
+
+	/*
+	 * The proxying router can't forward traffic sent to a link-local
+	 * address, so signal the sender and discard the packet. This
+	 * behavior is clarified by the MIPv6 specification.
+	 */
+	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
+		dst_link_failure(skb);
+		return -1;
+	}
+
+	return 0;
+}
+
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
 	return dst_output(skb);
@@ -362,6 +412,18 @@
 		return -ETIMEDOUT;
 	}
 
+	/* XXX: idev->cnf.proxy_ndp? */
+	if (ipv6_devconf.proxy_ndp &&
+	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
+		int proxied = ip6_forward_proxy_check(skb);
+		if (proxied > 0)
+			return ip6_input(skb);
+		else if (proxied < 0) {
+			IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+			goto drop;
+		}
+	}
+
 	if (!xfrm6_route_forward(skb)) {
 		IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
 		goto drop;
@@ -475,17 +537,25 @@
 		switch (**nexthdr) {
 
 		case NEXTHDR_HOP:
+			break;
 		case NEXTHDR_ROUTING:
+			found_rhdr = 1;
+			break;
 		case NEXTHDR_DEST:
-			if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
-			if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
-			offset += ipv6_optlen(exthdr);
-			*nexthdr = &exthdr->nexthdr;
-			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+#ifdef CONFIG_IPV6_MIP6
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+				break;
+#endif
+			if (found_rhdr)
+				return offset;
 			break;
 		default :
 			return offset;
 		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
 	}
 
 	return offset;
@@ -726,6 +796,14 @@
 	return err;
 }
 
+static inline int ip6_rt_check(struct rt6key *rt_key,
+			       struct in6_addr *fl_addr,
+			       struct in6_addr *addr_cache)
+{
+	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
+		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
+}
+
 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 					  struct dst_entry *dst,
 					  struct flowi *fl)
@@ -741,8 +819,8 @@
 	 * that we do not support routing by source, TOS,
 	 * and MSG_DONTROUTE 		--ANK (980726)
 	 *
-	 * 1. If route was host route, check that
-	 *    cached destination is current.
+	 * 1. ip6_rt_check(): If route was host route,
+	 *    check that cached destination is current.
 	 *    If it is network route, we still may
 	 *    check its validity using saved pointer
 	 *    to the last used address: daddr_cache.
@@ -753,11 +831,11 @@
 	 *    sockets.
 	 * 2. oif also should be the same.
 	 */
-	if (((rt->rt6i_dst.plen != 128 ||
-	      !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
-	     && (np->daddr_cache == NULL ||
-		 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
-	    || (fl->oif && fl->oif != dst->dev->ifindex)) {
+	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
+#ifdef CONFIG_IPV6_SUBTREES
+	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
+#endif
+	    (fl->oif && fl->oif != dst->dev->ifindex)) {
 		dst_release(dst);
 		dst = NULL;
 	}
@@ -866,7 +944,7 @@
 		/* initialize protocol header pointer */
 		skb->h.raw = skb->data + fragheaderlen;
 
-		skb->ip_summed = CHECKSUM_HW;
+		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
 		sk->sk_sndmsg_off = 0;
 	}
@@ -963,7 +1041,7 @@
 
 	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
 
-	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
+	fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
 
 	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7e4d1c1..ad9c6e8 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -53,7 +53,7 @@
 
 struct ipcomp6_tfms {
 	struct list_head list;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 	int users;
 };
 
@@ -70,7 +70,7 @@
 	int plen, dlen;
 	struct ipcomp_data *ipcd = x->data;
 	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	int cpu;
 
 	if (skb_linearize_cow(skb))
@@ -129,7 +129,7 @@
 	struct ipcomp_data *ipcd = x->data;
 	int plen, dlen;
 	u8 *start, *scratch;
-	struct crypto_tfm *tfm;
+	struct crypto_comp *tfm;
 	int cpu;
 
 	hdr_len = skb->h.raw - skb->data;
@@ -212,7 +212,7 @@
 	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
 	memcpy(&t->sel, &x->sel, sizeof(t->sel));
 	t->props.family = AF_INET6;
-	t->props.mode = 1;
+	t->props.mode = XFRM_MODE_TUNNEL;
 	memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
 
 	if (xfrm_init_state(t))
@@ -301,7 +301,7 @@
 	return scratches;
 }
 
-static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
+static void ipcomp6_free_tfms(struct crypto_comp **tfms)
 {
 	struct ipcomp6_tfms *pos;
 	int cpu;
@@ -323,28 +323,28 @@
 		return;
 
 	for_each_possible_cpu(cpu) {
-		struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
-		crypto_free_tfm(tfm);
+		struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+		crypto_free_comp(tfm);
 	}
 	free_percpu(tfms);
 }
 
-static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
+static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name)
 {
 	struct ipcomp6_tfms *pos;
-	struct crypto_tfm **tfms;
+	struct crypto_comp **tfms;
 	int cpu;
 
 	/* This can be any valid CPU ID so we don't need locking. */
 	cpu = raw_smp_processor_id();
 
 	list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
-		struct crypto_tfm *tfm;
+		struct crypto_comp *tfm;
 
 		tfms = pos->tfms;
 		tfm = *per_cpu_ptr(tfms, cpu);
 
-		if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
+		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
 			pos->users++;
 			return tfms;
 		}
@@ -358,12 +358,13 @@
 	INIT_LIST_HEAD(&pos->list);
 	list_add(&pos->list, &ipcomp6_tfms_list);
 
-	pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
+	pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
 	if (!tfms)
 		goto error;
 
 	for_each_possible_cpu(cpu) {
-		struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
-		if (!tfm)
+		struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+							    CRYPTO_ALG_ASYNC);
+		if (IS_ERR(tfm))	/* ERR_PTR() on failure, not NULL */
 			goto error;
 		*per_cpu_ptr(tfms, cpu) = tfm;
@@ -416,7 +417,7 @@
 		goto out;
 
 	x->props.header_len = 0;
-	if (x->props.mode)
+	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct ipv6hdr);
 	
 	mutex_lock(&ipcomp6_resource_mutex);
@@ -428,7 +429,7 @@
 		goto error;
 	mutex_unlock(&ipcomp6_resource_mutex);
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = ipcomp6_tunnel_attach(x);
 		if (err)
 			goto error_tunnel;
@@ -460,6 +461,7 @@
 	.destructor	= ipcomp6_destroy,
 	.input		= ipcomp6_input,
 	.output		= ipcomp6_output,
+	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct inet6_protocol ipcomp6_protocol = 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a5eaaf6..4f3bb7f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -407,8 +407,16 @@
 		/* routing header option needs extra check */
 		if (optname == IPV6_RTHDR && opt->srcrt) {
 			struct ipv6_rt_hdr *rthdr = opt->srcrt;
-			if (rthdr->type)
+			switch (rthdr->type) {
+			case IPV6_SRCRT_TYPE_0:
+#ifdef CONFIG_IPV6_MIP6
+			case IPV6_SRCRT_TYPE_2:
+#endif
+				break;
+			default:
 				goto sticky_done;
+			}
+
 			if ((rthdr->hdrlen & 1) ||
 			    (rthdr->hdrlen >> 1) != rthdr->segments_left)
 				goto sticky_done;
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index dd4d1ce..0e8e067 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -14,7 +14,6 @@
 EXPORT_SYMBOL(register_inet6addr_notifier);
 EXPORT_SYMBOL(unregister_inet6addr_notifier);
 EXPORT_SYMBOL(ip6_route_output);
-EXPORT_SYMBOL(addrconf_lock);
 EXPORT_SYMBOL(ipv6_setsockopt);
 EXPORT_SYMBOL(ipv6_getsockopt);
 EXPORT_SYMBOL(inet6_register_protosw);
@@ -31,6 +30,8 @@
 EXPORT_SYMBOL(in6_dev_finish_destroy);
 #ifdef CONFIG_XFRM
 EXPORT_SYMBOL(xfrm6_rcv);
+EXPORT_SYMBOL(xfrm6_input_addr);
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
 #endif
 EXPORT_SYMBOL(rt6_lookup);
 EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 639eb20..3b114e3 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -171,7 +171,7 @@
 
 #define IPV6_MLD_MAX_MSF	64
 
-int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
+int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
 
 /*
  *	socket join on multicast group
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
new file mode 100644
index 0000000..99d116c
--- /dev/null
+++ b/net/ipv6/mip6.c
@@ -0,0 +1,519 @@
+/*
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ * Authors:
+ *	Noriaki TAKAMIYA @USAGI
+ *	Masahide NAKAMURA @USAGI
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/time.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/xfrm.h>
+#include <net/mip6.h>
+
+static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr)
+{
+	return x->coaddr;
+}
+
+static inline unsigned int calc_padlen(unsigned int len, unsigned int n)
+{
+	return (n - len + 16) & 0x7;
+}
+
+static inline void *mip6_padn(__u8 *data, __u8 padlen)
+{
+	if (!data)
+		return NULL;
+	if (padlen == 1) {
+		data[0] = MIP6_OPT_PAD_1;
+	} else if (padlen > 1) {
+		data[0] = MIP6_OPT_PAD_N;
+		data[1] = padlen - 2;
+		if (padlen > 2)
+			memset(data+2, 0, data[1]);
+	}
+	return data + padlen;
+}
+
+static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos)
+{
+	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
+}
+
+static int mip6_mh_len(int type)
+{
+	int len = 0;
+
+	switch (type) {
+	case IP6_MH_TYPE_BRR:
+		len = 0;
+		break;
+	case IP6_MH_TYPE_HOTI:
+	case IP6_MH_TYPE_COTI:
+	case IP6_MH_TYPE_BU:
+	case IP6_MH_TYPE_BACK:
+		len = 1;
+		break;
+	case IP6_MH_TYPE_HOT:
+	case IP6_MH_TYPE_COT:
+	case IP6_MH_TYPE_BERROR:
+		len = 2;
+		break;
+	}
+	return len;
+}
+
+int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
+{
+	struct ip6_mh *mh;
+	int mhlen;
+
+	if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) ||
+	    !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3)))
+		return -1;
+
+	mh = (struct ip6_mh *)skb->h.raw;
+
+	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
+		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
+			       mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
+		mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw);
+		return -1;
+	}
+	mhlen = (mh->ip6mh_hdrlen + 1) << 3;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+				    &skb->nh.ipv6h->daddr,
+				    mhlen, IPPROTO_MH,
+				    skb->csum)) {
+			LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n");
+			skb->ip_summed = CHECKSUM_NONE;
+		}
+	}
+	if (skb->ip_summed == CHECKSUM_NONE) {
+		if (csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+				    &skb->nh.ipv6h->daddr,
+				    mhlen, IPPROTO_MH,
+				    skb_checksum(skb, 0, mhlen, 0))) {
+			LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed "
+				       "[" NIP6_FMT " > " NIP6_FMT "]\n",
+				       NIP6(skb->nh.ipv6h->saddr),
+				       NIP6(skb->nh.ipv6h->daddr));
+			return -1;
+		}
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	if (mh->ip6mh_proto != IPPROTO_NONE) {
+		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
+			       mh->ip6mh_proto);
+		mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw);
+		return -1;
+	}
+
+	return 0;
+}
+
+struct mip6_report_rate_limiter {
+	spinlock_t lock;
+	struct timeval stamp;
+	int iif;
+	struct in6_addr src;
+	struct in6_addr dst;
+};
+
+static struct mip6_report_rate_limiter mip6_report_rl = {
+	.lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
+};
+
+static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
+
+	if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
+	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
+		return -ENOENT;
+
+	return destopt->nexthdr;
+}
+
+/* Destination Option Header is inserted.
+ * IP Header's src address is saved into the Home Address Option of the
+ * Destination Option Header and then replaced with x->coaddr.
+ */
+static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	struct ipv6_destopt_hdr *dstopt;
+	struct ipv6_destopt_hao *hao;
+	u8 nexthdr;
+	int len;
+
+	iph = (struct ipv6hdr *)skb->data;
+	iph->payload_len = htons(skb->len - sizeof(*iph));
+
+	nexthdr = *skb->nh.raw;
+	*skb->nh.raw = IPPROTO_DSTOPTS;
+
+	dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
+	dstopt->nexthdr = nexthdr;
+
+	hao = mip6_padn((char *)(dstopt + 1),
+			calc_padlen(sizeof(*dstopt), 6));
+
+	hao->type = IPV6_TLV_HAO;
+	hao->length = sizeof(*hao) - 2;
+	BUG_TRAP(hao->length == 16);
+
+	len = ((char *)hao - (char *)dstopt) + sizeof(*hao);
+
+	memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr));
+	memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));
+
+	BUG_TRAP(len == x->props.header_len);
+	dstopt->hdrlen = (x->props.header_len >> 3) - 1;
+
+	return 0;
+}
+
+static inline int mip6_report_rl_allow(struct timeval *stamp,
+				       struct in6_addr *dst,
+				       struct in6_addr *src, int iif)
+{
+	int allow = 0;
+
+	spin_lock_bh(&mip6_report_rl.lock);
+	if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec ||
+	    mip6_report_rl.stamp.tv_usec != stamp->tv_usec ||
+	    mip6_report_rl.iif != iif ||
+	    !ipv6_addr_equal(&mip6_report_rl.src, src) ||
+	    !ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
+		mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
+		mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
+		mip6_report_rl.iif = iif;
+		ipv6_addr_copy(&mip6_report_rl.src, src);
+		ipv6_addr_copy(&mip6_report_rl.dst, dst);
+		allow = 1;
+	}
+	spin_unlock_bh(&mip6_report_rl.lock);
+	return allow;
+}
+
+static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
+{
+	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+	struct ipv6_destopt_hao *hao = NULL;
+	struct xfrm_selector sel;
+	int offset;
+	struct timeval stamp;
+	int err = 0;
+
+	if (unlikely(fl->proto == IPPROTO_MH &&
+		     fl->fl_mh_type <= IP6_MH_TYPE_MAX))
+		goto out;
+
+	if (likely(opt->dsthao)) {
+		offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
+		if (likely(offset >= 0))
+			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset);
+	}
+
+	skb_get_timestamp(skb, &stamp);
+
+	if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr,
+				  hao ? &hao->addr : &skb->nh.ipv6h->saddr,
+				  opt->iif))
+		goto out;
+
+	memset(&sel, 0, sizeof(sel));
+	memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr,
+	       sizeof(sel.daddr));
+	sel.prefixlen_d = 128;
+	memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+	       sizeof(sel.saddr));
+	sel.prefixlen_s = 128;
+	sel.family = AF_INET6;
+	sel.proto = fl->proto;
+	sel.dport = xfrm_flowi_dport(fl);
+	if (sel.dport)
+		sel.dport_mask = ~((__u16)0);
+	sel.sport = xfrm_flowi_sport(fl);
+	if (sel.sport)
+		sel.sport_mask = ~((__u16)0);
+	sel.ifindex = fl->oif;
+
+	err = km_report(IPPROTO_DSTOPTS, &sel,
+			(hao ? (xfrm_address_t *)&hao->addr : NULL));
+
+ out:
+	return err;
+}
+
+static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
+			       u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	unsigned int packet_len = skb->tail - skb->nh.raw;
+	int found_rhdr = 0;
+
+	*nexthdr = &skb->nh.ipv6h->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+
+		switch (**nexthdr) {
+		case NEXTHDR_HOP:
+			break;
+		case NEXTHDR_ROUTING:
+			found_rhdr = 1;
+			break;
+		case NEXTHDR_DEST:
+			/*
+			 * HAO MUST NOT appear more than once.
+			 * XXX: It would be better to search to the end of the
+			 * XXX: packet for an existing HAO.
+			 */
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
+				LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n");
+				return offset;
+			}
+
+			if (found_rhdr)
+				return offset;
+
+			break;
+		default:
+			return offset;
+		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	}
+
+	return offset;
+}
+
+static int mip6_destopt_init_state(struct xfrm_state *x)
+{
+	if (x->id.spi) {
+		printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
+		       x->id.spi);
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+		printk(KERN_INFO "%s: state's mode is not %u: %u\n",
+		       __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+		return -EINVAL;
+	}
+
+	x->props.header_len = sizeof(struct ipv6_destopt_hdr) +
+		calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) +
+		sizeof(struct ipv6_destopt_hao);
+	BUG_TRAP(x->props.header_len == 24);
+
+	return 0;
+}
+
+/*
+ * Nothing to do on destroy: unlike the IPsec protocols, the destination
+ * options header needs no specific teardown.
+ */
+static void mip6_destopt_destroy(struct xfrm_state *x)
+{
+}
+
+static struct xfrm_type mip6_destopt_type =
+{
+	.description	= "MIP6DESTOPT",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_DSTOPTS,
+	.flags		= XFRM_TYPE_NON_FRAGMENT,
+	.init_state	= mip6_destopt_init_state,
+	.destructor	= mip6_destopt_destroy,
+	.input		= mip6_destopt_input,
+	.output		= mip6_destopt_output,
+ 	.reject		= mip6_destopt_reject,
+	.hdr_offset	= mip6_destopt_offset,
+	.local_addr	= mip6_xfrm_addr,
+};
+
+static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
+
+	if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
+	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
+		return -ENOENT;
+
+	return rt2->rt_hdr.nexthdr;
+}
+
+/* Routing Header type 2 is inserted.
+ * The IP Header's dst address is saved in it and replaced by x->coaddr.
+ */
+static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	struct rt2_hdr *rt2;
+	u8 nexthdr;
+
+	iph = (struct ipv6hdr *)skb->data;
+	iph->payload_len = htons(skb->len - sizeof(*iph));
+
+	nexthdr = *skb->nh.raw;
+	*skb->nh.raw = IPPROTO_ROUTING;
+
+	rt2 = (struct rt2_hdr *)skb->h.raw;
+	rt2->rt_hdr.nexthdr = nexthdr;
+	rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
+	rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
+	rt2->rt_hdr.segments_left = 1;
+	memset(&rt2->reserved, 0, sizeof(rt2->reserved));
+
+	BUG_TRAP(rt2->rt_hdr.hdrlen == 2);
+
+	memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
+	memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));
+
+	return 0;
+}
+
+static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
+			     u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	unsigned int packet_len = skb->tail - skb->nh.raw;
+	int found_rhdr = 0;
+
+	*nexthdr = &skb->nh.ipv6h->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+
+		switch (**nexthdr) {
+		case NEXTHDR_HOP:
+			break;
+		case NEXTHDR_ROUTING:
+			if (offset + 3 <= packet_len) {
+				struct ipv6_rt_hdr *rt;
+				rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
+				if (rt->type != 0)
+					return offset;
+			}
+			found_rhdr = 1;
+			break;
+		case NEXTHDR_DEST:
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+				return offset;
+
+			if (found_rhdr)
+				return offset;
+
+			break;
+		default:
+			return offset;
+		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	}
+
+	return offset;
+}
+
+static int mip6_rthdr_init_state(struct xfrm_state *x)
+{
+	if (x->id.spi) {
+		printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__,
+		       x->id.spi);
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+		printk(KERN_INFO "%s: state's mode is not %u: %u\n",
+		       __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode);
+		return -EINVAL;
+	}
+
+	x->props.header_len = sizeof(struct rt2_hdr);
+
+	return 0;
+}
+
+/*
+ * Nothing to do on destroy: unlike the IPsec protocols, routing header
+ * type 2 needs no specific teardown.
+ */
+static void mip6_rthdr_destroy(struct xfrm_state *x)
+{
+}
+
+static struct xfrm_type mip6_rthdr_type =
+{
+	.description	= "MIP6RT",
+	.owner		= THIS_MODULE,
+	.proto	     	= IPPROTO_ROUTING,
+	.flags		= XFRM_TYPE_NON_FRAGMENT,
+	.init_state	= mip6_rthdr_init_state,
+	.destructor	= mip6_rthdr_destroy,
+	.input		= mip6_rthdr_input,
+	.output		= mip6_rthdr_output,
+	.hdr_offset	= mip6_rthdr_offset,
+	.remote_addr	= mip6_xfrm_addr,
+};
+
+int __init mip6_init(void)
+{
+	printk(KERN_INFO "Mobile IPv6\n");
+
+	if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) {
+		printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__);
+		goto mip6_destopt_xfrm_fail;
+	}
+	if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) {
+		printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
+		goto mip6_rthdr_xfrm_fail;
+	}
+	return 0;
+
+ mip6_rthdr_xfrm_fail:
+	xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
+ mip6_destopt_xfrm_fail:
+	return -EAGAIN;
+}
+
+void __exit mip6_fini(void)
+{
+	if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
+		printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
+	if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
+		printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__);
+}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b50055b..0304b5f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -62,6 +62,7 @@
 #include <linux/sysctl.h>
 #endif
 
+#include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
@@ -411,7 +412,8 @@
  */
 
 static inline void ndisc_flow_init(struct flowi *fl, u8 type,
-			    struct in6_addr *saddr, struct in6_addr *daddr)
+			    struct in6_addr *saddr, struct in6_addr *daddr,
+			    int oif)
 {
 	memset(fl, 0, sizeof(*fl));
 	ipv6_addr_copy(&fl->fl6_src, saddr);
@@ -419,6 +421,8 @@
 	fl->proto	 	= IPPROTO_ICMPV6;
 	fl->fl_icmp_type	= type;
 	fl->fl_icmp_code	= 0;
+	fl->oif			= oif;
+	security_sk_classify_flow(ndisc_socket->sk, fl);
 }
 
 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
@@ -450,7 +454,8 @@
 		src_addr = &tmpaddr;
 	}
 
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
+	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr,
+			dev->ifindex);
 
 	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
 	if (!dst)
@@ -491,7 +496,7 @@
         msg->icmph.icmp6_unused = 0;
         msg->icmph.icmp6_router    = router;
         msg->icmph.icmp6_solicited = solicited;
-        msg->icmph.icmp6_override  = !!override;
+        msg->icmph.icmp6_override  = override;
 
         /* Set the target address. */
 	ipv6_addr_copy(&msg->target, solicited_addr);
@@ -540,7 +545,8 @@
 		saddr = &addr_buf;
 	}
 
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
+	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr,
+			dev->ifindex);
 
 	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
 	if (!dst)
@@ -615,7 +621,8 @@
         int len;
 	int err;
 
-	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
+	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
+			dev->ifindex);
 
 	dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
 	if (!dst)
@@ -729,8 +736,10 @@
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev = NULL;
 	struct neighbour *neigh;
+	struct pneigh_entry *pneigh = NULL;
 	int dad = ipv6_addr_any(saddr);
 	int inc;
+	int is_router;
 
 	if (ipv6_addr_is_multicast(&msg->target)) {
 		ND_PRINTK2(KERN_WARNING 
@@ -815,7 +824,9 @@
 
 		if (ipv6_chk_acast_addr(dev, &msg->target) ||
 		    (idev->cnf.forwarding && 
-		     pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
+		     (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
+		     (pneigh = pneigh_lookup(&nd_tbl,
+					     &msg->target, dev, 0)) != NULL)) {
 			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
 			    skb->pkt_type != PACKET_HOST &&
 			    inc != 0 &&
@@ -836,12 +847,14 @@
 			goto out;
 	}
 
+	is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding);
+
 	if (dad) {
 		struct in6_addr maddr;
 
 		ipv6_addr_all_nodes(&maddr);
 		ndisc_send_na(dev, NULL, &maddr, &msg->target,
-			      idev->cnf.forwarding, 0, (ifp != NULL), 1);
+			      is_router, 0, (ifp != NULL), 1);
 		goto out;
 	}
 
@@ -862,7 +875,7 @@
 			     NEIGH_UPDATE_F_OVERRIDE);
 	if (neigh || !dev->hard_header) {
 		ndisc_send_na(dev, neigh, saddr, &msg->target,
-			      idev->cnf.forwarding, 
+			      is_router,
 			      1, (ifp != NULL && inc), inc);
 		if (neigh)
 			neigh_release(neigh);
@@ -945,6 +958,20 @@
 		if (neigh->nud_state & NUD_FAILED)
 			goto out;
 
+		/*
+		 * Don't update the neighbor cache entry on a proxy NA from
+		 * ourselves because either the proxied node is off link or it
+		 * has already sent a NA to us.
+		 */
+		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
+		    ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
+		    pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
+			/* XXX: idev->cnf.proxy_ndp */
+			WARN_ON(skb->dst != NULL &&
+				((struct rt6_info *)skb->dst)->rt6i_idev);
+			goto out;
+		}
+
 		neigh_update(neigh, lladdr,
 			     msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
@@ -959,7 +986,7 @@
 			struct rt6_info *rt;
 			rt = rt6_get_dflt_router(saddr, dev);
 			if (rt)
-				ip6_del_rt(rt, NULL, NULL, NULL);
+				ip6_del_rt(rt);
 		}
 
 out:
@@ -1112,7 +1139,7 @@
 
 	if (rt && lifetime == 0) {
 		neigh_clone(neigh);
-		ip6_del_rt(rt, NULL, NULL, NULL);
+		ip6_del_rt(rt);
 		rt = NULL;
 	}
 
@@ -1344,7 +1371,8 @@
 
 	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
 	if (neigh) {
-		rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr, 
+		rt6_redirect(dest, &skb->nh.ipv6h->daddr,
+			     &skb->nh.ipv6h->saddr, neigh, lladdr,
 			     on_link);
 		neigh_release(neigh);
 	}
@@ -1380,7 +1408,8 @@
  		return;
  	}
 
-	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
+	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr,
+			dev->ifindex);
 
 	dst = ip6_route_output(NULL, &fl);
 	if (dst == NULL)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 395a417..580b1ab 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -87,7 +87,7 @@
 	unsigned int csum = 0;
 
 	switch (skb->ip_summed) {
-	case CHECKSUM_HW:
+	case CHECKSUM_COMPLETE:
 		if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN)
 			break;
 		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index eeeb57d..ac1dfeb 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -5,7 +5,7 @@
 # Link order matters here.
 obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
 obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
-obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
+obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
 obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
 obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 968a14b..9510c24 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -56,15 +56,15 @@
 
 typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
 
-static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
+static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
 static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
+static int peer_pid __read_mostly;
+static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
 static unsigned int queue_dropped = 0;
 static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
+static struct sock *ipqnl __read_mostly;
 static LIST_HEAD(queue_list);
 static DEFINE_MUTEX(ipqnl_mutex);
 
@@ -206,9 +206,9 @@
 		break;
 	
 	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entry->skb,
-		                               entry->info->outdev == NULL))) {
+		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
+		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entry->skb))) {
 			read_unlock_bh(&queue_lock);
 			return NULL;
 		}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c9d6b23..4ab368f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -70,9 +70,6 @@
 #define IP_NF_ASSERT(x)
 #endif
 
-
-#include <linux/netfilter_ipv4/listhelp.h>
-
 #if 0
 /* All the better to debug you with... */
 #define static
@@ -220,8 +217,7 @@
 	  const struct net_device *out,
 	  unsigned int hooknum,
 	  const struct xt_target *target,
-	  const void *targinfo,
-	  void *userinfo)
+	  const void *targinfo)
 {
 	if (net_ratelimit())
 		printk("ip6_tables: error: `%s'\n", (char *)targinfo);
@@ -258,8 +254,7 @@
 	      unsigned int hook,
 	      const struct net_device *in,
 	      const struct net_device *out,
-	      struct xt_table *table,
-	      void *userdata)
+	      struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	int offset = 0;
@@ -349,8 +344,7 @@
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
-								     t->data,
-								     userdata);
+								     t->data);
 
 #ifdef CONFIG_NETFILTER_DEBUG
 				if (((struct ip6t_entry *)table_base)->comefrom
@@ -507,8 +501,7 @@
 		return 1;
 
 	if (m->u.kernel.match->destroy)
-		m->u.kernel.match->destroy(m->u.kernel.match, m->data,
-					   m->u.match_size - sizeof(*m));
+		m->u.kernel.match->destroy(m->u.kernel.match, m->data);
 	module_put(m->u.kernel.match->me);
 	return 0;
 }
@@ -561,7 +554,6 @@
 
 	if (m->u.kernel.match->checkentry
 	    && !m->u.kernel.match->checkentry(name, ipv6, match,  m->data,
-					      m->u.match_size - sizeof(*m),
 					      hookmask)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 m->u.kernel.match->name);
@@ -618,12 +610,10 @@
 	if (t->u.kernel.target == &ip6t_standard_target) {
 		if (!standard_check(t, size)) {
 			ret = -EINVAL;
-			goto cleanup_matches;
+			goto err;
 		}
 	} else if (t->u.kernel.target->checkentry
 		   && !t->u.kernel.target->checkentry(name, e, target, t->data,
-						      t->u.target_size
-						      - sizeof(*t),
 						      e->comefrom)) {
 		duprintf("ip_tables: check failed for `%s'.\n",
 			 t->u.kernel.target->name);
@@ -695,8 +685,7 @@
 	IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
 	t = ip6t_get_target(e);
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-					    t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
 	module_put(t->u.kernel.target->me);
 	return 0;
 }
@@ -1352,7 +1341,6 @@
 	   const void *entry,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_icmp *icmpinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index b8eff8e..435750f 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -22,11 +22,10 @@
 				   const struct net_device *out,
 				   unsigned int hooknum,
 				   const struct xt_target *target,
-				   const void *targinfo, void *userinfo)
+				   const void *targinfo)
 {
 	struct ipv6hdr *ip6h;
 	const struct ip6t_HL_info *info = targinfo;
-	u_int16_t diffs[2];
 	int new_hl;
 
 	if (!skb_make_writable(pskb, (*pskb)->len))
@@ -53,11 +52,8 @@
 			break;
 	}
 
-	if (new_hl != ip6h->hop_limit) {
-		diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF;
+	if (new_hl != ip6h->hop_limit)
 		ip6h->hop_limit = new_hl;
-		diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8);
-	}
 
 	return IP6T_CONTINUE;
 }
@@ -66,7 +62,6 @@
 		const void *entry,
 		const struct xt_target *target,
 		void *targinfo,
-		unsigned int targinfosize,
 		unsigned int hook_mask)
 {
 	struct ip6t_HL_info *info = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 73c6300..0cf537d 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -427,8 +427,7 @@
 		const struct net_device *out,
 		unsigned int hooknum,
 		const struct xt_target *target,
-		const void *targinfo,
-		void *userinfo)
+		const void *targinfo)
 {
 	const struct ip6t_log_info *loginfo = targinfo;
 	struct nf_loginfo li;
@@ -452,7 +451,6 @@
 			       const void *entry,
 			       const struct xt_target *target,
 			       void *targinfo,
-			       unsigned int targinfosize,
 			       unsigned int hook_mask)
 {
 	const struct ip6t_log_info *loginfo = targinfo;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 8629ba1..311eae8 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -96,6 +96,7 @@
 	ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
 	fl.fl_ip_sport = otcph.dest;
 	fl.fl_ip_dport = otcph.source;
+	security_skb_classify_flow(oldskb, &fl);
 	dst = ip6_route_output(NULL, &fl);
 	if (dst == NULL)
 		return;
@@ -179,8 +180,7 @@
 			   const struct net_device *out,
 			   unsigned int hooknum,
 			   const struct xt_target *target,
-			   const void *targinfo,
-			   void *userinfo)
+			   const void *targinfo)
 {
 	const struct ip6t_reject_info *reject = targinfo;
 
@@ -223,7 +223,6 @@
 		 const void *entry,
 		 const struct xt_target *target,
 		 void *targinfo,
-		 unsigned int targinfosize,
 		 unsigned int hook_mask)
 {
  	const struct ip6t_reject_info *rejinfo = targinfo;
@@ -256,9 +255,7 @@
 
 static int __init ip6t_reject_init(void)
 {
-	if (ip6t_register_target(&ip6t_reject_reg))
-		return -EINVAL;
-	return 0;
+	return ip6t_register_target(&ip6t_reject_reg);
 }
 
 static void __exit ip6t_reject_fini(void)
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 2f7bb20..ec1b160 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -102,7 +102,6 @@
           const void *entry,
 	  const struct xt_match *match,
           void *matchinfo,
-          unsigned int matchinfosize,
           unsigned int hook_mask)
 {
 	const struct ip6t_ah *ahinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
deleted file mode 100644
index 9422413..0000000
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/* Kernel module to match Hop-by-Hop and Destination parameters. */
-
-/* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <net/checksum.h>
-#include <net/ipv6.h>
-
-#include <asm/byteorder.h>
-
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter_ipv6/ip6t_opts.h>
-
-#define HOPBYHOP	0
-
-MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
-MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/*
- *  (Type & 0xC0) >> 6
- *	0	-> ignorable
- *	1	-> must drop the packet
- *	2	-> send ICMP PARM PROB regardless and drop packet
- *	3	-> Send ICMP if not a multicast address and drop packet
- *  (Type & 0x20) >> 5
- *	0	-> invariant
- *	1	-> can change the routing
- *  (Type & 0x1F) Type
- *	0	-> Pad1 (only 1 byte!)
- *	1	-> PadN LENGTH info (total length = length + 2)
- *	C0 | 2	-> JUMBO 4 x x x x ( xxxx > 64k )
- *	5	-> RTALERT 2 x x
- */
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	struct ipv6_opt_hdr _optsh, *oh;
-	const struct ip6t_opts *optinfo = matchinfo;
-	unsigned int temp;
-	unsigned int ptr;
-	unsigned int hdrlen = 0;
-	unsigned int ret = 0;
-	u8 _opttype, *tp = NULL;
-	u8 _optlen, *lp = NULL;
-	unsigned int optlen;
-
-#if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
-#else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
-#endif
-		return 0;
-
-	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-	if (oh == NULL) {
-		*hotdrop = 1;
-		return 0;
-	}
-
-	hdrlen = ipv6_optlen(oh);
-	if (skb->len - ptr < hdrlen) {
-		/* Packet smaller than it's length field */
-		return 0;
-	}
-
-	DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
-
-	DEBUGP("len %02X %04X %02X ",
-	       optinfo->hdrlen, hdrlen,
-	       (!(optinfo->flags & IP6T_OPTS_LEN) ||
-		((optinfo->hdrlen == hdrlen) ^
-		 !!(optinfo->invflags & IP6T_OPTS_INV_LEN))));
-
-	ret = (oh != NULL) &&
-	      (!(optinfo->flags & IP6T_OPTS_LEN) ||
-	       ((optinfo->hdrlen == hdrlen) ^
-		!!(optinfo->invflags & IP6T_OPTS_INV_LEN)));
-
-	ptr += 2;
-	hdrlen -= 2;
-	if (!(optinfo->flags & IP6T_OPTS_OPTS)) {
-		return ret;
-	} else if (optinfo->flags & IP6T_OPTS_NSTRICT) {
-		DEBUGP("Not strict - not implemented");
-	} else {
-		DEBUGP("Strict ");
-		DEBUGP("#%d ", optinfo->optsnr);
-		for (temp = 0; temp < optinfo->optsnr; temp++) {
-			/* type field exists ? */
-			if (hdrlen < 1)
-				break;
-			tp = skb_header_pointer(skb, ptr, sizeof(_opttype),
-						&_opttype);
-			if (tp == NULL)
-				break;
-
-			/* Type check */
-			if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) {
-				DEBUGP("Tbad %02X %02X\n",
-				       *tp,
-				       (optinfo->opts[temp] & 0xFF00) >> 8);
-				return 0;
-			} else {
-				DEBUGP("Tok ");
-			}
-			/* Length check */
-			if (*tp) {
-				u16 spec_len;
-
-				/* length field exists ? */
-				if (hdrlen < 2)
-					break;
-				lp = skb_header_pointer(skb, ptr + 1,
-							sizeof(_optlen),
-							&_optlen);
-				if (lp == NULL)
-					break;
-				spec_len = optinfo->opts[temp] & 0x00FF;
-
-				if (spec_len != 0x00FF && spec_len != *lp) {
-					DEBUGP("Lbad %02X %04X\n", *lp,
-					       spec_len);
-					return 0;
-				}
-				DEBUGP("Lok ");
-				optlen = *lp + 2;
-			} else {
-				DEBUGP("Pad1\n");
-				optlen = 1;
-			}
-
-			/* Step to the next */
-			DEBUGP("len%04X \n", optlen);
-
-			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
-			    (temp < optinfo->optsnr - 1)) {
-				DEBUGP("new pointer is too large! \n");
-				break;
-			}
-			ptr += optlen;
-			hdrlen -= optlen;
-		}
-		if (temp == optinfo->optsnr)
-			return ret;
-		else
-			return 0;
-	}
-
-	return 0;
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
-	   const void *info,
-	   const struct xt_match *match,
-	   void *matchinfo,
-	   unsigned int matchinfosize,
-	   unsigned int hook_mask)
-{
-	const struct ip6t_opts *optsinfo = matchinfo;
-
-	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
-		DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
-		return 0;
-	}
-	return 1;
-}
-
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
-	.name		= "hbh",
-#else
-	.name		= "dst",
-#endif
-	.match		= match,
-	.matchsize	= sizeof(struct ip6t_opts),
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-};
-
-static int __init ip6t_dst_init(void)
-{
-	return ip6t_register_match(&opts_match);
-}
-
-static void __exit ip6t_dst_fini(void)
-{
-	ip6t_unregister_match(&opts_match);
-}
-
-module_init(ip6t_dst_init);
-module_exit(ip6t_dst_fini);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 06768c8..78d9c8b 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -119,7 +119,6 @@
 	   const void *ip,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_frag *fraginfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 374f1be..d32a205 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -19,15 +19,10 @@
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_ipv6/ip6t_opts.h>
 
-#define HOPBYHOP	1
-
 MODULE_LICENSE("GPL");
-#if HOPBYHOP
-MODULE_DESCRIPTION("IPv6 HbH match");
-#else
-MODULE_DESCRIPTION("IPv6 DST match");
-#endif
+MODULE_DESCRIPTION("IPv6 opts match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
+MODULE_ALIAS("ip6t_dst");
 
 #if 0
 #define DEBUGP printk
@@ -71,11 +66,7 @@
 	u8 _optlen, *lp = NULL;
 	unsigned int optlen;
 
-#if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
-#else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
-#endif
+	if (ipv6_find_hdr(skb, &ptr, match->data, NULL) < 0)
 		return 0;
 
 	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
@@ -182,7 +173,6 @@
 	   const void *entry,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_opts *optsinfo = matchinfo;
@@ -194,26 +184,35 @@
 	return 1;
 }
 
-static struct ip6t_match opts_match = {
-#if HOPBYHOP
-	.name		= "hbh",
-#else
-	.name		= "dst",
-#endif
-	.match		= match,
-	.matchsize	= sizeof(struct ip6t_opts),
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
+static struct xt_match opts_match[] = {
+	{
+		.name		= "hbh",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct ip6t_opts),
+		.checkentry	= checkentry,
+		.me		= THIS_MODULE,
+		.data		= NEXTHDR_HOP,
+	},
+	{
+		.name		= "dst",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct ip6t_opts),
+		.checkentry	= checkentry,
+		.me		= THIS_MODULE,
+		.data		= NEXTHDR_DEST,
+	},
 };
 
 static int __init ip6t_hbh_init(void)
 {
-	return ip6t_register_match(&opts_match);
+	return xt_register_matches(opts_match, ARRAY_SIZE(opts_match));
 }
 
 static void __exit ip6t_hbh_fini(void)
 {
-	ip6t_unregister_match(&opts_match);
+	xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match));
 }
 
 module_init(ip6t_hbh_init);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 9375eeb..3093c39 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -128,7 +128,6 @@
 		      const void *ip,
 		      const struct xt_match *match,
 		      void *matchinfo,
-		      unsigned int matchsize,
 		      unsigned int hook_mask)
 {
 	const struct ip6t_ipv6header_info *info = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c
index 5d04799..4eb9bbc 100644
--- a/net/ipv6/netfilter/ip6t_owner.c
+++ b/net/ipv6/netfilter/ip6t_owner.c
@@ -57,7 +57,6 @@
 	   const void *ip,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_owner_info *info = matchinfo;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index fbb0184..bcb2e16 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -197,7 +197,6 @@
 	   const void *entry,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct ip6t_rt *rtinfo = matchinfo;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 60976c0..2fc07c7 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -108,7 +108,7 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static unsigned int
@@ -128,7 +128,7 @@
 	}
 #endif
 
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL);
+	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 03a13ea..386ea26 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -138,7 +138,7 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
 }
 
 static unsigned int
@@ -174,18 +174,14 @@
 	/* flowlabel and prio (includes version, which shouldn't change either */
 	flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
 
-	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
 
 	if (ret != NF_DROP && ret != NF_STOLEN 
 		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
 		    || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
 		    || (*pskb)->nfmark != nfmark
-		    || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) {
-
-		/* something which could affect routing has changed */
-
-		DEBUGP("ip6table_mangle: we'd need to re-route a packet\n");
-	}
+		    || (*pskb)->nh.ipv6h->hop_limit != hop_limit))
+		return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
 
 	return ret;
 }
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 61a7c58..b4154da 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -122,7 +122,7 @@
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL);
+	return ip6t_do_table(pskb, hook, in, out, &packet_raw);
 }
 
 static struct nf_hook_ops ip6t_ops[] = { 
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c2ab38f..e5e53ff 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -335,7 +335,7 @@
 /* From nf_conntrack_proto_icmpv6.c */
 extern unsigned int nf_ct_icmpv6_timeout;
 
-/* From nf_conntrack_frag6.c */
+/* From nf_conntrack_reasm.c */
 extern unsigned int nf_ct_frag6_timeout;
 extern unsigned int nf_ct_frag6_low_thresh;
 extern unsigned int nf_ct_frag6_high_thresh;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index ef18a7b..34d4472 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -33,7 +33,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 
-unsigned long nf_ct_icmpv6_timeout = 30*HZ;
+unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
 
 #if 0
 #define DEBUGP printk
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 00d5583..bf93c1e 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -54,9 +54,9 @@
 #define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
 #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
 
-unsigned int nf_ct_frag6_high_thresh = 256*1024;
-unsigned int nf_ct_frag6_low_thresh = 192*1024;
-unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;
+unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024;
+unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024;
+unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT;
 
 struct nf_ct_frag6_skb_cb
 {
@@ -408,7 +408,7 @@
  		return -1;
 	}
 
- 	if (skb->ip_summed == CHECKSUM_HW)
+ 	if (skb->ip_summed == CHECKSUM_COMPLETE)
  		skb->csum = csum_sub(skb->csum,
  				     csum_partial(skb->nh.raw,
 						  (u8*)(fhdr + 1) - skb->nh.raw,
@@ -640,7 +640,7 @@
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 		atomic_sub(fp->truesize, &nf_ct_frag6_mem);
@@ -652,7 +652,7 @@
 	head->nh.ipv6h->payload_len = htons(payload_len);
 
 	/* Yes, and fold redundant checksum back. 8) */
-	if (head->ip_summed == CHECKSUM_HW)
+	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
 
 	fq->fragments = NULL;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 15b862d..d09329c 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -50,6 +50,9 @@
 #include <net/udp.h>
 #include <net/inet_common.h>
 #include <net/tcp_states.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
 
 #include <net/rawv6.h>
 #include <net/xfrm.h>
@@ -169,8 +172,32 @@
 	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif);
 
 	while (sk) {
+		int filtered;
+
 		delivered = 1;
-		if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
+		switch (nexthdr) {
+		case IPPROTO_ICMPV6:
+			filtered = icmpv6_filter(sk, skb);
+			break;
+#ifdef CONFIG_IPV6_MIP6
+		case IPPROTO_MH:
+			/* XXX: To validate MH only once for each packet,
+			 * this is placed here. It should come after the xfrm
+			 * policy check, but it does not: the xfrm policy check
+			 * is placed in rawv6_rcv() because it is required for
+			 * each socket.
+			 */
+			filtered = mip6_mh_filter(sk, skb);
+			break;
+#endif
+		default:
+			filtered = 0;
+			break;
+		}
+
+		if (filtered < 0)
+			break;
+		if (filtered == 0) {
 			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 
 			/* Not releasing hash table! */
@@ -334,7 +361,7 @@
 	if (!rp->checksum)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		skb_postpull_rcsum(skb, skb->nh.raw,
 		                   skb->h.raw - skb->nh.raw);
 		if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
@@ -582,6 +609,9 @@
 	struct iovec *iov;
 	u8 __user *type = NULL;
 	u8 __user *code = NULL;
+#ifdef CONFIG_IPV6_MIP6
+	u8 len = 0;
+#endif
 	int probed = 0;
 	int i;
 
@@ -613,6 +643,20 @@
 				probed = 1;
 			}
 			break;
+#ifdef CONFIG_IPV6_MIP6
+		case IPPROTO_MH:
+			if (iov->iov_base && iov->iov_len < 1)
+				break;
+			/* check if type field is readable or not. */
+			if (iov->iov_len > 2 - len) {
+				u8 __user *p = iov->iov_base;
+				get_user(fl->fl_mh_type, &p[2 - len]);
+				probed = 1;
+			} else
+				len += iov->iov_len;
+
+			break;
+#endif
 		default:
 			probed = 1;
 			break;
@@ -759,6 +803,7 @@
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
+	security_sk_classify_flow(sk, &fl);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4e299c6..f39bbed 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -53,10 +53,10 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 
-int sysctl_ip6frag_high_thresh = 256*1024;
-int sysctl_ip6frag_low_thresh = 192*1024;
+int sysctl_ip6frag_high_thresh __read_mostly = 256*1024;
+int sysctl_ip6frag_low_thresh __read_mostly = 192*1024;
 
-int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
+int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT;
 
 struct ip6frag_skb_cb
 {
@@ -152,7 +152,7 @@
 }
 
 static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval = 10 * 60 * HZ;
+int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ;
 
 static void ip6_frag_secret_rebuild(unsigned long dummy)
 {
@@ -433,7 +433,7 @@
  		return;
 	}
 
- 	if (skb->ip_summed == CHECKSUM_HW)
+ 	if (skb->ip_summed == CHECKSUM_COMPLETE)
  		skb->csum = csum_sub(skb->csum,
  				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
 
@@ -647,7 +647,7 @@
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
 			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_HW)
+		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 		atomic_sub(fp->truesize, &ip6_frag_mem);
@@ -662,7 +662,7 @@
 	*skb_in = head;
 
 	/* Yes, and fold redundant checksum back. 8) */
-	if (head->ip_summed == CHECKSUM_HW)
+	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
 
 	IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d9baca0..d6b4b4f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -22,6 +22,8 @@
  *		routers in REACHABLE, STALE, DELAY or PROBE states).
  *		- always select the same router if it is (probably)
  *		reachable.  otherwise, round-robin the list.
+ *	Ville Nuorvala
+ *		Fixed routing subtrees.
  */
 
 #include <linux/capability.h>
@@ -35,7 +37,6 @@
 #include <linux/netdevice.h>
 #include <linux/in6.h>
 #include <linux/init.h>
-#include <linux/netlink.h>
 #include <linux/if_arp.h>
 
 #ifdef 	CONFIG_PROC_FS
@@ -54,6 +55,7 @@
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/netevent.h>
+#include <net/netlink.h>
 
 #include <asm/uaccess.h>
 
@@ -74,9 +76,6 @@
 
 #define CLONE_OFFLINK_ROUTE 0
 
-#define RT6_SELECT_F_IFACE	0x1
-#define RT6_SELECT_F_REACHABLE	0x2
-
 static int ip6_rt_max_size = 4096;
 static int ip6_rt_gc_min_interval = HZ / 2;
 static int ip6_rt_gc_timeout = 60*HZ;
@@ -140,15 +139,49 @@
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
-struct fib6_node ip6_routing_table = {
-	.leaf		= &ip6_null_entry,
-	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+
+struct rt6_info ip6_prohibit_entry = {
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -EACCES,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard_out,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_prohibit_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
-/* Protects all the ip6 fib */
+struct rt6_info ip6_blk_hole_entry = {
+	.u = {
+		.dst = {
+			.__refcnt	= ATOMIC_INIT(1),
+			.__use		= 1,
+			.dev		= &loopback_dev,
+			.obsolete	= -1,
+			.error		= -EINVAL,
+			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+			.input		= ip6_pkt_discard,
+			.output		= ip6_pkt_discard_out,
+			.ops		= &ip6_dst_ops,
+			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
+		}
+	},
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_metric	= ~(u32) 0,
+	.rt6i_ref	= ATOMIC_INIT(1),
+};
 
-DEFINE_RWLOCK(rt6_lock);
-
+#endif
 
 /* allocate dst with ip6_dst_ops */
 static __inline__ struct rt6_info *ip6_dst_alloc(void)
@@ -188,8 +221,14 @@
 		time_after(jiffies, rt->rt6i_expires));
 }
 
+static inline int rt6_need_strict(struct in6_addr *daddr)
+{
+	return (ipv6_addr_type(daddr) &
+		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+}
+
 /*
- *	Route lookup. Any rt6_lock is implied.
+ *	Route lookup. Any table->tb6_lock is implied.
  */
 
 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
@@ -298,7 +337,7 @@
 	int m, n;
 		
 	m = rt6_check_dev(rt, oif);
-	if (!m && (strict & RT6_SELECT_F_IFACE))
+	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 		return -1;
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
@@ -306,7 +345,7 @@
 	n = rt6_check_neigh(rt);
 	if (n > 1)
 		m |= 16;
-	else if (!n && strict & RT6_SELECT_F_REACHABLE)
+	else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
 		return -1;
 	return m;
 }
@@ -346,7 +385,7 @@
 	}
 
 	if (!match &&
-	    (strict & RT6_SELECT_F_REACHABLE) &&
+	    (strict & RT6_LOOKUP_F_REACHABLE) &&
 	    last && last != rt0) {
 		/* no entries matched; do round-robin */
 		static DEFINE_SPINLOCK(lock);
@@ -417,7 +456,7 @@
 	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
 
 	if (rt && !lifetime) {
-		ip6_del_rt(rt, NULL, NULL, NULL);
+		ip6_del_rt(rt);
 		rt = NULL;
 	}
 
@@ -441,44 +480,95 @@
 }
 #endif
 
-struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
-			    int oif, int strict)
+#define BACKTRACK(saddr) \
+do { \
+	if (rt == &ip6_null_entry) { \
+		struct fib6_node *pn; \
+		while (fn) { \
+			if (fn->fn_flags & RTN_TL_ROOT) \
+				goto out; \
+			pn = fn->parent; \
+			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
+				fn = fib6_lookup(pn->subtree, NULL, saddr); \
+			else \
+				fn = pn; \
+			if (fn->fn_flags & RTN_RTINFO) \
+				goto restart; \
+		} \
+	} \
+} while(0)
+
+static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
+					     struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 
-	read_lock_bh(&rt6_lock);
-	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
-	rt = rt6_device_match(fn->leaf, oif, strict);
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+restart:
+	rt = fn->leaf;
+	rt = rt6_device_match(rt, fl->oif, flags);
+	BACKTRACK(&fl->fl6_src);
+out:
 	dst_hold(&rt->u.dst);
-	rt->u.dst.__use++;
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	rt->u.dst.lastuse = jiffies;
-	if (rt->u.dst.error == 0)
-		return rt;
-	dst_release(&rt->u.dst);
+	rt->u.dst.__use++;
+
+	return rt;
+
+}
+
+struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
+			    int oif, int strict)
+{
+	struct flowi fl = {
+		.oif = oif,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *daddr,
+				/* TODO: saddr */
+			},
+		},
+	};
+	struct dst_entry *dst;
+	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
+
+	dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
+	if (dst->error == 0)
+		return (struct rt6_info *) dst;
+
+	dst_release(dst);
+
 	return NULL;
 }
 
-/* ip6_ins_rt is called with FREE rt6_lock.
+/* ip6_ins_rt is called with FREE table->tb6_lock.
    It takes new route entry, the addition fails by any reason the
    route is freed. In any case, if caller does not hold it, it may
    be destroyed.
  */
 
-int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
-		void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 {
 	int err;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
-	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
-	write_unlock_bh(&rt6_lock);
+	table = rt->rt6i_table;
+	write_lock_bh(&table->tb6_lock);
+	err = fib6_add(&table->tb6_root, rt, info);
+	write_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
 
+int ip6_ins_rt(struct rt6_info *rt)
+{
+	return __ip6_ins_rt(rt, NULL);
+}
+
 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
 				      struct in6_addr *saddr)
 {
@@ -532,116 +622,33 @@
 	return rt;
 }
 
-#define BACKTRACK() \
-if (rt == &ip6_null_entry) { \
-       while ((fn = fn->parent) != NULL) { \
-		if (fn->fn_flags & RTN_ROOT) { \
-			goto out; \
-		} \
-		if (fn->fn_flags & RTN_RTINFO) \
-			goto restart; \
-	} \
-}
-
-
-void ip6_route_input(struct sk_buff *skb)
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+					    struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt, *nrt;
-	int strict;
+	int strict = 0;
 	int attempts = 3;
 	int err;
-	int reachable = RT6_SELECT_F_REACHABLE;
+	int reachable = RT6_LOOKUP_F_REACHABLE;
 
-	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
+	strict |= flags & RT6_LOOKUP_F_IFACE;
 
 relookup:
-	read_lock_bh(&rt6_lock);
+	read_lock_bh(&table->tb6_lock);
 
 restart_2:
-	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
-			 &skb->nh.ipv6h->saddr);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-	rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
-	BACKTRACK();
+	rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
+	BACKTRACK(&fl->fl6_src);
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
 
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
-
-	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
-		nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
-	else {
-#if CLONE_OFFLINK_ROUTE
-		nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
-#else
-		goto out2;
-#endif
-	}
-
-	dst_release(&rt->u.dst);
-	rt = nrt ? : &ip6_null_entry;
-
-	dst_hold(&rt->u.dst);
-	if (nrt) {
-		err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
-		if (!err)
-			goto out2;
-	}
-
-	if (--attempts <= 0)
-		goto out2;
-
-	/*
-	 * Race condition! In the gap, when rt6_lock was
-	 * released someone could insert this route.  Relookup.
-	 */
-	dst_release(&rt->u.dst);
-	goto relookup;
-
-out:
-	if (reachable) {
-		reachable = 0;
-		goto restart_2;
-	}
-	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
-out2:
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.__use++;
-	skb->dst = (struct dst_entry *) rt;
-	return;
-}
-
-struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
-{
-	struct fib6_node *fn;
-	struct rt6_info *rt, *nrt;
-	int strict;
-	int attempts = 3;
-	int err;
-	int reachable = RT6_SELECT_F_REACHABLE;
-
-	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
-
-relookup:
-	read_lock_bh(&rt6_lock);
-
-restart_2:
-	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
-
-restart:
-	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
-	BACKTRACK();
-	if (rt == &ip6_null_entry ||
-	    rt->rt6i_flags & RTF_CACHE)
-		goto out;
-
-	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
@@ -658,7 +665,7 @@
 
 	dst_hold(&rt->u.dst);
 	if (nrt) {
-		err = ip6_ins_rt(nrt, NULL, NULL, NULL);
+		err = ip6_ins_rt(nrt);
 		if (!err)
 			goto out2;
 	}
@@ -667,7 +674,7 @@
 		goto out2;
 
 	/*
-	 * Race condition! In the gap, when rt6_lock was
+	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
 	dst_release(&rt->u.dst);
@@ -679,11 +686,115 @@
 		goto restart_2;
 	}
 	dst_hold(&rt->u.dst);
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 out2:
 	rt->u.dst.lastuse = jiffies;
 	rt->u.dst.__use++;
-	return &rt->u.dst;
+
+	return rt;
+}
+
+void ip6_route_input(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct flowi fl = {
+		.iif = skb->dev->ifindex,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = iph->daddr,
+				.saddr = iph->saddr,
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+				.fwmark = skb->nfmark,
+#endif
+				.flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
+			},
+		},
+		.proto = iph->nexthdr,
+	};
+	int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
+
+	skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
+}
+
+static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
+					     struct flowi *fl, int flags)
+{
+	struct fib6_node *fn;
+	struct rt6_info *rt, *nrt;
+	int strict = 0;
+	int attempts = 3;
+	int err;
+	int reachable = RT6_LOOKUP_F_REACHABLE;
+
+	strict |= flags & RT6_LOOKUP_F_IFACE;
+
+relookup:
+	read_lock_bh(&table->tb6_lock);
+
+restart_2:
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+
+restart:
+	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
+	BACKTRACK(&fl->fl6_src);
+	if (rt == &ip6_null_entry ||
+	    rt->rt6i_flags & RTF_CACHE)
+		goto out;
+
+	dst_hold(&rt->u.dst);
+	read_unlock_bh(&table->tb6_lock);
+
+	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
+		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
+	else {
+#if CLONE_OFFLINK_ROUTE
+		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
+#else
+		goto out2;
+#endif
+	}
+
+	dst_release(&rt->u.dst);
+	rt = nrt ? : &ip6_null_entry;
+
+	dst_hold(&rt->u.dst);
+	if (nrt) {
+		err = ip6_ins_rt(nrt);
+		if (!err)
+			goto out2;
+	}
+
+	if (--attempts <= 0)
+		goto out2;
+
+	/*
+	 * Race condition! In the gap, when table->tb6_lock was
+	 * released someone could insert this route.  Relookup.
+	 */
+	dst_release(&rt->u.dst);
+	goto relookup;
+
+out:
+	if (reachable) {
+		reachable = 0;
+		goto restart_2;
+	}
+	dst_hold(&rt->u.dst);
+	read_unlock_bh(&table->tb6_lock);
+out2:
+	rt->u.dst.lastuse = jiffies;
+	rt->u.dst.__use++;
+	return rt;
+}
+
+struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
+{
+	int flags = 0;
+
+	if (rt6_need_strict(&fl->fl6_dst))
+		flags |= RT6_LOOKUP_F_IFACE;
+
+	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
 }
 
 
@@ -709,7 +820,7 @@
 
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE)
-			ip6_del_rt(rt, NULL, NULL, NULL);
+			ip6_del_rt(rt);
 		else
 			dst_release(dst);
 	}
@@ -747,8 +858,6 @@
 	}
 }
 
-/* Protected by rt6_lock.  */
-static struct dst_entry *ndisc_dst_gc_list;
 static int ipv6_get_mtu(struct net_device *dev);
 
 static inline unsigned int ipv6_advmss(unsigned int mtu)
@@ -769,6 +878,9 @@
 	return mtu;
 }
 
+static struct dst_entry *ndisc_dst_gc_list;
+static DEFINE_SPINLOCK(ndisc_lock);
+
 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
 				  struct neighbour *neigh,
 				  struct in6_addr *addr,
@@ -809,10 +921,10 @@
 	rt->rt6i_dst.plen = 128;
 #endif
 
-	write_lock_bh(&rt6_lock);
+	spin_lock_bh(&ndisc_lock);
 	rt->u.dst.next = ndisc_dst_gc_list;
 	ndisc_dst_gc_list = &rt->u.dst;
-	write_unlock_bh(&rt6_lock);
+	spin_unlock_bh(&ndisc_lock);
 
 	fib6_force_start_gc();
 
@@ -826,8 +938,11 @@
 	int freed;
 
 	next = NULL;
+ 	freed = 0;
+
+	spin_lock_bh(&ndisc_lock);
 	pprev = &ndisc_dst_gc_list;
-	freed = 0;
+
 	while ((dst = *pprev) != NULL) {
 		if (!atomic_read(&dst->__refcnt)) {
 			*pprev = dst->next;
@@ -839,6 +954,8 @@
 		}
 	}
 
+	spin_unlock_bh(&ndisc_lock);
+
 	return freed;
 }
 
@@ -899,28 +1016,24 @@
  *
  */
 
-int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
-		void *_rtattr, struct netlink_skb_parms *req)
+int ip6_route_add(struct fib6_config *cfg)
 {
 	int err;
-	struct rtmsg *r;
-	struct rtattr **rta;
 	struct rt6_info *rt = NULL;
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
+	struct fib6_table *table;
 	int addr_type;
 
-	rta = (struct rtattr **) _rtattr;
-
-	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
+	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
 		return -EINVAL;
 #ifndef CONFIG_IPV6_SUBTREES
-	if (rtmsg->rtmsg_src_len)
+	if (cfg->fc_src_len)
 		return -EINVAL;
 #endif
-	if (rtmsg->rtmsg_ifindex) {
+	if (cfg->fc_ifindex) {
 		err = -ENODEV;
-		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
+		dev = dev_get_by_index(cfg->fc_ifindex);
 		if (!dev)
 			goto out;
 		idev = in6_dev_get(dev);
@@ -928,8 +1041,14 @@
 			goto out;
 	}
 
-	if (rtmsg->rtmsg_metric == 0)
-		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
+	if (cfg->fc_metric == 0)
+		cfg->fc_metric = IP6_RT_PRIO_USER;
+
+	table = fib6_new_table(cfg->fc_table);
+	if (table == NULL) {
+		err = -ENOBUFS;
+		goto out;
+	}
 
 	rt = ip6_dst_alloc();
 
@@ -939,14 +1058,13 @@
 	}
 
 	rt->u.dst.obsolete = -1;
-	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
-	if (nlh && (r = NLMSG_DATA(nlh))) {
-		rt->rt6i_protocol = r->rtm_protocol;
-	} else {
-		rt->rt6i_protocol = RTPROT_BOOT;
-	}
+	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
 
-	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
+	if (cfg->fc_protocol == RTPROT_UNSPEC)
+		cfg->fc_protocol = RTPROT_BOOT;
+	rt->rt6i_protocol = cfg->fc_protocol;
+
+	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
 	if (addr_type & IPV6_ADDR_MULTICAST)
 		rt->u.dst.input = ip6_mc_input;
@@ -955,24 +1073,22 @@
 
 	rt->u.dst.output = ip6_output;
 
-	ipv6_addr_prefix(&rt->rt6i_dst.addr, 
-			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
-	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
+	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
+	rt->rt6i_dst.plen = cfg->fc_dst_len;
 	if (rt->rt6i_dst.plen == 128)
 	       rt->u.dst.flags = DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
-	ipv6_addr_prefix(&rt->rt6i_src.addr, 
-			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
-	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
+	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
+	rt->rt6i_src.plen = cfg->fc_src_len;
 #endif
 
-	rt->rt6i_metric = rtmsg->rtmsg_metric;
+	rt->rt6i_metric = cfg->fc_metric;
 
 	/* We cannot add true routes via loopback here,
 	   they would result in kernel looping; promote them to reject routes
 	 */
-	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
+	if ((cfg->fc_flags & RTF_REJECT) ||
 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
 		/* hold loopback dev/idev if we haven't done so. */
 		if (dev != &loopback_dev) {
@@ -995,12 +1111,12 @@
 		goto install_route;
 	}
 
-	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
+	if (cfg->fc_flags & RTF_GATEWAY) {
 		struct in6_addr *gw_addr;
 		int gwa_type;
 
-		gw_addr = &rtmsg->rtmsg_gateway;
-		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
+		gw_addr = &cfg->fc_gateway;
+		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
 		gwa_type = ipv6_addr_type(gw_addr);
 
 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -1017,7 +1133,7 @@
 			if (!(gwa_type&IPV6_ADDR_UNICAST))
 				goto out;
 
-			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
+			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
 
 			err = -EHOSTUNREACH;
 			if (grt == NULL)
@@ -1049,7 +1165,7 @@
 	if (dev == NULL)
 		goto out;
 
-	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
+	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
 		if (IS_ERR(rt->rt6i_nexthop)) {
 			err = PTR_ERR(rt->rt6i_nexthop);
@@ -1058,24 +1174,24 @@
 		}
 	}
 
-	rt->rt6i_flags = rtmsg->rtmsg_flags;
+	rt->rt6i_flags = cfg->fc_flags;
 
 install_route:
-	if (rta && rta[RTA_METRICS-1]) {
-		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
-		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
+	if (cfg->fc_mx) {
+		struct nlattr *nla;
+		int remaining;
 
-		while (RTA_OK(attr, attrlen)) {
-			unsigned flavor = attr->rta_type;
-			if (flavor) {
-				if (flavor > RTAX_MAX) {
+		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+			int type = nla->nla_type;
+
+			if (type) {
+				if (type > RTAX_MAX) {
 					err = -EINVAL;
 					goto out;
 				}
-				rt->u.dst.metrics[flavor-1] =
-					*(u32 *)RTA_DATA(attr);
+
+				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
 			}
-			attr = RTA_NEXT(attr, attrlen);
 		}
 	}
 
@@ -1087,7 +1203,8 @@
 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
 	rt->u.dst.dev = dev;
 	rt->rt6i_idev = idev;
-	return ip6_ins_rt(rt, nlh, _rtattr, req);
+	rt->rt6i_table = table;
+	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
 
 out:
 	if (dev)
@@ -1099,51 +1216,65 @@
 	return err;
 }
 
-int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 {
 	int err;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
+	if (rt == &ip6_null_entry)
+		return -ENOENT;	/* NOTE(review): returns without the dst_release() done below */
 
-	err = fib6_del(rt, nlh, _rtattr, req);
+	table = rt->rt6i_table;	/* lock the table this route belongs to */
+	write_lock_bh(&table->tb6_lock);
+
+	err = fib6_del(rt, info);	/* info is passed through unchanged; may be NULL (see ip6_del_rt) */
 	dst_release(&rt->u.dst);
 
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
 
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
+int ip6_del_rt(struct rt6_info *rt)
 {
+	return __ip6_del_rt(rt, NULL);	/* wrapper for callers that have no nl_info to pass */
+}
+
+static int ip6_route_del(struct fib6_config *cfg)
+{
+	struct fib6_table *table;
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 	int err = -ESRCH;
 
-	read_lock_bh(&rt6_lock);
+	table = fib6_get_table(cfg->fc_table);
+	if (table == NULL)
+		return err;	/* -ESRCH when fib6_get_table() returns NULL */
 
-	fn = fib6_locate(&ip6_routing_table,
-			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
-			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
+	read_lock_bh(&table->tb6_lock);
+
+	fn = fib6_locate(&table->tb6_root,
+			 &cfg->fc_dst, cfg->fc_dst_len,
+			 &cfg->fc_src, cfg->fc_src_len);
 	
 	if (fn) {
 		for (rt = fn->leaf; rt; rt = rt->u.next) {
-			if (rtmsg->rtmsg_ifindex &&
+			if (cfg->fc_ifindex &&	/* a zero ifindex skips the device check */
 			    (rt->rt6i_dev == NULL ||
-			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
+			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
 				continue;
-			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
-			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
+			if (cfg->fc_flags & RTF_GATEWAY &&	/* gateway compared only when RTF_GATEWAY is set */
+			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
 				continue;
-			if (rtmsg->rtmsg_metric &&
-			    rtmsg->rtmsg_metric != rt->rt6i_metric)
+			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)	/* a zero metric skips the check */
 				continue;
 			dst_hold(&rt->u.dst);
-			read_unlock_bh(&rt6_lock);
+			read_unlock_bh(&table->tb6_lock);	/* __ip6_del_rt() takes the write lock itself */
 
-			return ip6_del_rt(rt, nlh, _rtattr, req);
+			return __ip6_del_rt(rt, &cfg->fc_nlinfo);	/* first match only; it does the dst_release() */
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
@@ -1151,13 +1282,18 @@
 /*
  *	Handle redirects
  */
-void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
-		  struct neighbour *neigh, u8 *lladdr, int on_link)
+struct ip6rd_flowi {
+	struct flowi fl;	/* must stay first: pointers are cast between struct flowi * and this type */
+	struct in6_addr gateway;	/* compared with rt6i_gateway in __ip6_route_redirect() */
+};
+
+static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
+					     struct flowi *fl,
+					     int flags)
 {
-	struct rt6_info *rt, *nrt = NULL;
-	int strict;
+	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
+	struct rt6_info *rt;
 	struct fib6_node *fn;
-	struct netevent_redirect netevent;
 
 	/*
 	 * Get the "current" route for this destination and
@@ -1169,10 +1305,9 @@
 	 * is a bit fuzzy and one might need to check all possible
 	 * routes.
 	 */
-	strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
 
-	read_lock_bh(&rt6_lock);
-	fn = fib6_lookup(&ip6_routing_table, dest, NULL);
+	read_lock_bh(&table->tb6_lock);
+	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 restart:
 	for (rt = fn->leaf; rt; rt = rt->u.next) {
 		/*
@@ -1187,29 +1322,60 @@
 			continue;
 		if (!(rt->rt6i_flags & RTF_GATEWAY))
 			continue;
-		if (neigh->dev != rt->rt6i_dev)
+		if (fl->oif != rt->rt6i_dev->ifindex)
 			continue;
-		if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
+		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
 			continue;
 		break;
 	}
-	if (rt)
-		dst_hold(&rt->u.dst);
-	else if (strict) {
-		while ((fn = fn->parent) != NULL) {
-			if (fn->fn_flags & RTN_ROOT)
-				break;
-			if (fn->fn_flags & RTN_RTINFO)
-				goto restart;
-		}
-	}
-	read_unlock_bh(&rt6_lock);
 
-	if (!rt) {
+	if (!rt)
+		rt = &ip6_null_entry;
+	BACKTRACK(&fl->fl6_src);
+out:
+	dst_hold(&rt->u.dst);
+
+	read_unlock_bh(&table->tb6_lock);
+
+	return rt;
+};
+
+static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
+					   struct in6_addr *src,
+					   struct in6_addr *gateway,
+					   struct net_device *dev)
+{
+	struct ip6rd_flowi rdfl = {	/* flowi plus the gateway, for __ip6_route_redirect() */
+		.fl = {
+			.oif = dev->ifindex,	/* checked against rt6i_dev->ifindex by the callback */
+			.nl_u = {
+				.ip6_u = {
+					.daddr = *dest,
+					.saddr = *src,
+				},
+			},
+		},
+		.gateway = *gateway,	/* read back in the callback via the ip6rd_flowi cast */
+	};
+	int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
+
+	return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
+}
+
+void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
+		  struct in6_addr *saddr,
+		  struct neighbour *neigh, u8 *lladdr, int on_link)
+{
+	struct rt6_info *rt, *nrt = NULL;
+	struct netevent_redirect netevent;
+
+	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+
+	if (rt == &ip6_null_entry) {
 		if (net_ratelimit())
 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
 			       "for redirect target\n");
-		return;
+		goto out;
 	}
 
 	/*
@@ -1252,7 +1418,7 @@
 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
 
-	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
+	if (ip6_ins_rt(nrt))
 		goto out;
 
 	netevent.old = &rt->u.dst;
@@ -1260,7 +1426,7 @@
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 	if (rt->rt6i_flags&RTF_CACHE) {
-		ip6_del_rt(rt, NULL, NULL, NULL);
+		ip6_del_rt(rt);
 		return;
 	}
 
@@ -1342,7 +1508,7 @@
 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
-		ip6_ins_rt(nrt, NULL, NULL, NULL);
+		ip6_ins_rt(nrt);
 	}
 out:
 	dst_release(&rt->u.dst);
@@ -1378,6 +1544,7 @@
 #ifdef CONFIG_IPV6_SUBTREES
 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 #endif
+		rt->rt6i_table = ort->rt6i_table;
 	}
 	return rt;
 }
@@ -1388,9 +1555,14 @@
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
+	struct fib6_table *table;
 
-	write_lock_bh(&rt6_lock);
-	fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
+	table = fib6_get_table(RT6_TABLE_INFO);
+	if (table == NULL)
+		return NULL;
+
+	write_lock_bh(&table->tb6_lock);
+	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
 	if (!fn)
 		goto out;
 
@@ -1405,7 +1577,7 @@
 		break;
 	}
 out:
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
@@ -1413,21 +1585,23 @@
 					   struct in6_addr *gwaddr, int ifindex,
 					   unsigned pref)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table	= RT6_TABLE_INFO,
+		.fc_metric	= 1024,
+		.fc_ifindex	= ifindex,
+		.fc_dst_len	= prefixlen,
+		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
+				  RTF_UP | RTF_PREF(pref),
+	};
 
-	memset(&rtmsg, 0, sizeof(rtmsg));
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
-	rtmsg.rtmsg_dst_len = prefixlen;
-	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
-	rtmsg.rtmsg_metric = 1024;
-	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
+	ipv6_addr_copy(&cfg.fc_dst, prefix);
+	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
+
 	/* We should treat it as a default route if prefix length is 0. */
 	if (!prefixlen)
-		rtmsg.rtmsg_flags |= RTF_DEFAULT;
-	rtmsg.rtmsg_ifindex = ifindex;
+		cfg.fc_flags |= RTF_DEFAULT;
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
+	ip6_route_add(&cfg);
 
 	return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
 }
@@ -1436,12 +1610,14 @@
 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
 {	
 	struct rt6_info *rt;
-	struct fib6_node *fn;
+	struct fib6_table *table;
 
-	fn = &ip6_routing_table;
+	table = fib6_get_table(RT6_TABLE_DFLT);
+	if (table == NULL)
+		return NULL;
 
-	write_lock_bh(&rt6_lock);
-	for (rt = fn->leaf; rt; rt=rt->u.next) {
+	write_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
 		if (dev == rt->rt6i_dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
@@ -1449,7 +1625,7 @@
 	}
 	if (rt)
 		dst_hold(&rt->u.dst);
-	write_unlock_bh(&rt6_lock);
+	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
@@ -1457,43 +1633,65 @@
 				     struct net_device *dev,
 				     unsigned int pref)
 {
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg = {
+		.fc_table	= RT6_TABLE_DFLT,
+		.fc_metric	= 1024,
+		.fc_ifindex	= dev->ifindex,
+		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
+				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+	};
 
-	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
-	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
-	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
-	rtmsg.rtmsg_metric = 1024;
-	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
-			    RTF_PREF(pref);
+	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
-	rtmsg.rtmsg_ifindex = dev->ifindex;
+	ip6_route_add(&cfg);
 
-	ip6_route_add(&rtmsg, NULL, NULL, NULL);
 	return rt6_get_dflt_router(gwaddr, dev);
 }
 
 void rt6_purge_dflt_routers(void)
 {
 	struct rt6_info *rt;
+	struct fib6_table *table;
+	/* NOTE(review): the flag test below accepts either bit; rt6_get_dflt_router() requires both. */
+	/* NOTE: Keep consistent with rt6_get_dflt_router */
+	table = fib6_get_table(RT6_TABLE_DFLT);
+	if (table == NULL)
+		return;	/* no such table: nothing to purge */
 
 restart:
-	read_lock_bh(&rt6_lock);
-	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
+	read_lock_bh(&table->tb6_lock);
+	for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {	/* only the root node's leaf list is walked */
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
 			dst_hold(&rt->u.dst);
-
-			read_unlock_bh(&rt6_lock);
-
-			ip6_del_rt(rt, NULL, NULL, NULL);
-
+			read_unlock_bh(&table->tb6_lock);	/* the delete path takes the write lock itself */
+			ip6_del_rt(rt);	/* also drops the reference taken above */
 			goto restart;
 		}
 	}
-	read_unlock_bh(&rt6_lock);
+	read_unlock_bh(&table->tb6_lock);
+}
+
+static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
+				 struct fib6_config *cfg)
+{
+	memset(cfg, 0, sizeof(*cfg));	/* fields not assigned below stay zero */
+
+	cfg->fc_table = RT6_TABLE_MAIN;	/* this conversion always selects the main table */
+	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
+	cfg->fc_metric = rtmsg->rtmsg_metric;
+	cfg->fc_expires = rtmsg->rtmsg_info;	/* rtmsg_info carries the expiry value */
+	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
+	cfg->fc_src_len = rtmsg->rtmsg_src_len;
+	cfg->fc_flags = rtmsg->rtmsg_flags;
+
+	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
+	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
+	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
 }
 
 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 {
+	struct fib6_config cfg;
 	struct in6_rtmsg rtmsg;
 	int err;
 
@@ -1506,14 +1704,16 @@
 				     sizeof(struct in6_rtmsg));
 		if (err)
 			return -EFAULT;
-			
+
+		rtmsg_to_fib6_config(&rtmsg, &cfg);
+
 		rtnl_lock();
 		switch (cmd) {
 		case SIOCADDRT:
-			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_add(&cfg);
 			break;
 		case SIOCDELRT:
-			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
+			err = ip6_route_del(&cfg);
 			break;
 		default:
 			err = -EINVAL;
@@ -1587,6 +1787,7 @@
 
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
 	rt->rt6i_dst.plen = 128;
+	rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
 
 	atomic_set(&rt->u.dst.__refcnt, 1);
 
@@ -1605,9 +1806,7 @@
 
 void rt6_ifdown(struct net_device *dev)
 {
-	write_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
-	write_unlock_bh(&rt6_lock);
+	fib6_clean_all(fib6_ifdown, 0, dev);	/* no lock taken here; NOTE(review): presumably handled inside fib6_clean_all() - confirm */
 }
 
 struct rt6_mtu_change_arg
@@ -1657,90 +1856,124 @@
 
 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 {
-	struct rt6_mtu_change_arg arg;
+	struct rt6_mtu_change_arg arg = {	/* argument block handed to rt6_mtu_change_route() */
+		.dev = dev,
+		.mtu = mtu,
+	};
 
-	arg.dev = dev;
-	arg.mtu = mtu;
-	read_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
-	read_unlock_bh(&rt6_lock);
+	fib6_clean_all(rt6_mtu_change_route, 0, &arg);	/* no lock taken here; NOTE(review): presumably handled inside fib6_clean_all() - confirm */
 }
 
-static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
-			      struct in6_rtmsg *rtmsg)
+static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {	/* passed to nlmsg_parse() by the RTM route handlers */
+	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },	/* one IPv6 address */
+	[RTA_OIF]               = { .type = NLA_U32 },	/* read with nla_get_u32() */
+	[RTA_IIF]		= { .type = NLA_U32 },	/* read with nla_get_u32() */
+	[RTA_PRIORITY]          = { .type = NLA_U32 },	/* becomes fc_metric */
+	[RTA_METRICS]           = { .type = NLA_NESTED },	/* walked with nla_for_each_attr() in ip6_route_add() */
+};
+
+static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct fib6_config *cfg)
 {
-	memset(rtmsg, 0, sizeof(*rtmsg));
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
+	int err;
 
-	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
-	rtmsg->rtmsg_src_len = r->rtm_src_len;
-	rtmsg->rtmsg_flags = RTF_UP;
-	if (r->rtm_type == RTN_UNREACHABLE)
-		rtmsg->rtmsg_flags |= RTF_REJECT;
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+	if (err < 0)
+		goto errout;	/* parse error is returned as-is */
 
-	if (rta[RTA_GATEWAY-1]) {
-		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
-		rtmsg->rtmsg_flags |= RTF_GATEWAY;
+	err = -EINVAL;	/* result for the length checks below */
+	rtm = nlmsg_data(nlh);
+	memset(cfg, 0, sizeof(*cfg));	/* fields not assigned below stay zero */
+
+	cfg->fc_table = rtm->rtm_table;	/* overridden below when RTA_TABLE is present */
+	cfg->fc_dst_len = rtm->rtm_dst_len;
+	cfg->fc_src_len = rtm->rtm_src_len;
+	cfg->fc_flags = RTF_UP;
+	cfg->fc_protocol = rtm->rtm_protocol;
+
+	if (rtm->rtm_type == RTN_UNREACHABLE)
+		cfg->fc_flags |= RTF_REJECT;
+
+	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;	/* requester's pid and header, kept for notifications */
+	cfg->fc_nlinfo.nlh = nlh;
+
+	if (tb[RTA_GATEWAY]) {
+		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);	/* RTA_GATEWAY has a .len entry in rtm_ipv6_policy */
+		cfg->fc_flags |= RTF_GATEWAY;
 	}
-	if (rta[RTA_DST-1]) {
-		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
+
+	if (tb[RTA_DST]) {
+		int plen = (rtm->rtm_dst_len + 7) >> 3;	/* prefix length in bits, rounded up to bytes */
+
+		if (nla_len(tb[RTA_DST]) < plen)
+			goto errout;
+
+		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
 	}
-	if (rta[RTA_SRC-1]) {
-		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
+
+	if (tb[RTA_SRC]) {
+		int plen = (rtm->rtm_src_len + 7) >> 3;	/* prefix length in bits, rounded up to bytes */
+
+		if (nla_len(tb[RTA_SRC]) < plen)
+			goto errout;
+
+		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
 	}
-	if (rta[RTA_OIF-1]) {
-		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+
+	if (tb[RTA_OIF])
+		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
+
+	if (tb[RTA_PRIORITY])
+		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
+
+	if (tb[RTA_METRICS]) {
+		cfg->fc_mx = nla_data(tb[RTA_METRICS]);	/* pointer into the message; nothing is copied here */
+		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
 	}
-	if (rta[RTA_PRIORITY-1]) {
-		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
-			return -EINVAL;
-		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
-	}
-	return 0;
+
+	if (tb[RTA_TABLE])
+		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);	/* NOTE(review): RTA_TABLE has no entry in rtm_ipv6_policy */
+
+	err = 0;
+errout:
+	return err;
 }
 
 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtmsg *r = NLMSG_DATA(nlh);
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg;	/* filled entirely by rtm_to_fib6_config(); arg is no longer used */
+	int err;
 
-	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
-		return -EINVAL;
-	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;	/* parse error is propagated unchanged */
+
+	return ip6_route_del(&cfg);
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtmsg *r = NLMSG_DATA(nlh);
-	struct in6_rtmsg rtmsg;
+	struct fib6_config cfg;	/* filled entirely by rtm_to_fib6_config(); arg is no longer used */
+	int err;
 
-	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
-		return -EINVAL;
-	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
+	err = rtm_to_fib6_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;	/* parse error is propagated unchanged */
+
+	return ip6_route_add(&cfg);
 }
 
-struct rt6_rtnl_dump_arg
-{
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
-};
-
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
 			 int iif, int type, u32 pid, u32 seq,
 			 int prefix, unsigned int flags)
 {
 	struct rtmsg *rtm;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	struct nlmsghdr *nlh;
 	struct rta_cacheinfo ci;
+	u32 table;
 
 	if (prefix) {	/* user wants prefix routes only */
 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -1749,13 +1982,21 @@
 		}
 	}
 
-	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
-	rtm = NLMSG_DATA(nlh);
+	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -ENOBUFS;
+
+	rtm = nlmsg_data(nlh);
 	rtm->rtm_family = AF_INET6;
 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
 	rtm->rtm_src_len = rt->rt6i_src.plen;
 	rtm->rtm_tos = 0;
-	rtm->rtm_table = RT_TABLE_MAIN;
+	if (rt->rt6i_table)
+		table = rt->rt6i_table->tb6_id;
+	else
+		table = RT6_TABLE_UNSPEC;
+	rtm->rtm_table = table;
+	NLA_PUT_U32(skb, RTA_TABLE, table);
 	if (rt->rt6i_flags&RTF_REJECT)
 		rtm->rtm_type = RTN_UNREACHABLE;
 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
@@ -1776,31 +2017,35 @@
 		rtm->rtm_flags |= RTM_F_CLONED;
 
 	if (dst) {
-		RTA_PUT(skb, RTA_DST, 16, dst);
+		NLA_PUT(skb, RTA_DST, 16, dst);
 	        rtm->rtm_dst_len = 128;
 	} else if (rtm->rtm_dst_len)
-		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
+		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src) {
-		RTA_PUT(skb, RTA_SRC, 16, src);
+		NLA_PUT(skb, RTA_SRC, 16, src);
 	        rtm->rtm_src_len = 128;
 	} else if (rtm->rtm_src_len)
-		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
+		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
 #endif
 	if (iif)
-		RTA_PUT(skb, RTA_IIF, 4, &iif);
+		NLA_PUT_U32(skb, RTA_IIF, iif);
 	else if (dst) {
 		struct in6_addr saddr_buf;
 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
-			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
+
 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
-		goto rtattr_failure;
+		goto nla_put_failure;
+
 	if (rt->u.dst.neighbour)
-		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+
 	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
-	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
+		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
+
+	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
 	if (rt->rt6i_expires)
 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
@@ -1812,23 +2057,21 @@
 	ci.rta_id = 0;
 	ci.rta_ts = 0;
 	ci.rta_tsage = 0;
-	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
+	NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
 
-nlmsg_failure:
-rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	return nlmsg_cancel(skb, nlh);
 }
 
-static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
+int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 {
 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 	int prefix;
 
-	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
-		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
+	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
+		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
 	} else
 		prefix = 0;
@@ -1838,108 +2081,56 @@
 		     prefix, NLM_F_MULTI);
 }
 
-static int fib6_dump_node(struct fib6_walker_t *w)
-{
-	int res;
-	struct rt6_info *rt;
-
-	for (rt = w->leaf; rt; rt = rt->u.next) {
-		res = rt6_dump_route(rt, w->args);
-		if (res < 0) {
-			/* Frame is full, suspend walking */
-			w->leaf = rt;
-			return 1;
-		}
-		BUG_TRAP(res!=0);
-	}
-	w->leaf = NULL;
-	return 0;
-}
-
-static void fib6_dump_end(struct netlink_callback *cb)
-{
-	struct fib6_walker_t *w = (void*)cb->args[0];
-
-	if (w) {
-		cb->args[0] = 0;
-		fib6_walker_unlink(w);
-		kfree(w);
-	}
-	cb->done = (void*)cb->args[1];
-	cb->args[1] = 0;
-}
-
-static int fib6_dump_done(struct netlink_callback *cb)
-{
-	fib6_dump_end(cb);
-	return cb->done ? cb->done(cb) : 0;
-}
-
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct rt6_rtnl_dump_arg arg;
-	struct fib6_walker_t *w;
-	int res;
-
-	arg.skb = skb;
-	arg.cb = cb;
-
-	w = (void*)cb->args[0];
-	if (w == NULL) {
-		/* New dump:
-		 * 
-		 * 1. hook callback destructor.
-		 */
-		cb->args[1] = (long)cb->done;
-		cb->done = fib6_dump_done;
-
-		/*
-		 * 2. allocate and initialize walker.
-		 */
-		w = kzalloc(sizeof(*w), GFP_ATOMIC);
-		if (w == NULL)
-			return -ENOMEM;
-		RT6_TRACE("dump<%p", w);
-		w->root = &ip6_routing_table;
-		w->func = fib6_dump_node;
-		w->args = &arg;
-		cb->args[0] = (long)w;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk(w);
-		read_unlock_bh(&rt6_lock);
-	} else {
-		w->args = &arg;
-		read_lock_bh(&rt6_lock);
-		res = fib6_walk_continue(w);
-		read_unlock_bh(&rt6_lock);
-	}
-#if RT6_DEBUG >= 3
-	if (res <= 0 && skb->len == 0)
-		RT6_TRACE("%p>dump end\n", w);
-#endif
-	res = res < 0 ? res : skb->len;
-	/* res < 0 is an error. (really, impossible)
-	   res == 0 means that dump is complete, but skb still can contain data.
-	   res > 0 dump is not complete, but frame is full.
-	 */
-	/* Destroy walker, if dump of this table is complete. */
-	if (res <= 0)
-		fib6_dump_end(cb);
-	return res;
-}
-
 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct rtattr **rta = arg;
-	int iif = 0;
-	int err = -ENOBUFS;
-	struct sk_buff *skb;
-	struct flowi fl;
+	struct nlattr *tb[RTA_MAX+1];
 	struct rt6_info *rt;
+	struct sk_buff *skb;
+	struct rtmsg *rtm;
+	struct flowi fl;
+	int err, iif = 0;
+
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+	if (err < 0)
+		goto errout;
+
+	err = -EINVAL;
+	memset(&fl, 0, sizeof(fl));
+
+	if (tb[RTA_SRC]) {
+		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
+			goto errout;
+
+		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
+	}
+
+	if (tb[RTA_DST]) {
+		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
+			goto errout;
+
+		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
+	}
+
+	if (tb[RTA_IIF])
+		iif = nla_get_u32(tb[RTA_IIF]);
+
+	if (tb[RTA_OIF])
+		fl.oif = nla_get_u32(tb[RTA_OIF]);
+
+	if (iif) {
+		struct net_device *dev;
+		dev = __dev_get_by_index(iif);
+		if (!dev) {
+			err = -ENODEV;
+			goto errout;
+		}
+	}
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (skb == NULL)
-		goto out;
+	if (skb == NULL) {
+		err = -ENOBUFS;
+		goto errout;
+	}
 
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
@@ -1947,80 +2138,51 @@
 	skb->mac.raw = skb->data;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
-	memset(&fl, 0, sizeof(fl));
-	if (rta[RTA_SRC-1])
-		ipv6_addr_copy(&fl.fl6_src,
-			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
-	if (rta[RTA_DST-1])
-		ipv6_addr_copy(&fl.fl6_dst,
-			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
-
-	if (rta[RTA_IIF-1])
-		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
-
-	if (iif) {
-		struct net_device *dev;
-		dev = __dev_get_by_index(iif);
-		if (!dev) {
-			err = -ENODEV;
-			goto out_free;
-		}
-	}
-
-	fl.oif = 0;
-	if (rta[RTA_OIF-1])
-		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
-
-	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
-
+	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
 	skb->dst = &rt->u.dst;
 
-	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
-	err = rt6_fill_node(skb, rt, 
-			    &fl.fl6_dst, &fl.fl6_src,
-			    iif,
+	err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
 			    nlh->nlmsg_seq, 0, 0);
 	if (err < 0) {
-		err = -EMSGSIZE;
-		goto out_free;
+		kfree_skb(skb);
+		goto errout;
 	}
 
-	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-out:
+	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
+errout:
 	return err;
-out_free:
-	kfree_skb(skb);
-	goto out;	
 }
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
-			struct netlink_skb_parms *req)
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 {
 	struct sk_buff *skb;
-	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-	u32 pid = current->pid;
-	u32 seq = 0;
+	u32 pid = 0, seq = 0;	/* values used when info is NULL */
+	struct nlmsghdr *nlh = NULL;
+	int payload = sizeof(struct rtmsg) + 256;	/* NOTE(review): 256 is presumably room for attributes - confirm */
+	int err = -ENOBUFS;	/* reported if the skb allocation fails */
 
-	if (req)
-		pid = req->pid;
-	if (nlh)
-		seq = nlh->nlmsg_seq;
-	
-	skb = alloc_skb(size, gfp_any());
-	if (!skb) {
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
-		return;
+	if (info) {
+		pid = info->pid;
+		nlh = info->nlh;
+		if (nlh)
+			seq = nlh->nlmsg_seq;
 	}
-	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
+
+	skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
+	if (skb == NULL)
+		goto errout;
+
+	err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
+	if (err < 0) {
 		kfree_skb(skb);
-		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
-		return;
+		goto errout;
 	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
+
+	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);	/* single error report for every failure path above */
 }
 
 /*
@@ -2096,16 +2258,13 @@
 
 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
 {
-	struct rt6_proc_arg arg;
-	arg.buffer = buffer;
-	arg.offset = offset;
-	arg.length = length;
-	arg.skip = 0;
-	arg.len = 0;
+	struct rt6_proc_arg arg = {
+		.buffer = buffer,
+		.offset = offset,
+		.length = length,
+	};
 
-	read_lock_bh(&rt6_lock);
-	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
-	read_unlock_bh(&rt6_lock);
+	fib6_clean_all(rt6_info_route, 0, &arg);
 
 	*start = buffer;
 	if (offset)
@@ -2260,13 +2419,9 @@
 {
 	struct proc_dir_entry *p;
 
-	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
-						     sizeof(struct rt6_info),
-						     0, SLAB_HWCACHE_ALIGN,
-						     NULL, NULL);
-	if (!ip6_dst_ops.kmem_cachep)
-		panic("cannot create ip6_dst_cache");
-
+	ip6_dst_ops.kmem_cachep =
+		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
+				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 	fib6_init();
 #ifdef 	CONFIG_PROC_FS
 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
@@ -2278,10 +2433,16 @@
 #ifdef CONFIG_XFRM
 	xfrm6_init();
 #endif
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	fib6_rules_init();
+#endif
 }
 
 void ip6_route_cleanup(void)
 {
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	fib6_rules_cleanup();
+#endif
 #ifdef CONFIG_PROC_FS
 	proc_net_remove("ipv6_route");
 	proc_net_remove("rt6_stats");
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 802a1a6..2546fc9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -251,6 +251,8 @@
 		final_p = &final;
 	}
 
+	security_sk_classify_flow(sk, &fl);
+
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
 		goto failure;
@@ -270,7 +272,7 @@
 	inet->rcv_saddr = LOOPBACK4_IPV6;
 
 	sk->sk_gso_type = SKB_GSO_TCPV6;
-	__ip6_dst_store(sk, dst, NULL);
+	__ip6_dst_store(sk, dst, NULL, NULL);
 
 	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
@@ -374,6 +376,7 @@
 			fl.oif = sk->sk_bound_dev_if;
 			fl.fl_ip_dport = inet->dport;
 			fl.fl_ip_sport = inet->sport;
+			security_skb_classify_flow(skb, &fl);
 
 			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
 				sk->sk_err_soft = -err;
@@ -467,6 +470,7 @@
 	fl.oif = treq->iif;
 	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
+	security_req_classify_flow(req, &fl);
 
 	if (dst == NULL) {
 		opt = np->opt;
@@ -541,7 +545,7 @@
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcphdr *th = skb->h.th;
 
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
 		skb->csum = offsetof(struct tcphdr, check);
 	} else {
@@ -566,7 +570,7 @@
 	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
 				     IPPROTO_TCP, 0);
 	skb->csum = offsetof(struct tcphdr, check);
-	skb->ip_summed = CHECKSUM_HW;
+	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
 }
 
@@ -625,6 +629,7 @@
 	fl.oif = inet6_iif(skb);
 	fl.fl_ip_dport = t1->dest;
 	fl.fl_ip_sport = t1->source;
+	security_skb_classify_flow(skb, &fl);
 
 	/* sk = NULL, but it is safe for now. RST socket required. */
 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
@@ -691,6 +696,7 @@
 	fl.oif = inet6_iif(skb);
 	fl.fl_ip_dport = t1->dest;
 	fl.fl_ip_sport = t1->source;
+	security_skb_classify_flow(skb, &fl);
 
 	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
 		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
@@ -820,6 +826,8 @@
 
 	tcp_rsk(req)->snt_isn = isn;
 
+	security_inet_conn_request(sk, skb, req);
+
 	if (tcp_v6_send_synack(sk, req, NULL))
 		goto drop;
 
@@ -923,6 +931,7 @@
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
+		security_req_classify_flow(req, &fl);
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
 			goto out;
@@ -945,7 +954,7 @@
 	 */
 
 	newsk->sk_gso_type = SKB_GSO_TCPV6;
-	__ip6_dst_store(newsk, dst, NULL);
+	__ip6_dst_store(newsk, dst, NULL, NULL);
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
 	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1024,7 +1033,7 @@
 
 static int tcp_v6_checksum_init(struct sk_buff *skb)
 {
-	if (skb->ip_summed == CHECKSUM_HW) {
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
 				  &skb->nh.ipv6h->daddr,skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1066,7 +1075,7 @@
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_do_rcv(sk, skb);
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard;
 
 	/*
@@ -1223,7 +1232,7 @@
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
 	skb->dev = NULL;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3d54f24..9662561 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -61,81 +61,9 @@
 
 DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
 
-/* Grrr, addr_type already calculated by caller, but I don't want
- * to add some silly "cookie" argument to this method just for that.
- */
-static int udp_v6_get_port(struct sock *sk, unsigned short snum)
+static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
-	struct sock *sk2;
-	struct hlist_node *node;
-
-	write_lock_bh(&udp_hash_lock);
-	if (snum == 0) {
-		int best_size_so_far, best, result, i;
-
-		if (udp_port_rover > sysctl_local_port_range[1] ||
-		    udp_port_rover < sysctl_local_port_range[0])
-			udp_port_rover = sysctl_local_port_range[0];
-		best_size_so_far = 32767;
-		best = result = udp_port_rover;
-		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-			int size;
-			struct hlist_head *list;
-
-			list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-			if (hlist_empty(list)) {
-				if (result > sysctl_local_port_range[1])
-					result = sysctl_local_port_range[0] +
-						((result - sysctl_local_port_range[0]) &
-						 (UDP_HTABLE_SIZE - 1));
-				goto gotit;
-			}
-			size = 0;
-			sk_for_each(sk2, node, list)
-				if (++size >= best_size_so_far)
-					goto next;
-			best_size_so_far = size;
-			best = result;
-		next:;
-		}
-		result = best;
-		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
-			if (result > sysctl_local_port_range[1])
-				result = sysctl_local_port_range[0]
-					+ ((result - sysctl_local_port_range[0]) &
-					   (UDP_HTABLE_SIZE - 1));
-			if (!udp_lport_inuse(result))
-				break;
-		}
-		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
-			goto fail;
-gotit:
-		udp_port_rover = snum = result;
-	} else {
-		sk_for_each(sk2, node,
-			    &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
-			if (inet_sk(sk2)->num == snum &&
-			    sk2 != sk &&
-			    (!sk2->sk_bound_dev_if ||
-			     !sk->sk_bound_dev_if ||
-			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (!sk2->sk_reuse || !sk->sk_reuse) &&
-			    ipv6_rcv_saddr_equal(sk, sk2))
-				goto fail;
-		}
-	}
-
-	inet_sk(sk)->num = snum;
-	if (sk_unhashed(sk)) {
-		sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
-		sock_prot_inc_use(sk->sk_prot);
-	}
-	write_unlock_bh(&udp_hash_lock);
-	return 0;
-
-fail:
-	write_unlock_bh(&udp_hash_lock);
-	return 1;
+	return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
 static void udp_v6_hash(struct sock *sk)
@@ -345,6 +273,8 @@
 
 static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
+	int rc;
+
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
 		kfree_skb(skb);
 		return -1;
@@ -356,7 +286,10 @@
 		return 0;
 	}
 
-	if (sock_queue_rcv_skb(sk,skb)<0) {
+	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
+		/* Note that an ENOMEM error is charged twice */
+		if (rc == -ENOMEM)
+			UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS);
 		UDP6_INC_STATS_BH(UDP_MIB_INERRORS);
 		kfree_skb(skb);
 		return 0;
@@ -475,7 +408,7 @@
 		uh = skb->h.uh;
 	}
 
-	if (skb->ip_summed == CHECKSUM_HW &&
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
 	    !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
@@ -782,6 +715,8 @@
 		connected = 0;
 	}
 
+	security_sk_classify_flow(sk, fl);
+
 	err = ip6_sk_dst_lookup(sk, &dst, fl);
 	if (err)
 		goto out;
@@ -840,7 +775,12 @@
 		if (connected) {
 			ip6_dst_store(sk, dst,
 				      ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
-				      &np->daddr : NULL);
+				      &np->daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+				      ipv6_addr_equal(&fl->fl6_src, &np->saddr) ?
+				      &np->saddr :
+#endif
+				      NULL);
 		} else {
 			dst_release(dst);
 		}
@@ -855,6 +795,16 @@
 		UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS);
 		return len;
 	}
+	/*
+	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
+	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
+	 * we don't have a good statistic (IpOutDiscards but it can be too many
+	 * things).  We could add another new stat but at least for now that
+	 * seems like overkill.
+	 */
+	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS);
+	}
 	return err;
 
 do_confirm:
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 0405d74..a40a057 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -72,7 +72,7 @@
 		if (x->mode->input(x, skb))
 			goto drop;
 
-		if (x->props.mode) { /* XXX */
+		if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */
 			decaps = 1;
 			break;
 		}
@@ -138,3 +138,111 @@
 {
 	return xfrm6_rcv_spi(*pskb, 0);
 }
+
+int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
+		     xfrm_address_t *saddr, u8 proto)
+{
+ 	struct xfrm_state *x = NULL;
+ 	int wildcard = 0;	/* stays 1 from the first wildcard pass onwards */
+	struct in6_addr any;	/* all-zero address used as the wildcard key */
+	xfrm_address_t *xany;
+	struct xfrm_state *xfrm_vec_one = NULL;	/* the state that accepted skb */
+ 	int nh = 0;	/* result of x->type->input() */
+	int i = 0;
+
+	ipv6_addr_set(&any, 0, 0, 0, 0);
+	xany = (xfrm_address_t *)&any;
+
+	for (i = 0; i < 3; i++) {	/* pass 0: exact, 1: any source, 2: any/any */
+		xfrm_address_t *dst, *src;
+		switch (i) {
+		case 0:
+			dst = daddr;
+			src = saddr;
+			break;
+		case 1:
+			/* lookup state with wild-card source address */
+			wildcard = 1;
+			dst = daddr;
+			src = xany;
+			break;
+		case 2:
+		default:
+ 			/* lookup state with wild-card addresses */
+			wildcard = 1; /* XXX */
+			dst = xany;
+			src = xany;
+			break;
+ 		}
+
+		x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6);
+		if (!x)
+			continue;
+
+		spin_lock(&x->lock);
+
+		if (wildcard) {	/* wildcard hits require XFRM_STATE_WILDRECV */
+			if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) {
+				spin_unlock(&x->lock);
+				xfrm_state_put(x);
+				x = NULL;
+				continue;
+			}
+		}
+
+		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+			spin_unlock(&x->lock);
+			xfrm_state_put(x);
+ 			x = NULL;
+ 			continue;
+		}
+		if (xfrm_state_check_expire(x)) {	/* non-zero: skip this state */
+			spin_unlock(&x->lock);
+			xfrm_state_put(x);
+			x = NULL;
+			continue;
+		}
+
+		nh = x->type->input(x, skb);
+		if (nh <= 0) {	/* handler did not accept skb; try the next pass */
+			spin_unlock(&x->lock);
+			xfrm_state_put(x);
+			x = NULL;
+			continue;
+		}
+
+		x->curlft.bytes += skb->len;	/* account the packet to the state */
+		x->curlft.packets++;
+
+		spin_unlock(&x->lock);
+
+		xfrm_vec_one = x;
+		break;
+	}
+
+	if (!xfrm_vec_one)	/* none of the three passes produced a state */
+		goto drop;
+
+	/* Allocate new secpath or COW existing one. */
+	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
+		struct sec_path *sp;
+		sp = secpath_dup(skb->sp);
+		if (!sp)
+			goto drop;
+		if (skb->sp)
+			secpath_put(skb->sp);
+		skb->sp = sp;
+	}
+
+	if (1 + skb->sp->len > XFRM_MAX_DEPTH)	/* secpath is already full */
+		goto drop;
+
+	skb->sp->xvec[skb->sp->len] = xfrm_vec_one;
+	skb->sp->len ++;
+
+	return 1;	/* success: state appended to skb->sp */
+drop:
+	if (xfrm_vec_one)	/* release the state that could not be recorded */
+		xfrm_state_put(xfrm_vec_one);
+	return -1;
+}
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
new file mode 100644
index 0000000..6031c16
--- /dev/null
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -0,0 +1,93 @@
+/*
+ * xfrm6_mode_ro.c - Route optimization mode for IPv6.
+ *
+ * Copyright (C)2003-2006 Helsinki University of Technology
+ * Copyright (C)2003-2006 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ * Authors:
+ *	Noriaki TAKAMIYA @USAGI
+ *	Masahide NAKAMURA @USAGI
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add route optimization header space.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the route optimization header.
+ *
+ * On exit, skb->h will be set to the start of the encapsulation header to be
+ * filled in by x->type->output and skb->nh will be set to the nextheader field
+ * of the extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header.  The value of skb->data will always
+ * point to the top IP header.
+ */
+static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph;
+	u8 *prevhdr;
+	int hdr_len;
+
+	skb_push(skb, x->props.header_len);	/* make room at the front of the packet */
+	iph = skb->nh.ipv6h;	/* where the headers sit before the move */
+
+	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);	/* type-specific offset */
+	skb->nh.raw = prevhdr - x->props.header_len;	/* prevhdr, shifted with the headers */
+	skb->h.raw = skb->data + hdr_len;	/* new header goes right after them */
+	memmove(skb->data, iph, hdr_len);	/* slide the leading hdr_len bytes to the new front */
+	return 0;
+}
+
+/*
+ * Unlike IPsec, the route optimization header needs no processing on input.
+ */
+static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static struct xfrm_mode xfrm6_ro_mode = {
+	.input = xfrm6_ro_input,	/* no-op on receive */
+	.output = xfrm6_ro_output,	/* opens space for the header on transmit */
+	.owner = THIS_MODULE,
+	.encap = XFRM_MODE_ROUTEOPTIMIZATION,	/* mode id served by this handler */
+};
+
+static int __init xfrm6_ro_init(void)
+{
+	return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6);	/* result becomes the init status */
+}
+
+static void __exit xfrm6_ro_exit(void)
+{
+	int err;
+
+	err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6);
+	BUG_ON(err);	/* a failed unregister is treated as a bug */
+}
+
+module_init(xfrm6_ro_init);
+module_exit(xfrm6_ro_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 711d713..3a4b39b 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -25,9 +25,8 @@
  * its absence, that of the top IP header.  The value of skb->data will always
  * point to the top IP header.
  */
-static int xfrm6_transport_output(struct sk_buff *skb)
+static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct xfrm_state *x = skb->dst->xfrm;
 	struct ipv6hdr *iph;
 	u8 *prevhdr;
 	int hdr_len;
@@ -35,7 +34,7 @@
 	skb_push(skb, x->props.header_len);
 	iph = skb->nh.ipv6h;
 
-	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
+	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
 	skb->h.raw = skb->data + hdr_len;
 	memmove(skb->data, iph, hdr_len);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 8af79be..5e7d8a7 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -37,10 +37,9 @@
  * its absence, that of the top IP header.  The value of skb->data will always
  * point to the top IP header.
  */
-static int xfrm6_tunnel_output(struct sk_buff *skb)
+static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct xfrm_state *x = dst->xfrm;
 	struct ipv6hdr *iph, *top_iph;
 	int dsfield;
 
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index c8c8b44..c260ea1 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -17,6 +17,12 @@
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
+int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
+			  u8 **prevhdr)
+{
+	return ip6_find_1stfragopt(skb, prevhdr);	/* x is unused; presumably kept to fit the type->hdr_offset() call shape */
+}
+
 static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
@@ -41,13 +47,13 @@
 	struct xfrm_state *x = dst->xfrm;
 	int err;
 	
-	if (skb->ip_summed == CHECKSUM_HW) {
-		err = skb_checksum_help(skb, 0);
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
 		if (err)
 			goto error_nolock;
 	}
 
-	if (x->props.mode) {
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
 		err = xfrm6_tunnel_check_size(skb);
 		if (err)
 			goto error_nolock;
@@ -59,7 +65,7 @@
 		if (err)
 			goto error;
 
-		err = x->mode->output(skb);
+		err = x->mode->output(x, skb);
 		if (err)
 			goto error;
 
@@ -69,6 +75,8 @@
 
 		x->curlft.bytes += skb->len;
 		x->curlft.packets++;
+		if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
+			x->lastused = (u64)xtime.tv_sec;
 
 		spin_unlock_bh(&x->lock);
 
@@ -80,7 +88,7 @@
 		}
 		dst = skb->dst;
 		x = dst->xfrm;
-	} while (x && !x->props.mode);
+	} while (x && (x->props.mode != XFRM_MODE_TUNNEL));
 
 	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
 	err = 0;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 73cd250..6a252e2 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -18,6 +18,9 @@
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#ifdef CONFIG_IPV6_MIP6
+#include <net/mip6.h>
+#endif
 
 static struct dst_ops xfrm6_dst_ops;
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
@@ -31,6 +34,26 @@
 	return err;
 }
 
+static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr)
+{
+	struct rt6_info *rt;
+	struct flowi fl_tunnel = {	/* flow described only by its destination */
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *(struct in6_addr *)&daddr->a6,
+			},
+		},
+	};
+
+	if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) {	/* zero: route found */
+		ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6,
+			       (struct in6_addr *)&saddr->a6);	/* presumably fills *saddr */
+		dst_release(&rt->u.dst);	/* done with the looked-up route */
+		return 0;
+	}
+	return -EHOSTUNREACH;	/* lookup for daddr failed */
+}
+
 static struct dst_entry *
 __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
 {
@@ -50,7 +73,9 @@
 				 xdst->u.rt6.rt6i_src.plen);
 		if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
 		    ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
-		    xfrm_bundle_ok(xdst, fl, AF_INET6)) {
+		    xfrm_bundle_ok(xdst, fl, AF_INET6,
+				   (xdst->u.rt6.rt6i_dst.plen != 128 ||
+				    xdst->u.rt6.rt6i_src.plen != 128))) {
 			dst_clone(dst);
 			break;
 		}
@@ -59,6 +84,40 @@
 	return dst;
 }
 
+static inline struct in6_addr*
+__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr)
+{
+	return (x->type->remote_addr) ?	/* the type's callback decides, if it has one */
+		(struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) :
+		(struct in6_addr*)&x->id.daddr;	/* otherwise the state's daddr */
+}
+
+static inline struct in6_addr*
+__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr)
+{
+	return (x->type->local_addr) ?	/* the type's callback decides, if it has one */
+		(struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) :
+		(struct in6_addr*)&x->props.saddr;	/* otherwise the state's saddr */
+}
+
+static inline void
+__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x)
+{
+	if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)	/* flagged types are tallied in *nflen */
+		*nflen += x->props.header_len;
+	else	/* everything else is tallied in *len */
+		*len += x->props.header_len;
+}
+
+static inline void
+__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x)
+{
+	if (x->type->flags & XFRM_TYPE_NON_FRAGMENT)	/* subtract from the counter the header was added to */
+		*nflen -= x->props.header_len;
+	else
+		*len -= x->props.header_len;
+}
+
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
  * all the metrics... Shortly, bundle a bundle.
  */
@@ -83,6 +142,7 @@
 	int i;
 	int err = 0;
 	int header_len = 0;
+	int nfheader_len = 0;
 	int trailer_len = 0;
 
 	dst = dst_prev = NULL;
@@ -109,17 +169,18 @@
 
 		xdst = (struct xfrm_dst *)dst1;
 		xdst->route = &rt->u.dst;
+		xdst->genid = xfrm[i]->genid;
 		if (rt->rt6i_node)
 			xdst->route_cookie = rt->rt6i_node->fn_sernum;
 
 		dst1->next = dst_prev;
 		dst_prev = dst1;
-		if (xfrm[i]->props.mode) {
-			remote = (struct in6_addr*)&xfrm[i]->id.daddr;
-			local  = (struct in6_addr*)&xfrm[i]->props.saddr;
+		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
+			remote = __xfrm6_bundle_addr_remote(xfrm[i], remote);
+			local  = __xfrm6_bundle_addr_local(xfrm[i], local);
 			tunnel = 1;
 		}
-		header_len += xfrm[i]->props.header_len;
+		__xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]);
 		trailer_len += xfrm[i]->props.trailer_len;
 
 		if (tunnel) {
@@ -154,6 +215,7 @@
 		dst_prev->flags	       |= DST_HOST;
 		dst_prev->lastuse	= jiffies;
 		dst_prev->header_len	= header_len;
+		dst_prev->nfheader_len	= nfheader_len;
 		dst_prev->trailer_len	= trailer_len;
 		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));
 
@@ -172,7 +234,7 @@
 		x->u.rt6.rt6i_src      = rt0->rt6i_src;	
 		x->u.rt6.rt6i_idev     = rt0->rt6i_idev;
 		in6_dev_hold(rt0->rt6i_idev);
-		header_len -= x->u.dst.xfrm->props.header_len;
+		__xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm);
 		trailer_len -= x->u.dst.xfrm->props.trailer_len;
 	}
 
@@ -232,6 +294,18 @@
 			fl->proto = nexthdr;
 			return;
 
+#ifdef CONFIG_IPV6_MIP6
+		case IPPROTO_MH:
+			if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) {
+				struct ip6_mh *mh;
+				mh = (struct ip6_mh *)exthdr;
+
+				fl->fl_mh_type = mh->ip6mh_type;
+			}
+			fl->proto = nexthdr;
+			return;
+#endif
+
 		/* XXX Why are there these headers? */
 		case IPPROTO_AH:
 		case IPPROTO_ESP:
@@ -308,6 +382,7 @@
 	.family =		AF_INET6,
 	.dst_ops =		&xfrm6_dst_ops,
 	.dst_lookup =		xfrm6_dst_lookup,
+	.get_saddr = 		xfrm6_get_saddr,
 	.find_bundle =		__xfrm6_find_bundle,
 	.bundle_create =	__xfrm6_bundle_create,
 	.decode_session =	_decode_session6,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b33296b..711bfaf 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -42,102 +42,135 @@
 	memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
 	if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
 		memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
-	if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) {
-		struct rt6_info *rt;
-		struct flowi fl_tunnel = {
-			.nl_u = {
-				.ip6_u = {
-					.daddr = *(struct in6_addr *)daddr,
-				}
-			}
-		};
-		if (!xfrm_dst_lookup((struct xfrm_dst **)&rt,
-		                     &fl_tunnel, AF_INET6)) {
-			ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr,
-			               (struct in6_addr *)&x->props.saddr);
-			dst_release(&rt->u.dst);
-		}
-	}
 	x->props.mode = tmpl->mode;
 	x->props.reqid = tmpl->reqid;
 	x->props.family = AF_INET6;
 }
 
-static struct xfrm_state *
-__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
+static int
+__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n)
 {
-	unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
-	struct xfrm_state *x;
+	int i;
+	int j = 0;
 
-	list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) {
-		if (x->props.family == AF_INET6 &&
-		    spi == x->id.spi &&
-		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
-		    proto == x->id.proto) {
-			xfrm_state_hold(x);
-			return x;
+	/* Rule 1: select IPsec transport except AH */
+	for (i = 0; i < n; i++) {
+		if (src[i]->props.mode == XFRM_MODE_TRANSPORT &&
+		    src[i]->id.proto != IPPROTO_AH) {
+			dst[j++] = src[i];
+			src[i] = NULL;
 		}
 	}
-	return NULL;
+	if (j == n)
+		goto end;
+
+	/* Rule 2: select MIPv6 RO or inbound trigger */
+#ifdef CONFIG_IPV6_MIP6
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+		     src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (j == n)
+		goto end;
+#endif
+
+	/* Rule 3: select IPsec transport AH */
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    src[i]->props.mode == XFRM_MODE_TRANSPORT &&
+		    src[i]->id.proto == IPPROTO_AH) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (j == n)
+		goto end;
+
+	/* Rule 4: select IPsec tunnel */
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    src[i]->props.mode == XFRM_MODE_TUNNEL) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (likely(j == n))
+		goto end;
+
+	/* Final rule */
+	for (i = 0; i < n; i++) {
+		if (src[i]) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+
+ end:
+	return 0;
 }
 
-static struct xfrm_state *
-__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, 
-		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
-		 int create)
+static int
+__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n)
 {
-	struct xfrm_state *x, *x0;
-	unsigned h = __xfrm6_dst_hash(daddr);
+	int i;
+	int j = 0;
 
-	x0 = NULL;
+	/* Rule 1: select IPsec transport */
+	for (i = 0; i < n; i++) {
+		if (src[i]->mode == XFRM_MODE_TRANSPORT) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+	if (j == n)
+		goto end;
 
-	list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) {
-		if (x->props.family == AF_INET6 &&
-		    ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
-		    mode == x->props.mode &&
-		    proto == x->id.proto &&
-		    ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
-		    reqid == x->props.reqid &&
-		    x->km.state == XFRM_STATE_ACQ &&
-		    !x->id.spi) {
-			    x0 = x;
-			    break;
-		    }
+	/* Rule 2: select MIPv6 RO or inbound trigger */
+#ifdef CONFIG_IPV6_MIP6
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+		     src[i]->mode == XFRM_MODE_IN_TRIGGER)) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
 	}
-	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
-		ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6,
-			       (struct in6_addr *)daddr);
-		ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6,
-			       (struct in6_addr *)saddr);
-		x0->sel.prefixlen_d = 128;
-		x0->sel.prefixlen_s = 128;
-		ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6,
-			       (struct in6_addr *)saddr);
-		x0->km.state = XFRM_STATE_ACQ;
-		ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6,
-			       (struct in6_addr *)daddr);
-		x0->id.proto = proto;
-		x0->props.family = AF_INET6;
-		x0->props.mode = mode;
-		x0->props.reqid = reqid;
-		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-		xfrm_state_hold(x0);
-		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
-		add_timer(&x0->timer);
-		xfrm_state_hold(x0);
-		list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
-		wake_up(&km_waitq);
+	if (j == n)
+		goto end;
+#endif
+
+	/* Rule 3: select IPsec tunnel */
+	for (i = 0; i < n; i++) {
+		if (src[i] &&
+		    src[i]->mode == XFRM_MODE_TUNNEL) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
 	}
-	if (x0)
-		xfrm_state_hold(x0);
-	return x0;
+	if (likely(j == n))
+		goto end;
+
+	/* Final rule */
+	for (i = 0; i < n; i++) {
+		if (src[i]) {
+			dst[j++] = src[i];
+			src[i] = NULL;
+		}
+	}
+
+ end:
+	return 0;
 }
 
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.init_tempsel		= __xfrm6_init_tempsel,
-	.state_lookup		= __xfrm6_state_lookup,
-	.find_acq		= __xfrm6_find_acq,
+	.tmpl_sort		= __xfrm6_tmpl_sort,
+	.state_sort		= __xfrm6_state_sort,
 };
 
 void __init xfrm6_state_init(void)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index c8f9369..59685ee 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -307,7 +307,7 @@
 
 static int xfrm6_tunnel_init_state(struct xfrm_state *x)
 {
-	if (!x->props.mode)
+	if (x->props.mode != XFRM_MODE_TUNNEL)
 		return -EINVAL;
 
 	if (x->encap)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3a95b2e..83b443d 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1731,7 +1731,8 @@
 		++reqid;
 		if (reqid == 0)
 			reqid = IPSEC_MANUAL_REQID_MAX+1;
-		if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST)
+		if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid,
+				     (void*)&reqid) != -EEXIST)
 			return reqid;
 	} while (reqid != start);
 	return 0;
@@ -1765,7 +1766,7 @@
 	}
 
 	/* addresses present only in tunnel mode */
-	if (t->mode) {
+	if (t->mode == XFRM_MODE_TUNNEL) {
 		switch (xp->family) {
 		case AF_INET:
 			sin = (void*)(rq+1);
@@ -1997,7 +1998,7 @@
 		int req_size;
 
 		req_size = sizeof(struct sadb_x_ipsecrequest);
-		if (t->mode)
+		if (t->mode == XFRM_MODE_TUNNEL)
 			req_size += 2*socklen;
 		else
 			size -= 2*socklen;
@@ -2013,7 +2014,7 @@
 		if (t->optional)
 			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE;
 		rq->sadb_x_ipsecrequest_reqid = t->reqid;
-		if (t->mode) {
+		if (t->mode == XFRM_MODE_TUNNEL) {
 			switch (xp->family) {
 			case AF_INET:
 				sin = (void*)(rq+1);
@@ -2268,7 +2269,8 @@
 			return err;
 	}
 
-	xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1);
+	xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1,
+				   &sel, tmp.security, 1);
 	security_xfrm_policy_free(&tmp);
 	if (xp == NULL)
 		return -ENOENT;
@@ -2330,7 +2332,7 @@
 	if (dir >= XFRM_POLICY_MAX)
 		return -EINVAL;
 
-	xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id,
+	xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id,
 			      hdr->sadb_msg_type == SADB_X_SPDDELETE2);
 	if (xp == NULL)
 		return -ENOENT;
@@ -2378,7 +2380,7 @@
 {
 	struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk };
 
-	return xfrm_policy_walk(dump_sp, &data);
+	return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data);
 }
 
 static int key_notify_policy_flush(struct km_event *c)
@@ -2405,7 +2407,8 @@
 {
 	struct km_event c;
 
-	xfrm_policy_flush();
+	xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN);
+	c.data.type = XFRM_POLICY_TYPE_MAIN;
 	c.event = XFRM_MSG_FLUSHPOLICY;
 	c.pid = hdr->sadb_msg_pid;
 	c.seq = hdr->sadb_msg_seq;
@@ -2667,6 +2670,9 @@
 
 static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
 {
+	if (xp && xp->type != XFRM_POLICY_TYPE_MAIN)
+		return 0;
+
 	switch (c->event) {
 	case XFRM_MSG_POLEXPIRE:
 		return key_notify_policy_expire(xp, c);
@@ -2675,6 +2681,8 @@
 	case XFRM_MSG_UPDPOLICY:
 		return key_notify_policy(xp, dir, c);
 	case XFRM_MSG_FLUSHPOLICY:
+		if (c->data.type != XFRM_POLICY_TYPE_MAIN)
+			break;
 		return key_notify_policy_flush(c);
 	default:
 		printk("pfkey: Unknown policy event %d\n", c->event);
@@ -2708,6 +2716,9 @@
 #endif
 	int sockaddr_size;
 	int size;
+	struct sadb_x_sec_ctx *sec_ctx;
+	struct xfrm_sec_ctx *xfrm_ctx;
+	int ctx_size = 0;
 	
 	sockaddr_size = pfkey_sockaddr_size(x->props.family);
 	if (!sockaddr_size)
@@ -2723,6 +2734,11 @@
 	else if (x->id.proto == IPPROTO_ESP)
 		size += count_esp_combs(t);
 
+	if ((xfrm_ctx = x->security)) {
+		ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
+		size +=  sizeof(struct sadb_x_sec_ctx) + ctx_size;
+	}
+
 	skb =  alloc_skb(size + 16, GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
@@ -2818,17 +2834,31 @@
 	else if (x->id.proto == IPPROTO_ESP)
 		dump_esp_combs(skb, t);
 
+	/* security context */
+	if (xfrm_ctx) {
+		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
+				sizeof(struct sadb_x_sec_ctx) + ctx_size);
+		sec_ctx->sadb_x_sec_len =
+		  (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
+		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
+		sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
+		sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
+		sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
+		memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
+		       xfrm_ctx->ctx_len);
+	}
+
 	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
 }
 
-static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
+static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
                                                 u8 *data, int len, int *dir)
 {
 	struct xfrm_policy *xp;
 	struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
 	struct sadb_x_sec_ctx *sec_ctx;
 
-	switch (family) {
+	switch (sk->sk_family) {
 	case AF_INET:
 		if (opt != IP_IPSEC_POLICY) {
 			*dir = -EOPNOTSUPP;
@@ -2869,7 +2899,7 @@
 	xp->lft.hard_byte_limit = XFRM_INF;
 	xp->lft.soft_packet_limit = XFRM_INF;
 	xp->lft.hard_packet_limit = XFRM_INF;
-	xp->family = family;
+	xp->family = sk->sk_family;
 
 	xp->xfrm_nr = 0;
 	if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC &&
@@ -2885,8 +2915,10 @@
 		p += pol->sadb_x_policy_len*8;
 		sec_ctx = (struct sadb_x_sec_ctx *)p;
 		if (len < pol->sadb_x_policy_len*8 +
-		    sec_ctx->sadb_x_sec_len)
+		    sec_ctx->sadb_x_sec_len) {
+			*dir = -EINVAL;
 			goto out;
+		}
 		if ((*dir = verify_sec_ctx_len(p)))
 			goto out;
 		uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
@@ -2896,6 +2928,11 @@
 		if (*dir)
 			goto out;
 	}
+	else {
+		*dir = security_xfrm_sock_policy_alloc(xp, sk);
+		if (*dir)
+			goto out;
+	}
 
 	*dir = pol->sadb_x_policy_dir-1;
 	return xp;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a9894ddf..0a28d2c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -148,6 +148,18 @@
 	  <file:Documentation/modules.txt>.  The module will be called
 	  ipt_CONNMARK.o.  If unsure, say `N'.
 
+config NETFILTER_XT_TARGET_DSCP
+	tristate '"DSCP" target support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	help
+	  This option adds a `DSCP' target, which allows you to manipulate
+	  the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+	  The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
 	depends on NETFILTER_XTABLES
@@ -263,6 +275,17 @@
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_DSCP
+	tristate '"DSCP" match support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option adds a `DSCP' match, which allows you to match against
+	  the IPv4/IPv6 header DSCP field (differentiated services codepoint).
+
+	  The DSCP field can have any value between 0x0 and 0x3f inclusive.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_ESP
 	tristate '"ESP" match support'
 	depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6fa4b75..a74be49 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -25,6 +25,7 @@
 # targets
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
@@ -37,6 +38,7 @@
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5d29d5e..d80b935 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -182,7 +182,7 @@
 		ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK)  == NF_QUEUE) {
 		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
-		if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn,
+		if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
 			      verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 	}
@@ -222,6 +222,28 @@
 }
 EXPORT_SYMBOL(skb_make_writable);
 
+u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum)
+{
+	u_int32_t diff[] = { oldval, newval };
+
+	return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum));
+}
+EXPORT_SYMBOL(nf_csum_update);
+
+u_int16_t nf_proto_csum_update(struct sk_buff *skb,
+			       u_int32_t oldval, u_int32_t newval,
+			       u_int16_t csum, int pseudohdr)
+{
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		csum = nf_csum_update(oldval, newval, csum);
+		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+			skb->csum = nf_csum_update(oldval, newval, skb->csum);
+	} else if (pseudohdr)
+		csum = ~nf_csum_update(oldval, newval, ~csum);
+
+	return csum;
+}
+EXPORT_SYMBOL(nf_proto_csum_update);
 
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8f22619..093b3dd 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -57,7 +57,6 @@
 #include <net/netfilter/nf_conntrack_protocol.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
@@ -74,17 +73,17 @@
 
 void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
 LIST_HEAD(nf_conntrack_expect_list);
-struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
-struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
+struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly;
+struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly;
 static LIST_HEAD(helpers);
-unsigned int nf_conntrack_htable_size = 0;
-int nf_conntrack_max;
-struct list_head *nf_conntrack_hash;
-static kmem_cache_t *nf_conntrack_expect_cachep;
+unsigned int nf_conntrack_htable_size __read_mostly = 0;
+int nf_conntrack_max __read_mostly;
+struct list_head *nf_conntrack_hash __read_mostly;
+static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly;
 struct nf_conn nf_conntrack_untracked;
-unsigned int nf_ct_log_invalid;
+unsigned int nf_ct_log_invalid __read_mostly;
 static LIST_HEAD(unconfirmed);
-static int nf_conntrack_vmalloc;
+static int nf_conntrack_vmalloc __read_mostly;
 
 static unsigned int nf_conntrack_next_id;
 static unsigned int nf_conntrack_expect_next_id;
@@ -539,15 +538,10 @@
 static void
 clean_from_lists(struct nf_conn *ct)
 {
-	unsigned int ho, hr;
-	
 	DEBUGP("clean_from_lists(%p)\n", ct);
 	ASSERT_WRITE_LOCK(&nf_conntrack_lock);
-
-	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-	LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-	LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
 
 	/* Destroy all pending expectations */
 	nf_ct_remove_expectations(ct);
@@ -617,16 +611,6 @@
 	nf_ct_put(ct);
 }
 
-static inline int
-conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
-		    const struct nf_conntrack_tuple *tuple,
-		    const struct nf_conn *ignored_conntrack)
-{
-	ASSERT_READ_LOCK(&nf_conntrack_lock);
-	return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
-		&& nf_ct_tuple_equal(tuple, &i->tuple);
-}
-
 struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
 		    const struct nf_conn *ignored_conntrack)
@@ -636,7 +620,8 @@
 
 	ASSERT_READ_LOCK(&nf_conntrack_lock);
 	list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
-		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
+		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
+		    nf_ct_tuple_equal(tuple, &h->tuple)) {
 			NF_CT_STAT_INC(found);
 			return h;
 		}
@@ -667,10 +652,10 @@
 				       unsigned int repl_hash) 
 {
 	ct->id = ++nf_conntrack_next_id;
-	list_prepend(&nf_conntrack_hash[hash],
-		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_prepend(&nf_conntrack_hash[repl_hash],
-		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
+	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
+		 &nf_conntrack_hash[hash]);
+	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
+		 &nf_conntrack_hash[repl_hash]);
 }
 
 void nf_conntrack_hash_insert(struct nf_conn *ct)
@@ -690,7 +675,9 @@
 __nf_conntrack_confirm(struct sk_buff **pskb)
 {
 	unsigned int hash, repl_hash;
+	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
+	struct nf_conn_help *help;
 	enum ip_conntrack_info ctinfo;
 
 	ct = nf_ct_get(*pskb, &ctinfo);
@@ -720,41 +707,41 @@
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	if (!LIST_FIND(&nf_conntrack_hash[hash],
-		       conntrack_tuple_cmp,
-		       struct nf_conntrack_tuple_hash *,
-		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-	    && !LIST_FIND(&nf_conntrack_hash[repl_hash],
-			  conntrack_tuple_cmp,
-			  struct nf_conntrack_tuple_hash *,
-			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
-		struct nf_conn_help *help;
-		/* Remove from unconfirmed list */
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_for_each_entry(h, &nf_conntrack_hash[hash], list)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				      &h->tuple))
+			goto out;
+	list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list)
+		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				      &h->tuple))
+			goto out;
 
-		__nf_conntrack_hash_insert(ct, hash, repl_hash);
-		/* Timer relative to confirmation time, not original
-		   setting time, otherwise we'd get timer wrap in
-		   weird delay cases. */
-		ct->timeout.expires += jiffies;
-		add_timer(&ct->timeout);
-		atomic_inc(&ct->ct_general.use);
-		set_bit(IPS_CONFIRMED_BIT, &ct->status);
-		NF_CT_STAT_INC(insert);
-		write_unlock_bh(&nf_conntrack_lock);
-		help = nfct_help(ct);
-		if (help && help->helper)
-			nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+	/* Remove from unconfirmed list */
+	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+
+	__nf_conntrack_hash_insert(ct, hash, repl_hash);
+	/* Timer relative to confirmation time, not original
+	   setting time, otherwise we'd get timer wrap in
+	   weird delay cases. */
+	ct->timeout.expires += jiffies;
+	add_timer(&ct->timeout);
+	atomic_inc(&ct->ct_general.use);
+	set_bit(IPS_CONFIRMED_BIT, &ct->status);
+	NF_CT_STAT_INC(insert);
+	write_unlock_bh(&nf_conntrack_lock);
+	help = nfct_help(ct);
+	if (help && help->helper)
+		nf_conntrack_event_cache(IPCT_HELPER, *pskb);
 #ifdef CONFIG_NF_NAT_NEEDED
-		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
-		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-			nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
+	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+		nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
 #endif
-		nf_conntrack_event_cache(master_ct(ct) ?
-					 IPCT_RELATED : IPCT_NEW, *pskb);
-		return NF_ACCEPT;
-	}
+	nf_conntrack_event_cache(master_ct(ct) ?
+				 IPCT_RELATED : IPCT_NEW, *pskb);
+	return NF_ACCEPT;
 
+out:
 	NF_CT_STAT_INC(insert_failed);
 	write_unlock_bh(&nf_conntrack_lock);
 	return NF_DROP;
@@ -777,24 +764,21 @@
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
-{
-	return !(test_bit(IPS_ASSURED_BIT,
-			  &nf_ct_tuplehash_to_ctrack(i)->status));
-}
-
 static int early_drop(struct list_head *chain)
 {
 	/* Traverse backwards: gives us oldest, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
-	struct nf_conn *ct = NULL;
+	struct nf_conn *ct = NULL, *tmp;
 	int dropped = 0;
 
 	read_lock_bh(&nf_conntrack_lock);
-	h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
-	if (h) {
-		ct = nf_ct_tuplehash_to_ctrack(h);
-		atomic_inc(&ct->ct_general.use);
+	list_for_each_entry_reverse(h, chain, list) {
+		tmp = nf_ct_tuplehash_to_ctrack(h);
+		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
+			ct = tmp;
+			atomic_inc(&ct->ct_general.use);
+			break;
+		}
 	}
 	read_unlock_bh(&nf_conntrack_lock);
 
@@ -810,18 +794,16 @@
 	return dropped;
 }
 
-static inline int helper_cmp(const struct nf_conntrack_helper *i,
-			     const struct nf_conntrack_tuple *rtuple)
-{
-	return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
 static struct nf_conntrack_helper *
 __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 {
-	return LIST_FIND(&helpers, helper_cmp,
-			 struct nf_conntrack_helper *,
-			 tuple);
+	struct nf_conntrack_helper *h;
+
+	list_for_each_entry(h, &helpers, list) {
+		if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
+			return h;
+	}
+	return NULL;
 }
 
 struct nf_conntrack_helper *
@@ -866,11 +848,15 @@
 		nf_conntrack_hash_rnd_initted = 1;
 	}
 
+	/* We don't want any race condition at early drop stage */
+	atomic_inc(&nf_conntrack_count);
+
 	if (nf_conntrack_max
-	    && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
+	    && atomic_read(&nf_conntrack_count) > nf_conntrack_max) {
 		unsigned int hash = hash_conntrack(orig);
 		/* Try dropping from this hash chain. */
 		if (!early_drop(&nf_conntrack_hash[hash])) {
+			atomic_dec(&nf_conntrack_count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "nf_conntrack: table full, dropping"
@@ -921,10 +907,12 @@
 	init_timer(&conntrack->timeout);
 	conntrack->timeout.data = (unsigned long)conntrack;
 	conntrack->timeout.function = death_by_timeout;
+	read_unlock_bh(&nf_ct_cache_lock);
 
-	atomic_inc(&nf_conntrack_count);
+	return conntrack;
 out:
 	read_unlock_bh(&nf_ct_cache_lock);
+	atomic_dec(&nf_conntrack_count);
 	return conntrack;
 }
 
@@ -1323,7 +1311,7 @@
 		return ret;
 	}
 	write_lock_bh(&nf_conntrack_lock);
-	list_prepend(&helpers, me);
+	list_add(&me->list, &helpers);
 	write_unlock_bh(&nf_conntrack_lock);
 
 	return 0;
@@ -1342,8 +1330,8 @@
 	return NULL;
 }
 
-static inline int unhelp(struct nf_conntrack_tuple_hash *i,
-			 const struct nf_conntrack_helper *me)
+static inline void unhelp(struct nf_conntrack_tuple_hash *i,
+			  const struct nf_conntrack_helper *me)
 {
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
 	struct nf_conn_help *help = nfct_help(ct);
@@ -1352,17 +1340,17 @@
 		nf_conntrack_event(IPCT_HELPER, ct);
 		help->helper = NULL;
 	}
-	return 0;
 }
 
 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 {
 	unsigned int i;
+	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp, *tmp;
 
 	/* Need write lock here, to delete helper. */
 	write_lock_bh(&nf_conntrack_lock);
-	LIST_DELETE(&helpers, me);
+	list_del(&me->list);
 
 	/* Get rid of expectations */
 	list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
@@ -1374,10 +1362,12 @@
 	}
 
 	/* Get rid of expecteds, set helpers to NULL. */
-	LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
-	for (i = 0; i < nf_conntrack_htable_size; i++)
-		LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
-			    struct nf_conntrack_tuple_hash *, me);
+	list_for_each_entry(h, &unconfirmed, list)
+		unhelp(h, me);
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
+		list_for_each_entry(h, &nf_conntrack_hash[i], list)
+			unhelp(h, me);
+	}
 	write_unlock_bh(&nf_conntrack_lock);
 
 	/* Someone could be still looking at the helper in a bh. */
@@ -1510,37 +1500,41 @@
 }
 
 /* Bring out ya dead! */
-static struct nf_conntrack_tuple_hash *
+static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
 		void *data, unsigned int *bucket)
 {
-	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
 
 	write_lock_bh(&nf_conntrack_lock);
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
-		h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
-				struct nf_conntrack_tuple_hash *, iter, data);
-		if (h)
-			break;
+		list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) {
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (iter(ct, data))
+				goto found;
+		}
  	}
-	if (!h)
-		h = LIST_FIND_W(&unconfirmed, do_iter,
-				struct nf_conntrack_tuple_hash *, iter, data);
-	if (h)
-		atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
+	list_for_each_entry(h, &unconfirmed, list) {
+		ct = nf_ct_tuplehash_to_ctrack(h);
+		if (iter(ct, data))
+			goto found;
+	}
+	write_unlock_bh(&nf_conntrack_lock);
+	return NULL;
+found:
+	atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
 	write_unlock_bh(&nf_conntrack_lock);
-
-	return h;
+	return ct;
 }
 
 void
 nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
 {
-	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
 	unsigned int bucket = 0;
 
-	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
-		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
 			death_by_timeout((unsigned long)ct);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 960972d..0c17a5b 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -21,6 +21,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/ctype.h>
+#include <linux/inet.h>
 #include <net/checksum.h>
 #include <net/tcp.h>
 
@@ -111,101 +112,14 @@
 	},
 };
 
-/* This code is based on inet_pton() in glibc-2.2.4 */
 static int
 get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
 {
-	static const char xdigits[] = "0123456789abcdef";
-	u_int8_t tmp[16], *tp, *endp, *colonp;
-	int ch, saw_xdigit;
-	u_int32_t val;
-	size_t clen = 0;
-
-	tp = memset(tmp, '\0', sizeof(tmp));
-	endp = tp + sizeof(tmp);
-	colonp = NULL;
-
-	/* Leading :: requires some special handling. */
-	if (*src == ':'){
-		if (*++src != ':') {
-			DEBUGP("invalid \":\" at the head of addr\n");
-			return 0;
-		}
-		clen++;
-	}
-
-	saw_xdigit = 0;
-	val = 0;
-	while ((clen < dlen) && (*src != term)) {
-		const char *pch;
-
-		ch = tolower(*src++);
-		clen++;
-
-                pch = strchr(xdigits, ch);
-                if (pch != NULL) {
-                        val <<= 4;
-                        val |= (pch - xdigits);
-                        if (val > 0xffff)
-                                return 0;
-
-			saw_xdigit = 1;
-                        continue;
-                }
-		if (ch != ':') {
-			DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch);
-			return 0;
-		}
-
-		if (!saw_xdigit) {
-			if (colonp) {
-				DEBUGP("invalid location of \"::\".\n");
-				return 0;
-			}
-			colonp = tp;
-			continue;
-		} else if (*src == term) {
-			DEBUGP("trancated IPv6 addr\n");
-			return 0;
-		}
-
-		if (tp + 2 > endp)
-			return 0;
-		*tp++ = (u_int8_t) (val >> 8) & 0xff;
-		*tp++ = (u_int8_t) val & 0xff;
-
-		saw_xdigit = 0;
-		val = 0;
-		continue;
-        }
-        if (saw_xdigit) {
-                if (tp + 2 > endp)
-                        return 0;
-                *tp++ = (u_int8_t) (val >> 8) & 0xff;
-                *tp++ = (u_int8_t) val & 0xff;
-        }
-        if (colonp != NULL) {
-                /*
-                 * Since some memmove()'s erroneously fail to handle
-                 * overlapping regions, we'll do the shift by hand.
-                 */
-                const int n = tp - colonp;
-                int i;
-
-                if (tp == endp)
-                        return 0;
-
-                for (i = 1; i <= n; i++) {
-                        endp[- i] = colonp[n - i];
-                        colonp[n - i] = 0;
-                }
-                tp = endp;
-        }
-        if (tp != endp || (*src != term))
-                return 0;
-
-        memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr));
-        return clen;
+	const char *end;
+	int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), (u8 *)dst, term, &end);
+	if (ret > 0)
+		return (int)(end - src);
+	return 0;
 }
 
 static int try_number(const char *data, size_t dlen, u_int32_t array[],
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6527d4e..1721f7c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -339,11 +339,7 @@
 		/* dump everything */
 		events = ~0UL;
 		group = NFNLGRP_CONNTRACK_NEW;
-	} else  if (events & (IPCT_STATUS |
-		      IPCT_PROTOINFO |
-		      IPCT_HELPER |
-		      IPCT_HELPINFO |
-		      IPCT_NATINFO)) {
+	} else  if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
 		type = IPCTNL_MSG_CT_NEW;
 		group = NFNLGRP_CONNTRACK_UPDATE;
 	} else
@@ -395,6 +391,10 @@
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
 		goto nfattr_failure;
 
+	if (events & IPCT_MARK
+	    && ctnetlink_dump_mark(skb, ct) < 0)
+		goto nfattr_failure;
+
 	nlh->nlmsg_len = skb->tail - b;
 	nfnetlink_send(skb, 0, group, 0);
 	return NOTIFY_DONE;
@@ -455,6 +455,11 @@
 				cb->args[1] = (unsigned long)ct;
 				goto out;
 			}
+#ifdef CONFIG_NF_CT_ACCT
+			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
+						IPCTNL_MSG_CT_GET_CTRZERO)
+				memset(&ct->counters, 0, sizeof(ct->counters));
+#endif
 		}
 		if (cb->args[1]) {
 			cb->args[1] = 0;
@@ -470,50 +475,6 @@
 	return skb->len;
 }
 
-#ifdef CONFIG_NF_CT_ACCT
-static int
-ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct nf_conn *ct = NULL;
-	struct nf_conntrack_tuple_hash *h;
-	struct list_head *i;
-	u_int32_t *id = (u_int32_t *) &cb->args[1];
-	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
-	u_int8_t l3proto = nfmsg->nfgen_family;	
-
-	DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, 
-			cb->args[0], *id);
-
-	write_lock_bh(&nf_conntrack_lock);
-	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
-		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
-			h = (struct nf_conntrack_tuple_hash *) i;
-			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
-				continue;
-			ct = nf_ct_tuplehash_to_ctrack(h);
-			if (l3proto && L3PROTO(ct) != l3proto)
-				continue;
-			if (ct->id <= *id)
-				continue;
-			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
-		                        	cb->nlh->nlmsg_seq,
-						IPCTNL_MSG_CT_NEW,
-						1, ct) < 0)
-				goto out;
-			*id = ct->id;
-
-			memset(&ct->counters, 0, sizeof(ct->counters));
-		}
-	}
-out:	
-	write_unlock_bh(&nf_conntrack_lock);
-
-	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
-	return skb->len;
-}
-#endif
-
 static inline int
 ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple)
 {
@@ -788,22 +749,14 @@
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		u32 rlen;
 
-		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
-					IPCTNL_MSG_CT_GET_CTRZERO) {
-#ifdef CONFIG_NF_CT_ACCT
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_dump_table_w,
-						ctnetlink_done)) != 0)
-				return -EINVAL;
-#else
+#ifndef CONFIG_NF_CT_ACCT
+		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
 			return -ENOTSUPP;
 #endif
-		} else {
-			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-		      		                        ctnetlink_dump_table,
-		                                	ctnetlink_done)) != 0)
+		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+						ctnetlink_dump_table,
+						ctnetlink_done)) != 0)
 			return -EINVAL;
-		}
 
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (rlen > skb->len)
@@ -1274,6 +1227,9 @@
 	} else
 		return NOTIFY_DONE;
 
+	if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+		return NOTIFY_DONE;
+
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
 	if (!skb)
 		return NOTIFY_DONE;
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 46bc27e..26408bb 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_conntrack_protocol.h>
 
-unsigned int nf_ct_generic_timeout = 600*HZ;
+unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
 
 static int generic_pkt_to_tuple(const struct sk_buff *skb,
 				unsigned int dataoff,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 9bd8a78..af56877 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -64,13 +64,13 @@
 #define HOURS * 60 MINS
 #define DAYS  * 24 HOURS
 
-static unsigned int nf_ct_sctp_timeout_closed            =  10 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_wait       =   3 SECS;
-static unsigned int nf_ct_sctp_timeout_cookie_echoed     =   3 SECS;
-static unsigned int nf_ct_sctp_timeout_established       =   5 DAYS;
-static unsigned int nf_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
-static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_closed __read_mostly          =  10 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly     =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly   =   3 SECS;
+static unsigned int nf_ct_sctp_timeout_established __read_mostly     =   5 DAYS;
+static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly   = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly   = 300 SECS / 1000;
+static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
 
 static unsigned int * sctp_timeouts[]
 = { NULL,                                  /* SCTP_CONNTRACK_NONE  */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index af8adcb..238bbb5 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -57,19 +57,19 @@
 /* "Be conservative in what you do, 
     be liberal in what you accept from others." 
     If it's non-zero, we mark only out of window RST segments as INVALID. */
-int nf_ct_tcp_be_liberal = 0;
+int nf_ct_tcp_be_liberal __read_mostly = 0;
 
 /* When connection is picked up from the middle, how many packets are required
    to pass in each direction when we assume we are in sync - if any side uses
    window scaling, we lost the game. 
    If it is set to zero, we disable picking up already established 
    connections. */
-int nf_ct_tcp_loose = 3;
+int nf_ct_tcp_loose __read_mostly = 3;
 
 /* Max number of the retransmitted packets without receiving an (acceptable) 
    ACK from the destination. If this number is reached, a shorter timer 
    will be started. */
-int nf_ct_tcp_max_retrans = 3;
+int nf_ct_tcp_max_retrans __read_mostly = 3;
 
   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
      closely.  They're more complex. --RR */
@@ -92,19 +92,19 @@
 #define HOURS * 60 MINS
 #define DAYS * 24 HOURS
 
-unsigned int nf_ct_tcp_timeout_syn_sent =      2 MINS;
-unsigned int nf_ct_tcp_timeout_syn_recv =     60 SECS;
-unsigned int nf_ct_tcp_timeout_established =   5 DAYS;
-unsigned int nf_ct_tcp_timeout_fin_wait =      2 MINS;
-unsigned int nf_ct_tcp_timeout_close_wait =   60 SECS;
-unsigned int nf_ct_tcp_timeout_last_ack =     30 SECS;
-unsigned int nf_ct_tcp_timeout_time_wait =     2 MINS;
-unsigned int nf_ct_tcp_timeout_close =        10 SECS;
+unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
+unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
+unsigned int nf_ct_tcp_timeout_established __read_mostly =   5 DAYS;
+unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
+unsigned int nf_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
+unsigned int nf_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
+unsigned int nf_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
+unsigned int nf_ct_tcp_timeout_close __read_mostly =        10 SECS;
 
 /* RFC1122 says the R2 limit should be at least 100 seconds.
    Linux uses 15 packets as limit, which corresponds 
    to ~13-30min depending on RTO. */
-unsigned int nf_ct_tcp_timeout_max_retrans =     5 MINS;
+unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
  
 static unsigned int * tcp_timeouts[]
 = { NULL,                              /* TCP_CONNTRACK_NONE */
@@ -688,13 +688,15 @@
 			if (state->last_dir == dir
 			    && state->last_seq == seq
 			    && state->last_ack == ack
-			    && state->last_end == end)
+			    && state->last_end == end
+			    && state->last_win == win)
 				state->retrans++;
 			else {
 				state->last_dir = dir;
 				state->last_seq = seq;
 				state->last_ack = ack;
 				state->last_end = end;
+				state->last_win = win;
 				state->retrans = 0;
 			}
 		}
@@ -823,8 +825,7 @@
   
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there 
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 */
 	/* FIXME: Source route IP option packets --RR */
 	if (nf_conntrack_checksum &&
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index ae07ebe..d28981c 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -27,8 +27,8 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_protocol.h>
 
-unsigned int nf_ct_udp_timeout = 30*HZ;
-unsigned int nf_ct_udp_timeout_stream = 180*HZ;
+unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
+unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
 
 static int udp_pkt_to_tuple(const struct sk_buff *skb,
 			     unsigned int dataoff,
@@ -131,8 +131,7 @@
 
 	/* Checksum invalid? Ignore.
 	 * We skip checking packets on the outgoing path
-	 * because the semantic of CHECKSUM_HW is different there
-	 * and moreover root might send raw packets.
+	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
 	if (nf_conntrack_checksum &&
 	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 4ef8366..5954f67 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -37,7 +37,6 @@
 #include <net/netfilter/nf_conntrack_protocol.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
 
 #if 0
 #define DEBUGP printk
@@ -428,7 +427,7 @@
 
 /* Sysctl support */
 
-int nf_conntrack_checksum = 1;
+int nf_conntrack_checksum __read_mostly = 1;
 
 #ifdef CONFIG_SYSCTL
 
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 86e392bf..a981971 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -23,7 +23,7 @@
 				int hook_thresh);
 
 /* nf_queue.c */
-extern int nf_queue(struct sk_buff **skb, 
+extern int nf_queue(struct sk_buff *skb,
 		    struct list_head *elem, 
 		    int pf, unsigned int hook,
 		    struct net_device *indev,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 662a869..4d8936e 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -74,13 +74,13 @@
  * Any packet that leaves via this function must come back 
  * through nf_reinject().
  */
-int nf_queue(struct sk_buff **skb, 
-	     struct list_head *elem, 
-	     int pf, unsigned int hook,
-	     struct net_device *indev,
-	     struct net_device *outdev,
-	     int (*okfn)(struct sk_buff *),
-	     unsigned int queuenum)
+static int __nf_queue(struct sk_buff *skb,
+		      struct list_head *elem,
+		      int pf, unsigned int hook,
+		      struct net_device *indev,
+		      struct net_device *outdev,
+		      int (*okfn)(struct sk_buff *),
+		      unsigned int queuenum)
 {
 	int status;
 	struct nf_info *info;
@@ -94,14 +94,14 @@
 	read_lock(&queue_handler_lock);
 	if (!queue_handler[pf]) {
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
 	afinfo = nf_get_afinfo(pf);
 	if (!afinfo) {
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
@@ -109,9 +109,9 @@
 	if (!info) {
 		if (net_ratelimit())
 			printk(KERN_ERR "OOM queueing packet %p\n",
-			       *skb);
+			       skb);
 		read_unlock(&queue_handler_lock);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 		return 1;
 	}
 
@@ -130,15 +130,15 @@
 	if (outdev) dev_hold(outdev);
 
 #ifdef CONFIG_BRIDGE_NETFILTER
-	if ((*skb)->nf_bridge) {
-		physindev = (*skb)->nf_bridge->physindev;
+	if (skb->nf_bridge) {
+		physindev = skb->nf_bridge->physindev;
 		if (physindev) dev_hold(physindev);
-		physoutdev = (*skb)->nf_bridge->physoutdev;
+		physoutdev = skb->nf_bridge->physoutdev;
 		if (physoutdev) dev_hold(physoutdev);
 	}
 #endif
-	afinfo->saveroute(*skb, info);
-	status = queue_handler[pf]->outfn(*skb, info, queuenum,
+	afinfo->saveroute(skb, info);
+	status = queue_handler[pf]->outfn(skb, info, queuenum,
 					  queue_handler[pf]->data);
 
 	read_unlock(&queue_handler_lock);
@@ -153,7 +153,7 @@
 #endif
 		module_put(info->elem->owner);
 		kfree(info);
-		kfree_skb(*skb);
+		kfree_skb(skb);
 
 		return 1;
 	}
@@ -161,6 +161,46 @@
 	return 1;
 }
 
+int nf_queue(struct sk_buff *skb,
+	     struct list_head *elem,
+	     int pf, unsigned int hook,
+	     struct net_device *indev,
+	     struct net_device *outdev,
+	     int (*okfn)(struct sk_buff *),
+	     unsigned int queuenum)
+{
+	struct sk_buff *segs;
+
+	if (!skb_is_gso(skb))
+		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+				  queuenum);
+
+	switch (pf) {
+	case AF_INET:
+		skb->protocol = htons(ETH_P_IP);
+		break;
+	case AF_INET6:
+		skb->protocol = htons(ETH_P_IPV6);
+		break;
+	}
+
+	segs = skb_gso_segment(skb, 0);
+	kfree_skb(skb);
+	if (unlikely(IS_ERR(segs)))
+		return 1;
+
+	do {
+		struct sk_buff *nskb = segs->next;
+
+		segs->next = NULL;
+		if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
+				queuenum))
+			kfree_skb(segs);
+		segs = nskb;
+	} while (segs);
+	return 1;
+}
+
 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 		 unsigned int verdict)
 {
@@ -224,9 +264,9 @@
 	case NF_STOLEN:
 		break;
 	case NF_QUEUE:
-		if (!nf_queue(&skb, elem, info->pf, info->hook, 
-			      info->indev, info->outdev, info->okfn,
-			      verdict >> NF_VERDICT_BITS))
+		if (!__nf_queue(skb, elem, info->pf, info->hook,
+				info->indev, info->outdev, info->okfn,
+				verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 		break;
 	default:
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 49ef41e..8eb2473 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -377,9 +377,9 @@
 		break;
 	
 	case NFQNL_COPY_PACKET:
-		if (entskb->ip_summed == CHECKSUM_HW &&
-		    (*errp = skb_checksum_help(entskb,
-		                               outdev == NULL))) {
+		if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
+		     entskb->ip_summed == CHECKSUM_COMPLETE) &&
+		    (*errp = skb_checksum_help(entskb))) {
 			spin_unlock_bh(&queue->lock);
 			return NULL;
 		}
@@ -584,7 +584,7 @@
                 queue->queue_dropped++;
 		status = -ENOSPC;
 		if (net_ratelimit())
-		          printk(KERN_WARNING "ip_queue: full at %d entries, "
+		          printk(KERN_WARNING "nf_queue: full at %d entries, "
 				 "dropping packets(s). Dropped: %d\n", 
 				 queue->queue_total, queue->queue_dropped);
 		goto err_out_free_nskb;
@@ -635,7 +635,7 @@
 			                         diff,
 			                         GFP_ATOMIC);
 			if (newskb == NULL) {
-				printk(KERN_WARNING "ip_queue: OOM "
+				printk(KERN_WARNING "nf_queue: OOM "
 				      "in mangle, dropping packet\n");
 				return -ENOMEM;
 			}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 174e8f9..58522fc 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -81,12 +81,42 @@
 	int af = target->family;
 
 	mutex_lock(&xt[af].mutex);
-	LIST_DELETE(&xt[af].target, target);
+	list_del(&target->list);
 	mutex_unlock(&xt[af].mutex);
 }
 EXPORT_SYMBOL(xt_unregister_target);
 
 int
+xt_register_targets(struct xt_target *target, unsigned int n)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < n; i++) {
+		err = xt_register_target(&target[i]);
+		if (err)
+			goto err;
+	}
+	return err;
+
+err:
+	if (i > 0)
+		xt_unregister_targets(target, i);
+	return err;
+}
+EXPORT_SYMBOL(xt_register_targets);
+
+void
+xt_unregister_targets(struct xt_target *target, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		xt_unregister_target(&target[i]);
+}
+EXPORT_SYMBOL(xt_unregister_targets);
+
+int
 xt_register_match(struct xt_match *match)
 {
 	int ret, af = match->family;
@@ -108,11 +138,41 @@
 	int af =  match->family;
 
 	mutex_lock(&xt[af].mutex);
-	LIST_DELETE(&xt[af].match, match);
+	list_del(&match->list);
 	mutex_unlock(&xt[af].mutex);
 }
 EXPORT_SYMBOL(xt_unregister_match);
 
+int
+xt_register_matches(struct xt_match *match, unsigned int n)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < n; i++) {
+		err = xt_register_match(&match[i]);
+		if (err)
+			goto err;
+	}
+	return err;
+
+err:
+	if (i > 0)
+		xt_unregister_matches(match, i);
+	return err;
+}
+EXPORT_SYMBOL(xt_register_matches);
+
+void
+xt_unregister_matches(struct xt_match *match, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		xt_unregister_match(&match[i]);
+}
+EXPORT_SYMBOL(xt_unregister_matches);
+
 
 /*
  * These are weird, but module loading must not be done with mutex
@@ -273,52 +333,65 @@
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_match(void *match, void **dstptr, int *size, int convert)
+int xt_compat_match_offset(struct xt_match *match)
 {
-	struct xt_match *m;
-	struct compat_xt_entry_match *pcompat_m;
-	struct xt_entry_match *pm;
-	u_int16_t msize;
-	int off, ret;
-
-	ret = 0;
-	m = ((struct xt_entry_match *)match)->u.kernel.match;
-	off = XT_ALIGN(m->matchsize) - COMPAT_XT_ALIGN(m->matchsize);
-	switch (convert) {
-		case COMPAT_TO_USER:
-			pm = (struct xt_entry_match *)match;
-			msize = pm->u.user.match_size;
-			if (copy_to_user(*dstptr, pm, msize)) {
-				ret = -EFAULT;
-				break;
-			}
-			msize -= off;
-			if (put_user(msize, (u_int16_t *)*dstptr))
-				ret = -EFAULT;
-			*size -= off;
-			*dstptr += msize;
-			break;
-		case COMPAT_FROM_USER:
-			pcompat_m = (struct compat_xt_entry_match *)match;
-			pm = (struct xt_entry_match *)*dstptr;
-			msize = pcompat_m->u.user.match_size;
-			memcpy(pm, pcompat_m, msize);
-			msize += off;
-			pm->u.user.match_size = msize;
-			*size += off;
-			*dstptr += msize;
-			break;
-		case COMPAT_CALC_SIZE:
-			*size += off;
-			break;
-		default:
-			ret = -ENOPROTOOPT;
-			break;
-	}
-	return ret;
+	u_int16_t csize = match->compatsize ? : match->matchsize;
+	return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
 }
-EXPORT_SYMBOL_GPL(xt_compat_match);
-#endif
+EXPORT_SYMBOL_GPL(xt_compat_match_offset);
+
+void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+			       int *size)
+{
+	struct xt_match *match = m->u.kernel.match;
+	struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
+	int pad, off = xt_compat_match_offset(match);
+	u_int16_t msize = cm->u.user.match_size;
+
+	m = *dstptr;
+	memcpy(m, cm, sizeof(*cm));
+	if (match->compat_from_user)
+		match->compat_from_user(m->data, cm->data);
+	else
+		memcpy(m->data, cm->data, msize - sizeof(*cm));
+	pad = XT_ALIGN(match->matchsize) - match->matchsize;
+	if (pad > 0)
+		memset(m->data + match->matchsize, 0, pad);
+
+	msize += off;
+	m->u.user.match_size = msize;
+
+	*size += off;
+	*dstptr += msize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
+
+int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
+			    int *size)
+{
+	struct xt_match *match = m->u.kernel.match;
+	struct compat_xt_entry_match __user *cm = *dstptr;
+	int off = xt_compat_match_offset(match);
+	u_int16_t msize = m->u.user.match_size - off;
+
+	if (copy_to_user(cm, m, sizeof(*cm)) ||
+	    put_user(msize, &cm->u.user.match_size))
+	    	return -EFAULT;
+
+	if (match->compat_to_user) {
+		if (match->compat_to_user((void __user *)cm->data, m->data))
+			return -EFAULT;
+	} else {
+		if (copy_to_user(cm->data, m->data, msize - sizeof(*cm)))
+			return -EFAULT;
+	}
+
+	*size -= off;
+	*dstptr += msize;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+#endif /* CONFIG_COMPAT */
 
 int xt_check_target(const struct xt_target *target, unsigned short family,
 		    unsigned int size, const char *table, unsigned int hook_mask,
@@ -350,51 +423,64 @@
 EXPORT_SYMBOL_GPL(xt_check_target);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_target(void *target, void **dstptr, int *size, int convert)
+int xt_compat_target_offset(struct xt_target *target)
 {
-	struct xt_target *t;
-	struct compat_xt_entry_target *pcompat;
-	struct xt_entry_target *pt;
-	u_int16_t tsize;
-	int off, ret;
-
-	ret = 0;
-	t = ((struct xt_entry_target *)target)->u.kernel.target;
-	off = XT_ALIGN(t->targetsize) - COMPAT_XT_ALIGN(t->targetsize);
-	switch (convert) {
-		case COMPAT_TO_USER:
-			pt = (struct xt_entry_target *)target;
-			tsize = pt->u.user.target_size;
-			if (copy_to_user(*dstptr, pt, tsize)) {
-				ret = -EFAULT;
-				break;
-			}
-			tsize -= off;
-			if (put_user(tsize, (u_int16_t *)*dstptr))
-				ret = -EFAULT;
-			*size -= off;
-			*dstptr += tsize;
-			break;
-		case COMPAT_FROM_USER:
-			pcompat = (struct compat_xt_entry_target *)target;
-			pt = (struct xt_entry_target *)*dstptr;
-			tsize = pcompat->u.user.target_size;
-			memcpy(pt, pcompat, tsize);
-			tsize += off;
-			pt->u.user.target_size = tsize;
-			*size += off;
-			*dstptr += tsize;
-			break;
-		case COMPAT_CALC_SIZE:
-			*size += off;
-			break;
-		default:
-			ret = -ENOPROTOOPT;
-			break;
-	}
-	return ret;
+	u_int16_t csize = target->compatsize ? : target->targetsize;
+	return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize);
 }
-EXPORT_SYMBOL_GPL(xt_compat_target);
+EXPORT_SYMBOL_GPL(xt_compat_target_offset);
+
+void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+			        int *size)
+{
+	struct xt_target *target = t->u.kernel.target;
+	struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
+	int pad, off = xt_compat_target_offset(target);
+	u_int16_t tsize = ct->u.user.target_size;
+
+	t = *dstptr;
+	memcpy(t, ct, sizeof(*ct));
+	if (target->compat_from_user)
+		target->compat_from_user(t->data, ct->data);
+	else
+		memcpy(t->data, ct->data, tsize - sizeof(*ct));
+	pad = XT_ALIGN(target->targetsize) - target->targetsize;
+	if (pad > 0)
+		memset(t->data + target->targetsize, 0, pad);
+
+	tsize += off;
+	t->u.user.target_size = tsize;
+
+	*size += off;
+	*dstptr += tsize;
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
+
+int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
+			     int *size)
+{
+	struct xt_target *target = t->u.kernel.target;
+	struct compat_xt_entry_target __user *ct = *dstptr;
+	int off = xt_compat_target_offset(target);
+	u_int16_t tsize = t->u.user.target_size - off;
+
+	if (copy_to_user(ct, t, sizeof(*ct)) ||
+	    put_user(tsize, &ct->u.user.target_size))
+	    	return -EFAULT;
+
+	if (target->compat_to_user) {
+		if (target->compat_to_user((void __user *)ct->data, t->data))
+			return -EFAULT;
+	} else {
+		if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct)))
+			return -EFAULT;
+	}
+
+	*size -= off;
+	*dstptr += tsize;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
 #endif
 
 struct xt_table_info *xt_alloc_table_info(unsigned int size)
@@ -515,15 +601,18 @@
 {
 	int ret;
 	struct xt_table_info *private;
+	struct xt_table *t;
 
 	ret = mutex_lock_interruptible(&xt[table->af].mutex);
 	if (ret != 0)
 		return ret;
 
 	/* Don't autoload: we'd eat our tail... */
-	if (list_named_find(&xt[table->af].tables, table->name)) {
-		ret = -EEXIST;
-		goto unlock;
+	list_for_each_entry(t, &xt[table->af].tables, list) {
+		if (strcmp(t->name, table->name) == 0) {
+			ret = -EEXIST;
+			goto unlock;
+		}
 	}
 
 	/* Simplifies replace_table code. */
@@ -538,7 +627,7 @@
 	/* save number of initial entries */
 	private->initial_entries = private->number;
 
-	list_prepend(&xt[table->af].tables, table);
+	list_add(&table->list, &xt[table->af].tables);
 
 	ret = 0;
  unlock:
@@ -553,7 +642,7 @@
 
 	mutex_lock(&xt[table->af].mutex);
 	private = table->private;
-	LIST_DELETE(&xt[table->af].tables, table);
+	list_del(&table->list);
 	mutex_unlock(&xt[table->af].mutex);
 
 	return private;
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index e54e577..50de965 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -29,8 +29,7 @@
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct xt_classify_target_info *clinfo = targinfo;
 
@@ -40,47 +39,41 @@
 	return XT_CONTINUE;
 }
 
-static struct xt_target classify_reg = { 
-	.name 		= "CLASSIFY", 
-	.target 	= target,
-	.targetsize	= sizeof(struct xt_classify_target_info),
-	.table		= "mangle",
-	.hooks		= (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
-		          (1 << NF_IP_POST_ROUTING),
-	.family		= AF_INET,
-	.me 		= THIS_MODULE,
+static struct xt_target xt_classify_target[] = {
+	{
+		.family		= AF_INET,
+		.name 		= "CLASSIFY",
+		.target 	= target,
+		.targetsize	= sizeof(struct xt_classify_target_info),
+		.table		= "mangle",
+		.hooks		= (1 << NF_IP_LOCAL_OUT) |
+				  (1 << NF_IP_FORWARD) |
+			          (1 << NF_IP_POST_ROUTING),
+		.me 		= THIS_MODULE,
+	},
+	{
+		.name 		= "CLASSIFY",
+		.family		= AF_INET6,
+		.target 	= target,
+		.targetsize	= sizeof(struct xt_classify_target_info),
+		.table		= "mangle",
+		.hooks		= (1 << NF_IP_LOCAL_OUT) |
+				  (1 << NF_IP_FORWARD) |
+			          (1 << NF_IP_POST_ROUTING),
+		.me 		= THIS_MODULE,
+	},
 };
-static struct xt_target classify6_reg = { 
-	.name 		= "CLASSIFY", 
-	.target 	= target,
-	.targetsize	= sizeof(struct xt_classify_target_info),
-	.table		= "mangle",
-	.hooks		= (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) |
-		          (1 << NF_IP_POST_ROUTING),
-	.family		= AF_INET6,
-	.me 		= THIS_MODULE,
-};
-
 
 static int __init xt_classify_init(void)
 {
-	int ret;
-
-	ret = xt_register_target(&classify_reg);
-	if (ret)
-		return ret;
-
-	ret = xt_register_target(&classify6_reg);
-	if (ret)
-		xt_unregister_target(&classify_reg);
-
-	return ret;
+	return xt_register_targets(xt_classify_target,
+				   ARRAY_SIZE(xt_classify_target));
 }
 
 static void __exit xt_classify_fini(void)
 {
-	xt_unregister_target(&classify_reg);
-	xt_unregister_target(&classify6_reg);
+	xt_unregister_targets(xt_classify_target,
+			      ARRAY_SIZE(xt_classify_target));
 }
 
 module_init(xt_classify_init);
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 60c375d..c01524f 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -38,8 +38,7 @@
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct xt_connmark_target_info *markinfo = targinfo;
 	u_int32_t diff;
@@ -49,24 +48,37 @@
 	u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
 
 	if (ctmark) {
-	    switch(markinfo->mode) {
-	    case XT_CONNMARK_SET:
-		newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
-		if (newmark != *ctmark)
-		    *ctmark = newmark;
-		break;
-	    case XT_CONNMARK_SAVE:
-		newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
-		if (*ctmark != newmark)
-		    *ctmark = newmark;
-		break;
-	    case XT_CONNMARK_RESTORE:
-		nfmark = (*pskb)->nfmark;
-		diff = (*ctmark ^ nfmark) & markinfo->mask;
-		if (diff != 0)
-		    (*pskb)->nfmark = nfmark ^ diff;
-		break;
-	    }
+		switch(markinfo->mode) {
+		case XT_CONNMARK_SET:
+			newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
+			if (newmark != *ctmark) {
+				*ctmark = newmark;
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+				ip_conntrack_event_cache(IPCT_MARK, *pskb);
+#else
+				nf_conntrack_event_cache(IPCT_MARK, *pskb);
+#endif
+			}
+			break;
+		case XT_CONNMARK_SAVE:
+			newmark = (*ctmark & ~markinfo->mask) |
+				  ((*pskb)->nfmark & markinfo->mask);
+			if (*ctmark != newmark) {
+				*ctmark = newmark;
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+				ip_conntrack_event_cache(IPCT_MARK, *pskb);
+#else
+				nf_conntrack_event_cache(IPCT_MARK, *pskb);
+#endif
+			}
+			break;
+		case XT_CONNMARK_RESTORE:
+			nfmark = (*pskb)->nfmark;
+			diff = (*ctmark ^ nfmark) & markinfo->mask;
+			if (diff != 0)
+				(*pskb)->nfmark = nfmark ^ diff;
+			break;
+		}
 	}
 
 	return XT_CONTINUE;
@@ -77,65 +89,91 @@
 	   const void *entry,
 	   const struct xt_target *target,
 	   void *targinfo,
-	   unsigned int targinfosize,
 	   unsigned int hook_mask)
 {
 	struct xt_connmark_target_info *matchinfo = targinfo;
 
 	if (matchinfo->mode == XT_CONNMARK_RESTORE) {
-	    if (strcmp(tablename, "mangle") != 0) {
-		    printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
-		    return 0;
-	    }
+		if (strcmp(tablename, "mangle") != 0) {
+			printk(KERN_WARNING "CONNMARK: restore can only be "
+			       "called from \"mangle\" table, not \"%s\"\n",
+			       tablename);
+			return 0;
+		}
 	}
-
 	if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
 		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
 		return 0;
 	}
-
 	return 1;
 }
 
-static struct xt_target connmark_reg = {
-	.name		= "CONNMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_connmark_target_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE
+#ifdef CONFIG_COMPAT
+struct compat_xt_connmark_target_info {
+	compat_ulong_t	mark, mask;
+	u_int8_t	mode;
+	u_int8_t	__pad1;
+	u_int16_t	__pad2;
 };
 
-static struct xt_target connmark6_reg = {
-	.name		= "CONNMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_connmark_target_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_xt_connmark_target_info *cm = src;
+	struct xt_connmark_target_info m = {
+		.mark	= cm->mark,
+		.mask	= cm->mask,
+		.mode	= cm->mode,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct xt_connmark_target_info *m = src;
+	struct compat_xt_connmark_target_info cm = {
+		.mark	= m->mark,
+		.mask	= m->mask,
+		.mode	= m->mode,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct xt_target xt_connmark_target[] = {
+	{
+		.name		= "CONNMARK",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_connmark_target_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_connmark_target_info),
+		.compat_from_user = compat_from_user,
+		.compat_to_user	= compat_to_user,
+#endif
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "CONNMARK",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_connmark_target_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_connmark_init(void)
 {
-	int ret;
-
 	need_conntrack();
-
-	ret = xt_register_target(&connmark_reg);
-	if (ret)
-		return ret;
-
-	ret = xt_register_target(&connmark6_reg);
-	if (ret)
-		xt_unregister_target(&connmark_reg);
-
-	return ret;
+	return xt_register_targets(xt_connmark_target,
+				   ARRAY_SIZE(xt_connmark_target));
 }
 
 static void __exit xt_connmark_fini(void)
 {
-	xt_unregister_target(&connmark_reg);
-	xt_unregister_target(&connmark6_reg);
+	xt_unregister_targets(xt_connmark_target,
+			      ARRAY_SIZE(xt_connmark_target));
 }
 
 module_init(xt_connmark_init);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 8c011e0..4673862 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -66,7 +66,7 @@
 static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
 			   const struct net_device *out, unsigned int hooknum,
 			   const struct xt_target *target,
-			   const void *targinfo, void *userinfo)
+			   const void *targinfo)
 {
 	struct sk_buff *skb = *pskb;
 	const struct xt_connsecmark_target_info *info = targinfo;
@@ -89,7 +89,7 @@
 
 static int checkentry(const char *tablename, const void *entry,
 		      const struct xt_target *target, void *targinfo,
-		      unsigned int targinfosize, unsigned int hook_mask)
+		      unsigned int hook_mask)
 {
 	struct xt_connsecmark_target_info *info = targinfo;
 
@@ -106,49 +106,38 @@
 	return 1;
 }
 
-static struct xt_target ipt_connsecmark_reg = {
-	.name		= "CONNSECMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_connsecmark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-	.family		= AF_INET,
-	.revision	= 0,
-};
-
-static struct xt_target ip6t_connsecmark_reg = {
-	.name		= "CONNSECMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_connsecmark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-	.family		= AF_INET6,
-	.revision	= 0,
+static struct xt_target xt_connsecmark_target[] = {
+	{
+		.name		= "CONNSECMARK",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_connsecmark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "CONNSECMARK",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_connsecmark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_connsecmark_init(void)
 {
-	int err;
-
 	need_conntrack();
-
-	err = xt_register_target(&ipt_connsecmark_reg);
-	if (err)
-		return err;
-
-	err = xt_register_target(&ip6t_connsecmark_reg);
-	if (err)
-		xt_unregister_target(&ipt_connsecmark_reg);
-
-	return err;
+	return xt_register_targets(xt_connsecmark_target,
+				   ARRAY_SIZE(xt_connsecmark_target));
 }
 
 static void __exit xt_connsecmark_fini(void)
 {
-	xt_unregister_target(&ip6t_connsecmark_reg);
-	xt_unregister_target(&ipt_connsecmark_reg);
+	xt_unregister_targets(xt_connsecmark_target,
+			      ARRAY_SIZE(xt_connsecmark_target));
 }
 
 module_init(xt_connsecmark_init);
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
new file mode 100644
index 0000000..a7cc75a
--- /dev/null
+++ b/net/netfilter/xt_DSCP.c
@@ -0,0 +1,118 @@
+/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * See RFC2474 for a description of the DSCP field within the IP Header.
+ *
+ * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_DSCP.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("x_tables DSCP modification module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_DSCP");
+MODULE_ALIAS("ip6t_DSCP");
+
+static unsigned int target(struct sk_buff **pskb,
+			   const struct net_device *in,
+			   const struct net_device *out,
+			   unsigned int hooknum,
+			   const struct xt_target *target,
+			   const void *targinfo)
+{
+	const struct xt_DSCP_info *dinfo = targinfo;
+	u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT;
+
+	if (dscp != dinfo->dscp) {
+		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+			return NF_DROP;
+
+		ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK),
+				    dinfo->dscp << XT_DSCP_SHIFT);
+
+	}
+	return XT_CONTINUE;
+}
+
+static unsigned int target6(struct sk_buff **pskb,
+			    const struct net_device *in,
+			    const struct net_device *out,
+			    unsigned int hooknum,
+			    const struct xt_target *target,
+			    const void *targinfo)
+{
+	const struct xt_DSCP_info *dinfo = targinfo;
+	u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT;
+
+	if (dscp != dinfo->dscp) {
+		if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
+			return NF_DROP;
+
+		ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK),
+				    dinfo->dscp << XT_DSCP_SHIFT);
+	}
+	return XT_CONTINUE;
+}
+
+static int checkentry(const char *tablename,
+		      const void *e_void,
+		      const struct xt_target *target,
+		      void *targinfo,
+		      unsigned int hook_mask)
+{
+	const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp;
+
+	if ((dscp > XT_DSCP_MAX)) {
+		printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
+		return 0;
+	}
+	return 1;
+}
+
+static struct xt_target xt_dscp_target[] = {
+	{
+		.name		= "DSCP",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_DSCP_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "DSCP",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.target		= target6,
+		.targetsize	= sizeof(struct xt_DSCP_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+};
+
+static int __init xt_dscp_target_init(void)
+{
+	return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
+}
+
+static void __exit xt_dscp_target_fini(void)
+{
+	xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target));
+}
+
+module_init(xt_dscp_target_init);
+module_exit(xt_dscp_target_fini);
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index ee9c34e..c6e860a 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -27,8 +27,7 @@
 	  const struct net_device *out,
 	  unsigned int hooknum,
 	  const struct xt_target *target,
-	  const void *targinfo,
-	  void *userinfo)
+	  const void *targinfo)
 {
 	const struct xt_mark_target_info *markinfo = targinfo;
 
@@ -44,8 +43,7 @@
 	  const struct net_device *out,
 	  unsigned int hooknum,
 	  const struct xt_target *target,
-	  const void *targinfo,
-	  void *userinfo)
+	  const void *targinfo)
 {
 	const struct xt_mark_target_info_v1 *markinfo = targinfo;
 	int mark = 0;
@@ -76,7 +74,6 @@
 	      const void *entry,
 	      const struct xt_target *target,
 	      void *targinfo,
-	      unsigned int targinfosize,
 	      unsigned int hook_mask)
 {
 	struct xt_mark_target_info *markinfo = targinfo;
@@ -93,7 +90,6 @@
 	      const void *entry,
 	      const struct xt_target *target,
 	      void *targinfo,
-	      unsigned int targinfosize,
 	      unsigned int hook_mask)
 {
 	struct xt_mark_target_info_v1 *markinfo = targinfo;
@@ -112,65 +108,81 @@
 	return 1;
 }
 
-static struct xt_target ipt_mark_reg_v0 = {
-	.name		= "MARK",
-	.target		= target_v0,
-	.targetsize	= sizeof(struct xt_mark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry_v0,
-	.me		= THIS_MODULE,
-	.family		= AF_INET,
-	.revision	= 0,
+#ifdef CONFIG_COMPAT
+struct compat_xt_mark_target_info_v1 {
+	compat_ulong_t	mark;
+	u_int8_t	mode;
+	u_int8_t	__pad1;
+	u_int16_t	__pad2;
 };
 
-static struct xt_target ipt_mark_reg_v1 = {
-	.name		= "MARK",
-	.target		= target_v1,
-	.targetsize	= sizeof(struct xt_mark_target_info_v1),
-	.table		= "mangle",
-	.checkentry	= checkentry_v1,
-	.me		= THIS_MODULE,
-	.family		= AF_INET,
-	.revision	= 1,
-};
+static void compat_from_user_v1(void *dst, void *src)
+{
+	struct compat_xt_mark_target_info_v1 *cm = src;
+	struct xt_mark_target_info_v1 m = {
+		.mark	= cm->mark,
+		.mode	= cm->mode,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
 
-static struct xt_target ip6t_mark_reg_v0 = {
-	.name		= "MARK",
-	.target		= target_v0,
-	.targetsize	= sizeof(struct xt_mark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry_v0,
-	.me		= THIS_MODULE,
-	.family		= AF_INET6,
-	.revision	= 0,
+static int compat_to_user_v1(void __user *dst, void *src)
+{
+	struct xt_mark_target_info_v1 *m = src;
+	struct compat_xt_mark_target_info_v1 cm = {
+		.mark	= m->mark,
+		.mode	= m->mode,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct xt_target xt_mark_target[] = {
+	{
+		.name		= "MARK",
+		.family		= AF_INET,
+		.revision	= 0,
+		.checkentry	= checkentry_v0,
+		.target		= target_v0,
+		.targetsize	= sizeof(struct xt_mark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "MARK",
+		.family		= AF_INET,
+		.revision	= 1,
+		.checkentry	= checkentry_v1,
+		.target		= target_v1,
+		.targetsize	= sizeof(struct xt_mark_target_info_v1),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_mark_target_info_v1),
+		.compat_from_user = compat_from_user_v1,
+		.compat_to_user	= compat_to_user_v1,
+#endif
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "MARK",
+		.family		= AF_INET6,
+		.revision	= 0,
+		.checkentry	= checkentry_v0,
+		.target		= target_v0,
+		.targetsize	= sizeof(struct xt_mark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_mark_init(void)
 {
-	int err;
-
-	err = xt_register_target(&ipt_mark_reg_v0);
-	if (err)
-		return err;
-
-	err = xt_register_target(&ipt_mark_reg_v1);
-	if (err)
-		xt_unregister_target(&ipt_mark_reg_v0);
-
-	err = xt_register_target(&ip6t_mark_reg_v0);
-	if (err) {
-		xt_unregister_target(&ipt_mark_reg_v0);
-		xt_unregister_target(&ipt_mark_reg_v1);
-	}
-
-	return err;
+	return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
 }
 
 static void __exit xt_mark_fini(void)
 {
-	xt_unregister_target(&ipt_mark_reg_v0);
-	xt_unregister_target(&ipt_mark_reg_v1);
-	xt_unregister_target(&ip6t_mark_reg_v0);
+	xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
 }
 
 module_init(xt_mark_init);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 86ccceb..db9b896 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -29,65 +29,46 @@
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	const struct xt_NFQ_info *tinfo = targinfo;
 
 	return NF_QUEUE_NR(tinfo->queuenum);
 }
 
-static struct xt_target ipt_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_NFQ_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_target ip6t_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_NFQ_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_target arpt_NFQ_reg = {
-	.name		= "NFQUEUE",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_NFQ_info),
-	.family		= NF_ARP,
-	.me		= THIS_MODULE,
+static struct xt_target xt_nfqueue_target[] = {
+	{
+		.name		= "NFQUEUE",
+		.family		= AF_INET,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_NFQ_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "NFQUEUE",
+		.family		= AF_INET6,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_NFQ_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "NFQUEUE",
+		.family		= NF_ARP,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_NFQ_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_nfqueue_init(void)
 {
-	int ret;
-	ret = xt_register_target(&ipt_NFQ_reg);
-	if (ret)
-		return ret;
-	ret = xt_register_target(&ip6t_NFQ_reg);
-	if (ret)
-		goto out_ip;
-	ret = xt_register_target(&arpt_NFQ_reg);
-	if (ret)
-		goto out_ip6;
-
-	return ret;
-out_ip6:
-	xt_unregister_target(&ip6t_NFQ_reg);
-out_ip:
-	xt_unregister_target(&ipt_NFQ_reg);
-
-	return ret;
+	return xt_register_targets(xt_nfqueue_target,
+				   ARRAY_SIZE(xt_nfqueue_target));
 }
 
 static void __exit xt_nfqueue_fini(void)
 {
-	xt_unregister_target(&arpt_NFQ_reg);
-	xt_unregister_target(&ip6t_NFQ_reg);
-	xt_unregister_target(&ipt_NFQ_reg);
+	xt_unregister_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target));
 }
 
 module_init(xt_nfqueue_init);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 98f4b53..6d00dca 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -16,8 +16,7 @@
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
-       const void *targinfo,
-       void *userinfo)
+       const void *targinfo)
 {
 	/* Previously seen (loopback)? Ignore. */
 	if ((*pskb)->nfct != NULL)
@@ -34,43 +33,32 @@
 	return XT_CONTINUE;
 }
 
-static struct xt_target notrack_reg = {
-	.name		= "NOTRACK",
-	.target		= target,
-	.targetsize	= 0,
-	.table		= "raw",
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_target notrack6_reg = {
-	.name		= "NOTRACK",
-	.target		= target,
-	.targetsize	= 0,
-	.table		= "raw",
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_target xt_notrack_target[] = {
+	{
+		.name		= "NOTRACK",
+		.family		= AF_INET,
+		.target		= target,
+		.table		= "raw",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "NOTRACK",
+		.family		= AF_INET6,
+		.target		= target,
+		.table		= "raw",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_notrack_init(void)
 {
-	int ret;
-
-	ret = xt_register_target(&notrack_reg);
-	if (ret)
-		return ret;
-
-	ret = xt_register_target(&notrack6_reg);
-	if (ret)
-		xt_unregister_target(&notrack_reg);
-
-	return ret;
+	return xt_register_targets(xt_notrack_target,
+				   ARRAY_SIZE(xt_notrack_target));
 }
 
 static void __exit xt_notrack_fini(void)
 {
-	xt_unregister_target(&notrack6_reg);
-	xt_unregister_target(&notrack_reg);
+	xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target));
 }
 
 module_init(xt_notrack_init);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index de9537a..add7521 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -31,7 +31,7 @@
 static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
 			   const struct net_device *out, unsigned int hooknum,
 			   const struct xt_target *target,
-			   const void *targinfo, void *userinfo)
+			   const void *targinfo)
 {
 	u32 secmark = 0;
 	const struct xt_secmark_target_info *info = targinfo;
@@ -85,7 +85,7 @@
 
 static int checkentry(const char *tablename, const void *entry,
 		      const struct xt_target *target, void *targinfo,
-		      unsigned int targinfosize, unsigned int hook_mask)
+		      unsigned int hook_mask)
 {
 	struct xt_secmark_target_info *info = targinfo;
 
@@ -111,47 +111,36 @@
 	return 1;
 }
 
-static struct xt_target ipt_secmark_reg = {
-	.name		= "SECMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_secmark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-	.family		= AF_INET,
-	.revision	= 0,
-};
-
-static struct xt_target ip6t_secmark_reg = {
-	.name		= "SECMARK",
-	.target		= target,
-	.targetsize	= sizeof(struct xt_secmark_target_info),
-	.table		= "mangle",
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE,
-	.family		= AF_INET6,
-	.revision	= 0,
+static struct xt_target xt_secmark_target[] = {
+	{
+		.name		= "SECMARK",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_secmark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "SECMARK",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.target		= target,
+		.targetsize	= sizeof(struct xt_secmark_target_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_secmark_init(void)
 {
-	int err;
-
-	err = xt_register_target(&ipt_secmark_reg);
-	if (err)
-		return err;
-
-	err = xt_register_target(&ip6t_secmark_reg);
-	if (err)
-		xt_unregister_target(&ipt_secmark_reg);
-
-	return err;
+	return xt_register_targets(xt_secmark_target,
+				   ARRAY_SIZE(xt_secmark_target));
 }
 
 static void __exit xt_secmark_fini(void)
 {
-	xt_unregister_target(&ip6t_secmark_reg);
-	xt_unregister_target(&ipt_secmark_reg);
+	xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target));
 }
 
 module_init(xt_secmark_init);
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 197609c..7db492d 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -29,41 +29,32 @@
 	return 1;
 }
 
-static struct xt_match comment_match = {
-	.name		= "comment",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_comment_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE
-};
-
-static struct xt_match comment6_match = {
-	.name		= "comment",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_comment_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static struct xt_match xt_comment_match[] = {
+	{
+		.name		= "comment",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_comment_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "comment",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_comment_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_comment_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&comment_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&comment6_match);
-	if (ret)
-		xt_unregister_match(&comment_match);
-
-	return ret;
+	return xt_register_matches(xt_comment_match,
+				   ARRAY_SIZE(xt_comment_match));
 }
 
 static void __exit xt_comment_fini(void)
 {
-	xt_unregister_match(&comment_match);
-	xt_unregister_match(&comment6_match);
+	xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match));
 }
 
 module_init(xt_comment_init);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 1396fe2..dcc497e 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -125,7 +125,6 @@
 		 const void *ip,
 		 const struct xt_match *match,
 		 void *matchinfo,
-		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
 	const struct xt_connbytes_info *sinfo = matchinfo;
@@ -143,40 +142,35 @@
 	return 1;
 }
 
-static struct xt_match connbytes_match = {
-	.name		= "connbytes",
-	.match		= match,
-	.checkentry	= check,
-	.matchsize	= sizeof(struct xt_connbytes_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE
-};
-static struct xt_match connbytes6_match = {
-	.name		= "connbytes",
-	.match		= match,
-	.checkentry	= check,
-	.matchsize	= sizeof(struct xt_connbytes_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static struct xt_match xt_connbytes_match[] = {
+	{
+		.name		= "connbytes",
+		.family		= AF_INET,
+		.checkentry	= check,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_connbytes_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "connbytes",
+		.family		= AF_INET6,
+		.checkentry	= check,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_connbytes_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_connbytes_init(void)
 {
-	int ret;
-	ret = xt_register_match(&connbytes_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&connbytes6_match);
-	if (ret)
-		xt_unregister_match(&connbytes_match);
-	return ret;
+	return xt_register_matches(xt_connbytes_match,
+				   ARRAY_SIZE(xt_connbytes_match));
 }
 
 static void __exit xt_connbytes_fini(void)
 {
-	xt_unregister_match(&connbytes_match);
-	xt_unregister_match(&connbytes6_match);
+	xt_unregister_matches(xt_connbytes_match,
+			      ARRAY_SIZE(xt_connbytes_match));
 }
 
 module_init(xt_connbytes_init);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 56324c8a..92a5726 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -55,7 +55,6 @@
 	   const void *ip,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	struct xt_connmark_info *cm = matchinfo;
@@ -75,53 +74,80 @@
 }
 
 static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_ct_l3proto_module_put(match->family);
 #endif
 }
 
-static struct xt_match connmark_match = {
-	.name		= "connmark",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_connmark_info),
-	.checkentry	= checkentry,
-	.destroy	= destroy,
-	.family		= AF_INET,
-	.me		= THIS_MODULE
+#ifdef CONFIG_COMPAT
+struct compat_xt_connmark_info {
+	compat_ulong_t	mark, mask;
+	u_int8_t	invert;
+	u_int8_t	__pad1;
+	u_int16_t	__pad2;
 };
 
-static struct xt_match connmark6_match = {
-	.name		= "connmark",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_connmark_info),
-	.checkentry	= checkentry,
-	.destroy	= destroy,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_xt_connmark_info *cm = src;
+	struct xt_connmark_info m = {
+		.mark	= cm->mark,
+		.mask	= cm->mask,
+		.invert	= cm->invert,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct xt_connmark_info *m = src;
+	struct compat_xt_connmark_info cm = {
+		.mark	= m->mark,
+		.mask	= m->mask,
+		.invert	= m->invert,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct xt_match xt_connmark_match[] = {
+	{
+		.name		= "connmark",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_connmark_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_connmark_info),
+		.compat_from_user = compat_from_user,
+		.compat_to_user	= compat_to_user,
+#endif
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "connmark",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_connmark_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_connmark_init(void)
 {
-	int ret;
-
 	need_conntrack();
-
-	ret = xt_register_match(&connmark_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&connmark6_match);
-	if (ret)
-		xt_unregister_match(&connmark_match);
-	return ret;
+	return xt_register_matches(xt_connmark_match,
+				   ARRAY_SIZE(xt_connmark_match));
 }
 
 static void __exit xt_connmark_fini(void)
 {
-	xt_unregister_match(&connmark6_match);
-	xt_unregister_match(&connmark_match);
+	xt_unregister_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match));
 }
 
 module_init(xt_connmark_init);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 145489a..0ea501a 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -45,7 +45,7 @@
 
 	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
 
-#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg))
 
 	if (ct == &ip_conntrack_untracked)
 		statebit = XT_CONNTRACK_STATE_UNTRACKED;
@@ -54,63 +54,72 @@
  	else
  		statebit = XT_CONNTRACK_STATE_INVALID;
  
-	if(sinfo->flags & XT_CONNTRACK_STATE) {
+	if (sinfo->flags & XT_CONNTRACK_STATE) {
 		if (ct) {
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
+			if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
 				statebit |= XT_CONNTRACK_STATE_SNAT;
-
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
+			if (test_bit(IPS_DST_NAT_BIT, &ct->status))
 				statebit |= XT_CONNTRACK_STATE_DNAT;
 		}
-
-		if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE))
+		if (FWINV((statebit & sinfo->statemask) == 0,
+			  XT_CONNTRACK_STATE))
 			return 0;
 	}
 
-	if(sinfo->flags & XT_CONNTRACK_PROTO) {
-		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
-                	return 0;
+	if (ct == NULL) {
+		if (sinfo->flags & ~XT_CONNTRACK_STATE)
+			return 0;
+		return 1;
 	}
 
-	if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
+	if (sinfo->flags & XT_CONNTRACK_PROTO &&
+	    FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
+		  XT_CONNTRACK_PROTO))
+                return 0;
+
+	if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip &
+		   sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
+		  XT_CONNTRACK_ORIGSRC))
+		return 0;
+
+	if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip &
+		   sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
+		  XT_CONNTRACK_ORIGDST))
+		return 0;
+
+	if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip &
+		   sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
+		  XT_CONNTRACK_REPLSRC))
+		return 0;
+
+	if (sinfo->flags & XT_CONNTRACK_REPLDST &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip &
+		   sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
+		  XT_CONNTRACK_REPLDST))
+		return 0;
+
+	if (sinfo->flags & XT_CONNTRACK_STATUS &&
+	    FWINV((ct->status & sinfo->statusmask) == 0,
+		  XT_CONNTRACK_STATUS))
+		return 0;
+
+	if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
+		unsigned long expires = timer_pending(&ct->timeout) ?
+					(ct->timeout.expires - jiffies)/HZ : 0;
+
+		if (FWINV(!(expires >= sinfo->expires_min &&
+			    expires <= sinfo->expires_max),
+			  XT_CONNTRACK_EXPIRES))
 			return 0;
 	}
-
-	if(sinfo->flags & XT_CONNTRACK_ORIGDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST))
-			return 0;
-	}
-
-	if(sinfo->flags & XT_CONNTRACK_REPLSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC))
-			return 0;
-	}
-
-	if(sinfo->flags & XT_CONNTRACK_REPLDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST))
-			return 0;
-	}
-
-	if(sinfo->flags & XT_CONNTRACK_STATUS) {
-		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS))
-			return 0;
-	}
-
-	if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
-		unsigned long expires;
-
-		if(!ct)
-			return 0;
-
-		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
-
-		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES))
-			return 0;
-	}
-
 	return 1;
 }
 
@@ -141,63 +150,72 @@
  	else
  		statebit = XT_CONNTRACK_STATE_INVALID;
  
-	if(sinfo->flags & XT_CONNTRACK_STATE) {
+	if (sinfo->flags & XT_CONNTRACK_STATE) {
 		if (ct) {
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
+			if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
 				statebit |= XT_CONNTRACK_STATE_SNAT;
-
-			if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
-			    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
+			if (test_bit(IPS_DST_NAT_BIT, &ct->status))
 				statebit |= XT_CONNTRACK_STATE_DNAT;
 		}
-
-		if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE))
+		if (FWINV((statebit & sinfo->statemask) == 0,
+			  XT_CONNTRACK_STATE))
 			return 0;
 	}
 
-	if(sinfo->flags & XT_CONNTRACK_PROTO) {
-		if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO))
-                	return 0;
+	if (ct == NULL) {
+		if (sinfo->flags & ~XT_CONNTRACK_STATE)
+			return 0;
+		return 1;
 	}
 
-	if(sinfo->flags & XT_CONNTRACK_ORIGSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_PROTO &&
+	    FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
+	    	  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
+		  XT_CONNTRACK_PROTO))
+                return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_ORIGDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip &
+	    	   sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
+		  XT_CONNTRACK_ORIGSRC))
+		return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_REPLSRC) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip &
+	    	   sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
+		  XT_CONNTRACK_ORIGDST))
+		return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_REPLDST) {
-		if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip &
+	    	   sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
+		  XT_CONNTRACK_REPLSRC))
+		return 0;
 
-	if(sinfo->flags & XT_CONNTRACK_STATUS) {
-		if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS))
-			return 0;
-	}
+	if (sinfo->flags & XT_CONNTRACK_REPLDST &&
+	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip &
+	    	   sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
+		  sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
+		  XT_CONNTRACK_REPLDST))
+		return 0;
+
+	if (sinfo->flags & XT_CONNTRACK_STATUS &&
+	    FWINV((ct->status & sinfo->statusmask) == 0,
+	    	  XT_CONNTRACK_STATUS))
+		return 0;
 
 	if(sinfo->flags & XT_CONNTRACK_EXPIRES) {
-		unsigned long expires;
+		unsigned long expires = timer_pending(&ct->timeout) ?
+					(ct->timeout.expires - jiffies)/HZ : 0;
 
-		if(!ct)
-			return 0;
-
-		expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
-
-		if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES))
+		if (FWINV(!(expires >= sinfo->expires_min &&
+			    expires <= sinfo->expires_max),
+			  XT_CONNTRACK_EXPIRES))
 			return 0;
 	}
-
 	return 1;
 }
 
@@ -208,7 +226,6 @@
 	   const void *ip,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -221,8 +238,7 @@
 	return 1;
 }
 
-static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+static void destroy(const struct xt_match *match, void *matchinfo)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_ct_l3proto_module_put(match->family);
@@ -241,11 +257,8 @@
 
 static int __init xt_conntrack_init(void)
 {
-	int ret;
 	need_conntrack();
-	ret = xt_register_match(&conntrack_match);
-
-	return ret;
+	return xt_register_match(&conntrack_match);
 }
 
 static void __exit xt_conntrack_fini(void)
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 2e2f825..3e6cf43 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -131,7 +131,6 @@
 	   const void *inf,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct xt_dccp_info *info = matchinfo;
@@ -141,27 +140,26 @@
 		&& !(info->invflags & ~info->flags);
 }
 
-static struct xt_match dccp_match = 
-{ 
-	.name 		= "dccp",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_dccp_info),
-	.proto		= IPPROTO_DCCP,
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me 		= THIS_MODULE,
+static struct xt_match xt_dccp_match[] = {
+	{
+		.name 		= "dccp",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_dccp_info),
+		.proto		= IPPROTO_DCCP,
+		.me 		= THIS_MODULE,
+	},
+	{
+		.name 		= "dccp",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_dccp_info),
+		.proto		= IPPROTO_DCCP,
+		.me 		= THIS_MODULE,
+	},
 };
-static struct xt_match dccp6_match = 
-{ 
-	.name 		= "dccp",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_dccp_info),
-	.proto		= IPPROTO_DCCP,
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me 		= THIS_MODULE,
-};
-
 
 static int __init xt_dccp_init(void)
 {
@@ -173,27 +171,19 @@
 	dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL);
 	if (!dccp_optbuf)
 		return -ENOMEM;
-	ret = xt_register_match(&dccp_match);
+	ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
 	if (ret)
 		goto out_kfree;
-	ret = xt_register_match(&dccp6_match);
-	if (ret)
-		goto out_unreg;
-
 	return ret;
 
-out_unreg:
-	xt_unregister_match(&dccp_match);
 out_kfree:
 	kfree(dccp_optbuf);
-
 	return ret;
 }
 
 static void __exit xt_dccp_fini(void)
 {
-	xt_unregister_match(&dccp6_match);
-	xt_unregister_match(&dccp_match);
+	xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match));
 	kfree(dccp_optbuf);
 }
 
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
new file mode 100644
index 0000000..26c7f4a
--- /dev/null
+++ b/net/netfilter/xt_dscp.c
@@ -0,0 +1,103 @@
+/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
+ *
+ * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+
+#include <linux/netfilter/xt_dscp.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("x_tables DSCP matching module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_dscp");
+MODULE_ALIAS("ip6t_dscp");
+
+static int match(const struct sk_buff *skb,
+		 const struct net_device *in,
+		 const struct net_device *out,
+		 const struct xt_match *match,
+		 const void *matchinfo,
+		 int offset,
+		 unsigned int protoff,
+		 int *hotdrop)
+{
+	const struct xt_dscp_info *info = matchinfo;
+	u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT;
+
+	return (dscp == info->dscp) ^ !!info->invert;
+}
+
+static int match6(const struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  const struct xt_match *match,
+		  const void *matchinfo,
+		  int offset,
+		  unsigned int protoff,
+		  int *hotdrop)
+{
+	const struct xt_dscp_info *info = matchinfo;
+	u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT;
+
+	return (dscp == info->dscp) ^ !!info->invert;
+}
+
+static int checkentry(const char *tablename,
+		      const void *info,
+		      const struct xt_match *match,
+		      void *matchinfo,
+		      unsigned int hook_mask)
+{
+	const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp;
+
+	if (dscp > XT_DSCP_MAX) {
+		printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct xt_match xt_dscp_match[] = {
+	{
+		.name		= "dscp",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_dscp_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "dscp",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match6,
+		.matchsize	= sizeof(struct xt_dscp_info),
+		.me		= THIS_MODULE,
+	},
+};
+
+static int __init xt_dscp_match_init(void)
+{
+	return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
+}
+
+static void __exit xt_dscp_match_fini(void)
+{
+	xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match));
+}
+
+module_init(xt_dscp_match_init);
+module_exit(xt_dscp_match_fini);
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 9dad628..7c95f14 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -79,7 +79,6 @@
 	   const void *ip_void,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchinfosize,
 	   unsigned int hook_mask)
 {
 	const struct xt_esp *espinfo = matchinfo;
@@ -92,44 +91,35 @@
 	return 1;
 }
 
-static struct xt_match esp_match = {
-	.name		= "esp",
-	.family		= AF_INET,
-	.proto		= IPPROTO_ESP,
-	.match		= &match,
-	.matchsize	= sizeof(struct xt_esp),
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match esp6_match = {
-	.name		= "esp",
-	.family		= AF_INET6,
-	.proto		= IPPROTO_ESP,
-	.match		= &match,
-	.matchsize	= sizeof(struct xt_esp),
-	.checkentry	= &checkentry,
-	.me		= THIS_MODULE,
+static struct xt_match xt_esp_match[] = {
+	{
+		.name		= "esp",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_esp),
+		.proto		= IPPROTO_ESP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "esp",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_esp),
+		.proto		= IPPROTO_ESP,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_esp_init(void)
 {
-	int ret;
-	ret = xt_register_match(&esp_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&esp6_match);
-	if (ret)
-		xt_unregister_match(&esp_match);
-
-	return ret;
+	return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
 }
 
 static void __exit xt_esp_cleanup(void)
 {
-	xt_unregister_match(&esp_match);
-	xt_unregister_match(&esp6_match);
+	xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match));
 }
 
 module_init(xt_esp_init);
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 799c2a4..5d7818b 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -139,7 +139,6 @@
 		 const void *inf,
 		 const struct xt_match *match,
 		 void *matchinfo,
-		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
 	struct xt_helper_info *info = matchinfo;
@@ -156,52 +155,44 @@
 }
 
 static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_ct_l3proto_module_put(match->family);
 #endif
 }
 
-static struct xt_match helper_match = {
-	.name		= "helper",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_helper_info),
-	.checkentry	= check,
-	.destroy	= destroy,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-static struct xt_match helper6_match = {
-	.name		= "helper",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_helper_info),
-	.checkentry	= check,
-	.destroy	= destroy,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_helper_match[] = {
+	{
+		.name		= "helper",
+		.family		= AF_INET,
+		.checkentry	= check,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_helper_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "helper",
+		.family		= AF_INET6,
+		.checkentry	= check,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_helper_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_helper_init(void)
 {
-	int ret;
 	need_conntrack();
-
-	ret = xt_register_match(&helper_match);
-	if (ret < 0)
-		return ret;
-
-	ret = xt_register_match(&helper6_match);
-	if (ret < 0)
-		xt_unregister_match(&helper_match);
-
-	return ret;
+	return xt_register_matches(xt_helper_match,
+				   ARRAY_SIZE(xt_helper_match));
 }
 
 static void __exit xt_helper_fini(void)
 {
-	xt_unregister_match(&helper_match);
-	xt_unregister_match(&helper6_match);
+	xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match));
 }
 
 module_init(xt_helper_init);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 109132c..67fd30d 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -52,39 +52,32 @@
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
 
-static struct xt_match length_match = {
-	.name		= "length",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_length_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match length6_match = {
-	.name		= "length",
-	.match		= match6,
-	.matchsize	= sizeof(struct xt_length_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_length_match[] = {
+	{
+		.name		= "length",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_length_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "length",
+		.family		= AF_INET6,
+		.match		= match6,
+		.matchsize	= sizeof(struct xt_length_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_length_init(void)
 {
-	int ret;
-	ret = xt_register_match(&length_match);
-	if (ret)
-		return ret;
-	ret = xt_register_match(&length6_match);
-	if (ret)
-		xt_unregister_match(&length_match);
-
-	return ret;
+	return xt_register_matches(xt_length_match,
+				   ARRAY_SIZE(xt_length_match));
 }
 
 static void __exit xt_length_fini(void)
 {
-	xt_unregister_match(&length_match);
-	xt_unregister_match(&length6_match);
+	xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match));
 }
 
 module_init(xt_length_init);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index ce7fdb7..fda7b7d 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -110,7 +110,6 @@
 		     const void *inf,
 		     const struct xt_match *match,
 		     void *matchinfo,
-		     unsigned int matchsize,
 		     unsigned int hook_mask)
 {
 	struct xt_rateinfo *r = matchinfo;
@@ -123,55 +122,95 @@
 		return 0;
 	}
 
-	/* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
-	   128. */
-	r->prev = jiffies;
-	r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
-	r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
-	r->cost = user2credits(r->avg);
-
 	/* For SMP, we only want to use one set of counters. */
 	r->master = r;
-
+	if (r->cost == 0) {
+		/* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
+		   128. */
+		r->prev = jiffies;
+		r->credit = user2credits(r->avg * r->burst);	 /* Credits full. */
+		r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
+		r->cost = user2credits(r->avg);
+	}
 	return 1;
 }
 
-static struct xt_match ipt_limit_reg = {
-	.name		= "limit",
-	.match		= ipt_limit_match,
-	.matchsize	= sizeof(struct xt_rateinfo),
-	.checkentry	= ipt_limit_checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
+#ifdef CONFIG_COMPAT
+struct compat_xt_rateinfo {
+	u_int32_t avg;
+	u_int32_t burst;
+
+	compat_ulong_t prev;
+	u_int32_t credit;
+	u_int32_t credit_cap, cost;
+
+	u_int32_t master;
 };
-static struct xt_match limit6_reg = {
-	.name		= "limit",
-	.match		= ipt_limit_match,
-	.matchsize	= sizeof(struct xt_rateinfo),
-	.checkentry	= ipt_limit_checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+
+/* To keep the full "prev" timestamp, the upper 32 bits are stored in the
+ * master pointer, which does not need to be preserved. */
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_xt_rateinfo *cm = src;
+	struct xt_rateinfo m = {
+		.avg		= cm->avg,
+		.burst		= cm->burst,
+		.prev		= cm->prev | (unsigned long)cm->master << 32,
+		.credit		= cm->credit,
+		.credit_cap	= cm->credit_cap,
+		.cost		= cm->cost,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct xt_rateinfo *m = src;
+	struct compat_xt_rateinfo cm = {
+		.avg		= m->avg,
+		.burst		= m->burst,
+		.prev		= m->prev,
+		.credit		= m->credit,
+		.credit_cap	= m->credit_cap,
+		.cost		= m->cost,
+		.master		= m->prev >> 32,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct xt_match xt_limit_match[] = {
+	{
+		.name		= "limit",
+		.family		= AF_INET,
+		.checkentry	= ipt_limit_checkentry,
+		.match		= ipt_limit_match,
+		.matchsize	= sizeof(struct xt_rateinfo),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_rateinfo),
+		.compat_from_user = compat_from_user,
+		.compat_to_user	= compat_to_user,
+#endif
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "limit",
+		.family		= AF_INET6,
+		.checkentry	= ipt_limit_checkentry,
+		.match		= ipt_limit_match,
+		.matchsize	= sizeof(struct xt_rateinfo),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_limit_init(void)
 {
-	int ret;
-	
-	ret = xt_register_match(&ipt_limit_reg);
-	if (ret)
-		return ret;
-	
-	ret = xt_register_match(&limit6_reg);
-	if (ret)
-		xt_unregister_match(&ipt_limit_reg);
-
-	return ret;
+	return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
 }
 
 static void __exit xt_limit_fini(void)
 {
-	xt_unregister_match(&ipt_limit_reg);
-	xt_unregister_match(&limit6_reg);
+	xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match));
 }
 
 module_init(xt_limit_init);
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 356290f..425fc21 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -43,43 +43,37 @@
 		^ info->invert));
 }
 
-static struct xt_match mac_match = {
-	.name		= "mac",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_mac_info),
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) |
-			  (1 << NF_IP_FORWARD),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-static struct xt_match mac6_match = {
-	.name		= "mac",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_mac_info),
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) |
-			  (1 << NF_IP_FORWARD),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_mac_match[] = {
+	{
+		.name		= "mac",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_mac_info),
+		.hooks		= (1 << NF_IP_PRE_ROUTING) |
+				  (1 << NF_IP_LOCAL_IN) |
+				  (1 << NF_IP_FORWARD),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "mac",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_mac_info),
+		.hooks		= (1 << NF_IP_PRE_ROUTING) |
+				  (1 << NF_IP_LOCAL_IN) |
+				  (1 << NF_IP_FORWARD),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_mac_init(void)
 {
-	int ret;
-	ret = xt_register_match(&mac_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&mac6_match);
-	if (ret)
-		xt_unregister_match(&mac_match);
-
-	return ret;
+	return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
 }
 
 static void __exit xt_mac_fini(void)
 {
-	xt_unregister_match(&mac_match);
-	xt_unregister_match(&mac6_match);
+	xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match));
 }
 
 module_init(xt_mac_init);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 876bc57..934dddf 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -39,7 +39,6 @@
            const void *entry,
 	   const struct xt_match *match,
            void *matchinfo,
-           unsigned int matchsize,
            unsigned int hook_mask)
 {
 	const struct xt_mark_info *minfo = matchinfo;
@@ -51,42 +50,69 @@
 	return 1;
 }
 
-static struct xt_match mark_match = {
-	.name		= "mark",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_mark_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
+#ifdef CONFIG_COMPAT
+struct compat_xt_mark_info {
+	compat_ulong_t	mark, mask;
+	u_int8_t	invert;
+	u_int8_t	__pad1;
+	u_int16_t	__pad2;
 };
 
-static struct xt_match mark6_match = {
-	.name		= "mark",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_mark_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_xt_mark_info *cm = src;
+	struct xt_mark_info m = {
+		.mark	= cm->mark,
+		.mask	= cm->mask,
+		.invert	= cm->invert,
+	};
+	memcpy(dst, &m, sizeof(m));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct xt_mark_info *m = src;
+	struct compat_xt_mark_info cm = {
+		.mark	= m->mark,
+		.mask	= m->mask,
+		.invert	= m->invert,
+	};
+	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct xt_match xt_mark_match[] = {
+	{
+		.name		= "mark",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_mark_info),
+#ifdef CONFIG_COMPAT
+		.compatsize	= sizeof(struct compat_xt_mark_info),
+		.compat_from_user = compat_from_user,
+		.compat_to_user	= compat_to_user,
+#endif
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "mark",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_mark_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_mark_init(void)
 {
-	int ret;
-	ret = xt_register_match(&mark_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&mark6_match);
-	if (ret)
-		xt_unregister_match(&mark_match);
-
-	return ret;
+	return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
 }
 
 static void __exit xt_mark_fini(void)
 {
-	xt_unregister_match(&mark_match);
-	xt_unregister_match(&mark6_match);
+	xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match));
 }
 
 module_init(xt_mark_init);
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 1ff0a25..d3aefd3 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -176,7 +176,6 @@
 	   const void *info,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct ipt_ip *ip = info;
@@ -191,7 +190,6 @@
 	      const void *info,
 	      const struct xt_match *match,
 	      void *matchinfo,
-	      unsigned int matchsize,
 	      unsigned int hook_mask)
 {
 	const struct ipt_ip *ip = info;
@@ -206,7 +204,6 @@
 	    const void *info,
 	    const struct xt_match *match,
 	    void *matchinfo,
-	    unsigned int matchsize,
 	    unsigned int hook_mask)
 {
 	const struct ip6t_ip6 *ip = info;
@@ -221,7 +218,6 @@
 	       const void *info,
 	       const struct xt_match *match,
 	       void *matchinfo,
-	       unsigned int matchsize,
 	       unsigned int hook_mask)
 {
 	const struct ip6t_ip6 *ip = info;
@@ -231,84 +227,55 @@
 		     multiinfo->count);
 }
 
-static struct xt_match multiport_match = {
-	.name		= "multiport",
-	.revision	= 0,
-	.matchsize	= sizeof(struct xt_multiport),
-	.match		= &match,
-	.checkentry	= &checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match multiport_match_v1 = {
-	.name		= "multiport",
-	.revision	= 1,
-	.matchsize	= sizeof(struct xt_multiport_v1),
-	.match		= &match_v1,
-	.checkentry	= &checkentry_v1,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match multiport6_match = {
-	.name		= "multiport",
-	.revision	= 0,
-	.matchsize	= sizeof(struct xt_multiport),
-	.match		= &match,
-	.checkentry	= &checkentry6,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match multiport6_match_v1 = {
-	.name		= "multiport",
-	.revision	= 1,
-	.matchsize	= sizeof(struct xt_multiport_v1),
-	.match		= &match_v1,
-	.checkentry	= &checkentry6_v1,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_multiport_match[] = {
+	{
+		.name		= "multiport",
+		.family		= AF_INET,
+		.revision	= 0,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_multiport),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "multiport",
+		.family		= AF_INET,
+		.revision	= 1,
+		.checkentry	= checkentry_v1,
+		.match		= match_v1,
+		.matchsize	= sizeof(struct xt_multiport_v1),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "multiport",
+		.family		= AF_INET6,
+		.revision	= 0,
+		.checkentry	= checkentry6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_multiport),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "multiport",
+		.family		= AF_INET6,
+		.revision	= 1,
+		.checkentry	= checkentry6_v1,
+		.match		= match_v1,
+		.matchsize	= sizeof(struct xt_multiport_v1),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_multiport_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&multiport_match);
-	if (ret)
-		goto out;
-
-	ret = xt_register_match(&multiport_match_v1);
-	if (ret)
-		goto out_unreg_multi_v0;
-
-	ret = xt_register_match(&multiport6_match);
-	if (ret)
-		goto out_unreg_multi_v1;
-
-	ret = xt_register_match(&multiport6_match_v1);
-	if (ret)
-		goto out_unreg_multi6_v0;
-
-	return ret;
-
-out_unreg_multi6_v0:
-	xt_unregister_match(&multiport6_match);
-out_unreg_multi_v1:
-	xt_unregister_match(&multiport_match_v1);
-out_unreg_multi_v0:
-	xt_unregister_match(&multiport_match);
-out:
-	return ret;
+	return xt_register_matches(xt_multiport_match,
+				   ARRAY_SIZE(xt_multiport_match));
 }
 
 static void __exit xt_multiport_fini(void)
 {
-	xt_unregister_match(&multiport_match);
-	xt_unregister_match(&multiport_match_v1);
-	xt_unregister_match(&multiport6_match);
-	xt_unregister_match(&multiport6_match_v1);
+	xt_unregister_matches(xt_multiport_match,
+			      ARRAY_SIZE(xt_multiport_match));
 }
 
 module_init(xt_multiport_init);
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 63a9654..fd8f954 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -106,7 +106,6 @@
 		       const void *ip,
 		       const struct xt_match *match,
 		       void *matchinfo,
-		       unsigned int matchsize,
 		       unsigned int hook_mask)
 {
 	const struct xt_physdev_info *info = matchinfo;
@@ -132,43 +131,34 @@
 	return 1;
 }
 
-static struct xt_match physdev_match = {
-	.name		= "physdev",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_physdev_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match physdev6_match = {
-	.name		= "physdev",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_physdev_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_physdev_match[] = {
+	{
+		.name		= "physdev",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_physdev_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "physdev",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_physdev_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_physdev_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&physdev_match);
-	if (ret < 0)
-		return ret;
-
-	ret = xt_register_match(&physdev6_match);
-	if (ret < 0)
-		xt_unregister_match(&physdev_match);
-
-	return ret;
+	return xt_register_matches(xt_physdev_match,
+				   ARRAY_SIZE(xt_physdev_match));
 }
 
 static void __exit xt_physdev_fini(void)
 {
-	xt_unregister_match(&physdev_match);
-	xt_unregister_match(&physdev6_match);
+	xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match));
 }
 
 module_init(xt_physdev_init);
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index d2f5320..16e7b08 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -43,40 +43,32 @@
 	return (type == info->pkttype) ^ info->invert;
 }
 
-static struct xt_match pkttype_match = {
-	.name		= "pkttype",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_pkttype_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match pkttype6_match = {
-	.name		= "pkttype",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_pkttype_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_pkttype_match[] = {
+	{
+		.name		= "pkttype",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_pkttype_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "pkttype",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_pkttype_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_pkttype_init(void)
 {
-	int ret;
-	ret = xt_register_match(&pkttype_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&pkttype6_match);
-	if (ret)
-		xt_unregister_match(&pkttype_match);
-
-	return ret;
+	return xt_register_matches(xt_pkttype_match,
+				   ARRAY_SIZE(xt_pkttype_match));
 }
 
 static void __exit xt_pkttype_fini(void)
 {
-	xt_unregister_match(&pkttype_match);
-	xt_unregister_match(&pkttype6_match);
+	xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match));
 }
 
 module_init(xt_pkttype_init);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index ba1ca03..46bde2b 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -135,8 +135,7 @@
 
 static int checkentry(const char *tablename, const void *ip_void,
                       const struct xt_match *match,
-                      void *matchinfo, unsigned int matchsize,
-                      unsigned int hook_mask)
+                      void *matchinfo, unsigned int hook_mask)
 {
 	struct xt_policy_info *info = matchinfo;
 
@@ -165,43 +164,34 @@
 	return 1;
 }
 
-static struct xt_match policy_match = {
-	.name		= "policy",
-	.family		= AF_INET,
-	.match		= match,
-	.matchsize	= sizeof(struct xt_policy_info),
-	.checkentry 	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match policy6_match = {
-	.name		= "policy",
-	.family		= AF_INET6,
-	.match		= match,
-	.matchsize	= sizeof(struct xt_policy_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_policy_match[] = {
+	{
+		.name		= "policy",
+		.family		= AF_INET,
+		.checkentry 	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_policy_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "policy",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_policy_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&policy_match);
-	if (ret)
-		return ret;
-	ret = xt_register_match(&policy6_match);
-	if (ret)
-		xt_unregister_match(&policy_match);
-	return ret;
+	return xt_register_matches(xt_policy_match,
+				   ARRAY_SIZE(xt_policy_match));
 }
 
 static void __exit fini(void)
 {
-	xt_unregister_match(&policy6_match);
-	xt_unregister_match(&policy_match);
+	xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match));
 }
 
 module_init(init);
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 4cdba74..b75fa2c 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,6 +11,8 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_ALIAS("ipt_quota");
+MODULE_ALIAS("ip6t_quota");
 
 static DEFINE_SPINLOCK(quota_lock);
 
@@ -39,7 +41,7 @@
 static int
 checkentry(const char *tablename, const void *entry,
 	   const struct xt_match *match, void *matchinfo,
-	   unsigned int matchsize, unsigned int hook_mask)
+	   unsigned int hook_mask)
 {
 	struct xt_quota_info *q = (struct xt_quota_info *)matchinfo;
 
@@ -50,46 +52,33 @@
 	return 1;
 }
 
-static struct xt_match quota_match = {
-	.name		= "quota",
-	.family		= AF_INET,
-	.match		= match,
-	.matchsize	= sizeof(struct xt_quota_info),
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE
-};
-
-static struct xt_match quota_match6 = {
-	.name		= "quota",
-	.family		= AF_INET6,
-	.match		= match,
-	.matchsize	= sizeof(struct xt_quota_info),
-	.checkentry	= checkentry,
-	.me		= THIS_MODULE
+static struct xt_match xt_quota_match[] = {
+	{
+		.name		= "quota",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_quota_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "quota",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_quota_info),
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_quota_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&quota_match);
-	if (ret)
-		goto err1;
-	ret = xt_register_match(&quota_match6);
-	if (ret)
-		goto err2;
-	return ret;
-
-err2:
-	xt_unregister_match(&quota_match);
-err1:
-	return ret;
+	return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
 }
 
 static void __exit xt_quota_fini(void)
 {
-	xt_unregister_match(&quota_match6);
-	xt_unregister_match(&quota_match);
+	xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match));
 }
 
 module_init(xt_quota_init);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 843383e..7956aca 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -163,7 +163,6 @@
 	   const void *inf,
 	   const struct xt_match *match,
 	   void *matchinfo,
-	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
 	const struct xt_sctp_info *info = matchinfo;
@@ -178,44 +177,35 @@
 				| SCTP_CHUNK_MATCH_ONLY)));
 }
 
-static struct xt_match sctp_match = {
-	.name		= "sctp",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_sctp_info),
-	.proto		= IPPROTO_SCTP,
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE
-};
-
-static struct xt_match sctp6_match = {
-	.name		= "sctp",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_sctp_info),
-	.proto		= IPPROTO_SCTP,
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE
+static struct xt_match xt_sctp_match[] = {
+	{
+		.name		= "sctp",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_sctp_info),
+		.proto		= IPPROTO_SCTP,
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "sctp",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_sctp_info),
+		.proto		= IPPROTO_SCTP,
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_sctp_init(void)
 {
-	int ret;
-	ret = xt_register_match(&sctp_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&sctp6_match);
-	if (ret)
-		xt_unregister_match(&sctp_match);
-
-	return ret;
+	return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
 }
 
 static void __exit xt_sctp_fini(void)
 {
-	xt_unregister_match(&sctp6_match);
-	xt_unregister_match(&sctp_match);
+	xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match));
 }
 
 module_init(xt_sctp_init);
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index f9e304d..d9010b1 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -48,7 +48,6 @@
 		 const void *inf,
 		 const struct xt_match *match,
 		 void *matchinfo,
-		 unsigned int matchsize,
 		 unsigned int hook_mask)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -62,54 +61,43 @@
 }
 
 static void
-destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize)
+destroy(const struct xt_match *match, void *matchinfo)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_ct_l3proto_module_put(match->family);
 #endif
 }
 
-static struct xt_match state_match = {
-	.name		= "state",
-	.match		= match,
-	.checkentry	= check,
-	.destroy	= destroy,
-	.matchsize	= sizeof(struct xt_state_info),
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match state6_match = {
-	.name		= "state",
-	.match		= match,
-	.checkentry	= check,
-	.destroy	= destroy,
-	.matchsize	= sizeof(struct xt_state_info),
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_state_match[] = {
+	{
+		.name		= "state",
+		.family		= AF_INET,
+		.checkentry	= check,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_state_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "state",
+		.family		= AF_INET6,
+		.checkentry	= check,
+		.match		= match,
+		.destroy	= destroy,
+		.matchsize	= sizeof(struct xt_state_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_state_init(void)
 {
-	int ret;
-
 	need_conntrack();
-
-	ret = xt_register_match(&state_match);
-	if (ret < 0)
-		return ret;
-
-	ret = xt_register_match(&state6_match);
-	if (ret < 0)
-		xt_unregister_match(&state_match);
-
-	return ret;
+	return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
 }
 
 static void __exit xt_state_fini(void)
 {
-	xt_unregister_match(&state_match);
-	xt_unregister_match(&state6_match);
+	xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match));
 }
 
 module_init(xt_state_init);
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index de1037f..091a9f8 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -55,7 +55,7 @@
 static int
 checkentry(const char *tablename, const void *entry,
 	   const struct xt_match *match, void *matchinfo,
-	   unsigned int matchsize, unsigned int hook_mask)
+	   unsigned int hook_mask)
 {
 	struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo;
 
@@ -66,46 +66,35 @@
 	return 1;
 }
 
-static struct xt_match statistic_match = {
-	.name		= "statistic",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_statistic_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match statistic_match6 = {
-	.name		= "statistic",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_statistic_info),
-	.checkentry	= checkentry,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
+static struct xt_match xt_statistic_match[] = {
+	{
+		.name		= "statistic",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_statistic_info),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "statistic",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_statistic_info),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_statistic_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&statistic_match);
-	if (ret)
-		goto err1;
-
-	ret = xt_register_match(&statistic_match6);
-	if (ret)
-		goto err2;
-	return ret;
-err2:
-	xt_unregister_match(&statistic_match);
-err1:
-	return ret;
+	return xt_register_matches(xt_statistic_match,
+				   ARRAY_SIZE(xt_statistic_match));
 }
 
 static void __exit xt_statistic_fini(void)
 {
-	xt_unregister_match(&statistic_match6);
-	xt_unregister_match(&statistic_match);
+	xt_unregister_matches(xt_statistic_match,
+			      ARRAY_SIZE(xt_statistic_match));
 }
 
 module_init(xt_statistic_init);
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 275330f..4453252 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -46,7 +46,6 @@
 		      const void *ip,
 		      const struct xt_match *match,
 		      void *matchinfo,
-		      unsigned int matchsize,
 		      unsigned int hook_mask)
 {
 	struct xt_string_info *conf = matchinfo;
@@ -69,49 +68,40 @@
 	return 1;
 }
 
-static void destroy(const struct xt_match *match, void *matchinfo,
-		    unsigned int matchsize)
+static void destroy(const struct xt_match *match, void *matchinfo)
 {
 	textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
 }
 
-static struct xt_match string_match = {
-	.name 		= "string",
-	.match 		= match,
-	.matchsize	= sizeof(struct xt_string_info),
-	.checkentry	= checkentry,
-	.destroy 	= destroy,
-	.family		= AF_INET,
-	.me 		= THIS_MODULE
-};
-static struct xt_match string6_match = {
-	.name 		= "string",
-	.match 		= match,
-	.matchsize	= sizeof(struct xt_string_info),
-	.checkentry	= checkentry,
-	.destroy 	= destroy,
-	.family		= AF_INET6,
-	.me 		= THIS_MODULE
+static struct xt_match xt_string_match[] = {
+	{
+		.name 		= "string",
+		.family		= AF_INET,
+		.checkentry	= checkentry,
+		.match 		= match,
+		.destroy 	= destroy,
+		.matchsize	= sizeof(struct xt_string_info),
+		.me 		= THIS_MODULE
+	},
+	{
+		.name 		= "string",
+		.family		= AF_INET6,
+		.checkentry	= checkentry,
+		.match 		= match,
+		.destroy 	= destroy,
+		.matchsize	= sizeof(struct xt_string_info),
+		.me 		= THIS_MODULE
+	},
 };
 
 static int __init xt_string_init(void)
 {
-	int ret;
-
-	ret = xt_register_match(&string_match);
-	if (ret)
-		return ret;
-	ret = xt_register_match(&string6_match);
-	if (ret)
-		xt_unregister_match(&string_match);
-
-	return ret;
+	return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
 }
 
 static void __exit xt_string_fini(void)
 {
-	xt_unregister_match(&string_match);
-	xt_unregister_match(&string6_match);
+	xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match));
 }
 
 module_init(xt_string_init);
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index cf7d335..a3682fe 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -18,21 +18,22 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 
-#define TH_SYN 0x02
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables TCP MSS match module");
 MODULE_ALIAS("ipt_tcpmss");
 
-/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */
-static inline int
-mssoption_match(u_int16_t min, u_int16_t max,
-		const struct sk_buff *skb,
-		unsigned int protoff,
-		int invert,
-		int *hotdrop)
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const struct xt_match *match,
+      const void *matchinfo,
+      int offset,
+      unsigned int protoff,
+      int *hotdrop)
 {
+	const struct xt_tcpmss_match_info *info = matchinfo;
 	struct tcphdr _tcph, *th;
 	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
 	u8 _opt[15 * 4 - sizeof(_tcph)], *op;
@@ -64,72 +65,50 @@
 
 			mssval = (op[i+2] << 8) | op[i+3];
 			
-			return (mssval >= min && mssval <= max) ^ invert;
+			return (mssval >= info->mss_min &&
+			        mssval <= info->mss_max) ^ info->invert;
 		}
-		if (op[i] < 2) i++;
-		else i += op[i+1]?:1;
+		if (op[i] < 2)
+			i++;
+		else
+			i += op[i+1] ? : 1;
 	}
 out:
-	return invert;
+	return info->invert;
 
- dropit:
+dropit:
 	*hotdrop = 1;
 	return 0;
 }
 
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct xt_tcpmss_match_info *info = matchinfo;
-
-	return mssoption_match(info->mss_min, info->mss_max, skb, protoff,
-			       info->invert, hotdrop);
-}
-
-static struct xt_match tcpmss_match = {
-	.name		= "tcpmss",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_tcpmss_match_info),
-	.proto		= IPPROTO_TCP,
-	.family		= AF_INET,
-	.me		= THIS_MODULE,
+static struct xt_match xt_tcpmss_match[] = {
+	{
+		.name		= "tcpmss",
+		.family		= AF_INET,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_tcpmss_match_info),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "tcpmss",
+		.family		= AF_INET6,
+		.match		= match,
+		.matchsize	= sizeof(struct xt_tcpmss_match_info),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
 };
 
-static struct xt_match tcpmss6_match = {
-	.name		= "tcpmss",
-	.match		= match,
-	.matchsize	= sizeof(struct xt_tcpmss_match_info),
-	.proto		= IPPROTO_TCP,
-	.family		= AF_INET6,
-	.me		= THIS_MODULE,
-};
-
-
 static int __init xt_tcpmss_init(void)
 {
-	int ret;
-	ret = xt_register_match(&tcpmss_match);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&tcpmss6_match);
-	if (ret)
-		xt_unregister_match(&tcpmss_match);
-
-	return ret;
+	return xt_register_matches(xt_tcpmss_match,
+				   ARRAY_SIZE(xt_tcpmss_match));
 }
 
 static void __exit xt_tcpmss_fini(void)
 {
-	xt_unregister_match(&tcpmss6_match);
-	xt_unregister_match(&tcpmss_match);
+	xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match));
 }
 
 module_init(xt_tcpmss_init);
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index a9a63aa..e76a68e 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -141,7 +141,6 @@
 	       const void *info,
 	       const struct xt_match *match,
 	       void *matchinfo,
-	       unsigned int matchsize,
 	       unsigned int hook_mask)
 {
 	const struct xt_tcp *tcpinfo = matchinfo;
@@ -190,7 +189,6 @@
 	       const void *info,
 	       const struct xt_match *match,
 	       void *matchinfo,
-	       unsigned int matchsize,
 	       unsigned int hook_mask)
 {
 	const struct xt_tcp *udpinfo = matchinfo;
@@ -199,81 +197,54 @@
 	return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
 }
 
-static struct xt_match tcp_matchstruct = {
-	.name		= "tcp",
-	.match		= tcp_match,
-	.matchsize	= sizeof(struct xt_tcp),
-	.proto		= IPPROTO_TCP,
-	.family		= AF_INET,
-	.checkentry	= tcp_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match tcp6_matchstruct = {
-	.name		= "tcp",
-	.match		= tcp_match,
-	.matchsize	= sizeof(struct xt_tcp),
-	.proto		= IPPROTO_TCP,
-	.family		= AF_INET6,
-	.checkentry	= tcp_checkentry,
-	.me		= THIS_MODULE,
-};
-
-static struct xt_match udp_matchstruct = {
-	.name		= "udp",
-	.match		= udp_match,
-	.matchsize	= sizeof(struct xt_udp),
-	.proto		= IPPROTO_UDP,
-	.family		= AF_INET,
-	.checkentry	= udp_checkentry,
-	.me		= THIS_MODULE,
-};
-static struct xt_match udp6_matchstruct = {
-	.name		= "udp",
-	.match		= udp_match,
-	.matchsize	= sizeof(struct xt_udp),
-	.proto		= IPPROTO_UDP,
-	.family		= AF_INET6,
-	.checkentry	= udp_checkentry,
-	.me		= THIS_MODULE,
+static struct xt_match xt_tcpudp_match[] = {
+	{
+		.name		= "tcp",
+		.family		= AF_INET,
+		.checkentry	= tcp_checkentry,
+		.match		= tcp_match,
+		.matchsize	= sizeof(struct xt_tcp),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "tcp",
+		.family		= AF_INET6,
+		.checkentry	= tcp_checkentry,
+		.match		= tcp_match,
+		.matchsize	= sizeof(struct xt_tcp),
+		.proto		= IPPROTO_TCP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "udp",
+		.family		= AF_INET,
+		.checkentry	= udp_checkentry,
+		.match		= udp_match,
+		.matchsize	= sizeof(struct xt_udp),
+		.proto		= IPPROTO_UDP,
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "udp",
+		.family		= AF_INET6,
+		.checkentry	= udp_checkentry,
+		.match		= udp_match,
+		.matchsize	= sizeof(struct xt_udp),
+		.proto		= IPPROTO_UDP,
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init xt_tcpudp_init(void)
 {
-	int ret;
-	ret = xt_register_match(&tcp_matchstruct);
-	if (ret)
-		return ret;
-
-	ret = xt_register_match(&tcp6_matchstruct);
-	if (ret)
-		goto out_unreg_tcp;
-
-	ret = xt_register_match(&udp_matchstruct);
-	if (ret)
-		goto out_unreg_tcp6;
-	
-	ret = xt_register_match(&udp6_matchstruct);
-	if (ret)
-		goto out_unreg_udp;
-
-	return ret;
-
-out_unreg_udp:
-	xt_unregister_match(&udp_matchstruct);
-out_unreg_tcp6:
-	xt_unregister_match(&tcp6_matchstruct);
-out_unreg_tcp:
-	xt_unregister_match(&tcp_matchstruct);
-	return ret;
+	return xt_register_matches(xt_tcpudp_match,
+				   ARRAY_SIZE(xt_tcpudp_match));
 }
 
 static void __exit xt_tcpudp_fini(void)
 {
-	xt_unregister_match(&udp6_matchstruct);
-	xt_unregister_match(&udp_matchstruct);
-	xt_unregister_match(&tcp6_matchstruct);
-	xt_unregister_match(&tcp_matchstruct);
+	xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match));
 }
 
 module_init(xt_tcpudp_init);
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
new file mode 100644
index 0000000..fe23cb7
--- /dev/null
+++ b/net/netlabel/Kconfig
@@ -0,0 +1,14 @@
+#
+# NetLabel configuration
+#
+
+config NETLABEL
+	bool "NetLabel subsystem support"
+	depends on NET && SECURITY
+	default n
+	---help---
+	  NetLabel provides support for explicit network packet labeling
+	  protocols such as CIPSO and RIPSO.  For more information see
+	  Documentation/netlabel.
+
+	  If you are unsure, say N.
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
new file mode 100644
index 0000000..8af18c0
--- /dev/null
+++ b/net/netlabel/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for the NetLabel subsystem.
+#
+# Feb 9, 2006, Paul Moore <paul.moore@hp.com>
+#
+
+# base objects
+obj-y	:= netlabel_user.o netlabel_kapi.o netlabel_domainhash.o
+
+# management objects
+obj-y	+= netlabel_mgmt.o
+
+# protocol modules
+obj-y	+= netlabel_unlabeled.o
+obj-y	+= netlabel_cipso_v4.o
+
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
new file mode 100644
index 0000000..a4f40ad
--- /dev/null
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -0,0 +1,542 @@
+/*
+ * NetLabel CIPSO/IPv4 Support
+ *
+ * This file defines the CIPSO/IPv4 functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+
+#include "netlabel_user.h"
+#include "netlabel_cipso_v4.h"
+
+/* NetLabel Generic NETLINK CIPSOv4 family */
+static struct genl_family netlbl_cipsov4_gnl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_CIPSOV4_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = 0,
+};
+
+
+/*
+ * Helper Functions
+ */
+
+/**
+ * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * Designed to be used as a call_rcu() callback which releases a DOI
+ * definition; for the CIPSO_V4_MAP_STD type the four mapping arrays and the
+ * table that holds them are freed before the definition itself.
+ */
+static void netlbl_cipsov4_doi_free(struct rcu_head *entry)
+{
+	struct cipso_v4_doi *ptr;
+
+	ptr = container_of(entry, struct cipso_v4_doi, rcu);
+	switch (ptr->type) {
+	case CIPSO_V4_MAP_STD:
+		kfree(ptr->map.std->lvl.cipso);
+		kfree(ptr->map.std->lvl.local);
+		kfree(ptr->map.std->cat.cipso);
+		kfree(ptr->map.std->cat.local);
+		kfree(ptr->map.std);
+		break;
+	}
+	kfree(ptr);
+}
+
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition
+ * @doi: the DOI value
+ * @msg: the ADD message data
+ * @msg_size: the size of the ADD message buffer
+ *
+ * Description:
+ * Build a CIPSO_V4_MAP_STD DOI definition from the tags and level/category
+ * mappings in the ADD message and pass it to cipso_v4_doi_add().  Returns
+ * zero on success, otherwise -EINVAL, -ENOMEM or cipso_v4_doi_add()'s error.
+ */
+static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size)
+{
+	int ret_val = -EINVAL;
+	int msg_len = msg_size;
+	u32 num_tags;
+	u32 num_lvls;
+	u32 num_cats;
+	struct cipso_v4_doi *doi_def = NULL;
+	u32 iter;
+	u32 tmp_val_a;
+	u32 tmp_val_b;
+
+	if (msg_len < NETLBL_LEN_U32)
+		goto add_std_failure;
+	num_tags = netlbl_getinc_u32(&msg, &msg_len);
+	if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
+		goto add_std_failure;
+
+	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+	if (doi_def == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
+	if (doi_def->map.std == NULL) {
+		/* type is unset here, so the shared free path is unsafe */
+		kfree(doi_def);
+		return -ENOMEM;
+	}
+	doi_def->type = CIPSO_V4_MAP_STD;
+
+	for (iter = 0; iter < num_tags; iter++) {
+		if (msg_len < NETLBL_LEN_U8)
+			goto add_std_failure;
+		doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
+		switch (doi_def->tags[iter]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			break;
+		default:
+			goto add_std_failure;
+		}
+	}
+	if (iter < CIPSO_V4_TAG_MAXCNT)
+		doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
+
+	if (msg_len < 6 * NETLBL_LEN_U32)
+		goto add_std_failure;
+
+	num_lvls = netlbl_getinc_u32(&msg, &msg_len);
+	if (num_lvls == 0)
+		goto add_std_failure;
+	doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len);
+	if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS)
+		goto add_std_failure;
+	doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
+					      sizeof(u32),
+					      GFP_KERNEL);
+	if (doi_def->map.std->lvl.local == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len);
+	if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS)
+		goto add_std_failure;
+	doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
+					      sizeof(u32),
+					      GFP_KERNEL);
+	if (doi_def->map.std->lvl.cipso == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+
+	num_cats = netlbl_getinc_u32(&msg, &msg_len);
+	doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len);
+	if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS)
+		goto add_std_failure;
+	doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size,
+					      sizeof(u32),
+					      GFP_KERNEL);
+	if (doi_def->map.std->cat.local == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+	doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len);
+	if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS)
+		goto add_std_failure;
+	doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size,
+					      sizeof(u32),
+					      GFP_KERNEL);
+	if (doi_def->map.std->cat.cipso == NULL) {
+		ret_val = -ENOMEM;
+		goto add_std_failure;
+	}
+
+	if (msg_len <
+	    num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) +
+	    num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16))
+		goto add_std_failure;
+
+	for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++)
+		doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL;
+	for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++)
+		doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL;
+	for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++)
+		doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT;
+	for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++)
+		doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT;
+
+	for (iter = 0; iter < num_lvls; iter++) {
+		tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
+		tmp_val_b = netlbl_getinc_u8(&msg, &msg_len);
+
+		if (tmp_val_a >= doi_def->map.std->lvl.local_size ||
+		    tmp_val_b >= doi_def->map.std->lvl.cipso_size)
+			goto add_std_failure;
+
+		doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a;
+		doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b;
+	}
+
+	for (iter = 0; iter < num_cats; iter++) {
+		tmp_val_a = netlbl_getinc_u32(&msg, &msg_len);
+		tmp_val_b = netlbl_getinc_u16(&msg, &msg_len);
+
+		if (tmp_val_a >= doi_def->map.std->cat.local_size ||
+		    tmp_val_b >= doi_def->map.std->cat.cipso_size)
+			goto add_std_failure;
+
+		doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a;
+		doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b;
+	}
+
+	doi_def->doi = doi;
+	ret_val = cipso_v4_doi_add(doi_def);
+	if (ret_val != 0)
+		goto add_std_failure;
+	return 0;
+
+add_std_failure:
+	if (doi_def)
+		netlbl_cipsov4_doi_free(&doi_def->rcu);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition
+ * @doi: the DOI value
+ * @msg: the ADD message data
+ * @msg_size: the size of the ADD message buffer
+ *
+ * Description:
+ * Create a new CIPSO_V4_MAP_PASS DOI definition from the tag list in the ADD
+ * message and add it to the CIPSO V4 engine.  Only CIPSO_V4_TAG_RBITMAP tags
+ * are accepted.  Return zero on success and non-zero on error.
+ */
+static int netlbl_cipsov4_add_pass(u32 doi,
+				   struct nlattr *msg,
+				   size_t msg_size)
+{
+	int ret_val = -EINVAL;
+	int msg_len = msg_size;
+	u32 num_tags;
+	struct cipso_v4_doi *doi_def = NULL;
+	u32 iter;
+
+	if (msg_len < NETLBL_LEN_U32)
+		goto add_pass_failure;
+	num_tags = netlbl_getinc_u32(&msg, &msg_len);
+	if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT)
+		goto add_pass_failure;
+
+	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
+	if (doi_def == NULL) {
+		ret_val = -ENOMEM;
+		goto add_pass_failure;
+	}
+	doi_def->type = CIPSO_V4_MAP_PASS;
+
+	for (iter = 0; iter < num_tags; iter++) {
+		if (msg_len < NETLBL_LEN_U8)
+			goto add_pass_failure;
+		doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len);
+		switch (doi_def->tags[iter]) {
+		case CIPSO_V4_TAG_RBITMAP:
+			break;
+		default:
+			goto add_pass_failure;
+		}
+	}
+	/* mark the end of a tag list that does not fill the whole array */
+	if (iter < CIPSO_V4_TAG_MAXCNT)
+		doi_def->tags[iter] = CIPSO_V4_TAG_INVALID;
+
+	doi_def->doi = doi;
+	ret_val = cipso_v4_doi_add(doi_def);
+	if (ret_val != 0)
+		goto add_pass_failure;
+	return 0;
+
+add_pass_failure:
+	if (doi_def)
+		netlbl_cipsov4_doi_free(&doi_def->rcu);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Create a new DOI definition from the given ADD message (CAP_NET_ADMIN
+ * required) and ACK the outcome.  Returns zero or a negative error code.
+ */
+static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val;
+	u32 doi;
+	u32 map_type;
+	int msg_len = netlbl_netlink_payload_len(skb);
+	struct nlattr *msg = netlbl_netlink_payload_data(skb);
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto add_return;
+
+	/* short messages and unknown map types must fail, not ACK success */
+	ret_val = -EINVAL;
+	if (msg_len < 2 * NETLBL_LEN_U32)
+		goto add_return;
+
+	doi = netlbl_getinc_u32(&msg, &msg_len);
+	map_type = netlbl_getinc_u32(&msg, &msg_len);
+	switch (map_type) {
+	case CIPSO_V4_MAP_STD:
+		ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len);
+		break;
+	case CIPSO_V4_MAP_PASS:
+		ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len);
+		break;
+	}
+
+add_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Answer a LIST message carrying a single u32 DOI with the buffer built by
+ * cipso_v4_doi_dump(); if anything fails an ACK holding the error is sent
+ * instead.  Returns zero on success and negative values on error.
+ */
+static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	u32 doi;
+	struct nlattr *msg = netlbl_netlink_payload_data(skb);
+	struct sk_buff *ans_skb;
+
+	if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32)
+		goto list_failure;
+
+	doi = nla_get_u32(msg);
+	ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL) {
+		ret_val = -ENOMEM;
+		goto list_failure;
+	}
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_LIST);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto list_failure;
+
+	return 0;
+
+list_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_listall - Handle a LISTALL message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Answer a LISTALL message with the buffer built by cipso_v4_doi_dump_all();
+ * if anything fails an ACK holding the error is sent instead.  Returns zero
+ * on success and negative values on error.
+ */
+static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct sk_buff *ans_skb;
+
+	ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN));
+	if (ans_skb == NULL) {
+		ret_val = -ENOMEM;
+		goto listall_failure;
+	}
+	netlbl_netlink_hdr_push(ans_skb,
+				info->snd_pid,
+				0,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_LISTALL);
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	if (ret_val != 0)
+		goto listall_failure;
+
+	return 0;
+
+listall_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_cipsov4_remove - Handle a REMOVE message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Remove the DOI named in a REMOVE message; the sender must pass the
+ * CAP_NET_ADMIN check.  The outcome is always reported with an ACK.  Returns
+ * zero on success, negative values on failure.
+ */
+static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val;
+	u32 doi;
+	struct nlattr *msg = netlbl_netlink_payload_data(skb);
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto remove_return;
+
+	if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) {
+		ret_val = -EINVAL;
+		goto remove_return;
+	}
+
+	doi = nla_get_u32(msg);
+	ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free);
+
+remove_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_cipsov4_gnl_family.id,
+				NLBL_CIPSOV4_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/*
+ * NetLabel Generic NETLINK Command Definitions
+ */
+
+static struct genl_ops netlbl_cipsov4_genl_c_add = {
+	.cmd = NLBL_CIPSOV4_C_ADD,
+	.flags = 0,
+	.doit = netlbl_cipsov4_add,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_remove = {
+	.cmd = NLBL_CIPSOV4_C_REMOVE,
+	.flags = 0,
+	.doit = netlbl_cipsov4_remove,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_list = {
+	.cmd = NLBL_CIPSOV4_C_LIST,
+	.flags = 0,
+	.doit = netlbl_cipsov4_list,
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_cipsov4_genl_c_listall = {
+	.cmd = NLBL_CIPSOV4_C_LISTALL,
+	.flags = 0,
+	.doit = netlbl_cipsov4_listall,
+	.dumpit = NULL,
+};
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component
+ *
+ * Description:
+ * Register the CIPSOv4 family and its ADD, REMOVE, LIST and LISTALL commands
+ * with Generic NETLINK; nothing is unregistered if a later step fails.
+ * Returns zero on success, negative values on failure.
+ */
+int netlbl_cipsov4_genl_init(void)
+{
+	int ret_val;
+
+	ret_val = genl_register_family(&netlbl_cipsov4_gnl_family);
+	if (ret_val != 0)
+		return ret_val;
+
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_add);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_remove);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_list);
+	if (ret_val != 0)
+		return ret_val;
+	ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family,
+				    &netlbl_cipsov4_genl_c_listall);
+	if (ret_val != 0)
+		return ret_val;
+
+	return 0;
+}
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
new file mode 100644
index 0000000..4c6ff4b
--- /dev/null
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -0,0 +1,217 @@
+/*
+ * NetLabel CIPSO/IPv4 Support
+ *
+ * This file defines the CIPSO/IPv4 functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_CIPSO_V4
+#define _NETLABEL_CIPSO_V4
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the CIPSO subsystem, all
+ * of which are preceded by the nlmsghdr struct.
+ *
+ * o ACK:
+ *   Sent by the kernel in response to an application's message, applications
+ *   should never send this message.
+ *
+ *   +----------------------+-----------------------+
+ *   | seq number (32 bits) | return code (32 bits) |
+ *   +----------------------+-----------------------+
+ *
+ *     seq number:  the sequence number of the original message, taken from the
+ *                  nlmsghdr structure
+ *     return code: return value, based on errno values
+ *
+ * o ADD:
+ *   Sent by an application to add a new DOI mapping table, after completion
+ *   of the task the kernel should ACK this message.
+ *
+ *   +---------------+--------------------+---------------------+
+ *   | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ...
+ *   +---------------+--------------------+---------------------+
+ *
+ *   +-----------------+
+ *   | tag #X (8 bits) | ... repeated
+ *   +-----------------+
+ *
+ *   +-------------- ---- --- -- -
+ *   | mapping data
+ *   +-------------- ---- --- -- -
+ *
+ *     DOI:          the DOI value
+ *     map type:     the mapping table type (defined in the cipso_ipv4.h header
+ *                   as CIPSO_V4_MAP_*)
+ *     tag count:    the number of tags, must be greater than zero
+ *     tag:          the CIPSO tag for the DOI, tags listed first are given
+ *                   higher priority when sending packets
+ *     mapping data: specific to the map type (see below)
+ *
+ *   CIPSO_V4_MAP_STD
+ *
+ *   +------------------+-----------------------+----------------------+
+ *   | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ...
+ *   +------------------+-----------------------+----------------------+
+ *
+ *   +----------------------+---------------------+---------------------+
+ *   | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ...
+ *   +----------------------+---------------------+---------------------+
+ *
+ *   +--------------------------+-------------------------+
+ *   | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
+ *   +--------------------------+-------------------------+
+ *
+ *   +-----------------------------+-----------------------------+
+ *   | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
+ *   +-----------------------------+-----------------------------+
+ *
+ *     levels:         the number of level mappings
+ *     max l level:    the highest local level
+ *     max r level:    the highest remote/CIPSO level
+ *     categories:     the number of category mappings
+ *     max l cat:      the highest local category
+ *     max r cat:      the highest remote/CIPSO category
+ *     local level:    the local part of a level mapping
+ *     CIPSO level:    the remote/CIPSO part of a level mapping
+ *     local category: the local part of a category mapping
+ *     CIPSO category: the remote/CIPSO part of a category mapping
+ *
+ *   CIPSO_V4_MAP_PASS
+ *
+ *   No mapping data is needed for this map type.
+ *
+ * o REMOVE:
+ *   Sent by an application to remove a specific DOI mapping table from the
+ *   CIPSO V4 system.  The kernel should ACK this message.
+ *
+ *   +---------------+
+ *   | DOI (32 bits) |
+ *   +---------------+
+ *
+ *     DOI:          the DOI value
+ *
+ * o LIST:
+ *   Sent by an application to list the details of a DOI definition.  The
+ *   kernel should send an ACK on error or a response as indicated below.  The
+ *   application generated message format is shown below.
+ *
+ *   +---------------+
+ *   | DOI (32 bits) |
+ *   +---------------+
+ *
+ *     DOI:          the DOI value
+ *
+ *   The valid response message format depends on the type of the DOI mapping,
+ *   the known formats are shown below.
+ *
+ *   +--------------------+
+ *   | map type (32 bits) | ...
+ *   +--------------------+
+ *
+ *     map type:       the DOI mapping table type (defined in the cipso_ipv4.h
+ *                     header as CIPSO_V4_MAP_*)
+ *
+ *   (map type == CIPSO_V4_MAP_STD)
+ *
+ *   +----------------+------------------+----------------------+
+ *   | tags (32 bits) | levels (32 bits) | categories (32 bits) | ...
+ *   +----------------+------------------+----------------------+
+ *
+ *   +-----------------+
+ *   | tag #X (8 bits) | ... repeated
+ *   +-----------------+
+ *
+ *   +--------------------------+-------------------------+
+ *   | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated
+ *   +--------------------------+-------------------------+
+ *
+ *   +-----------------------------+-----------------------------+
+ *   | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated
+ *   +-----------------------------+-----------------------------+
+ *
+ *     tags:           the number of CIPSO tag types
+ *     levels:         the number of level mappings
+ *     categories:     the number of category mappings
+ *     tag:            the tag number, tags listed first are given higher
+ *                     priority when sending packets
+ *     local level:    the local part of a level mapping
+ *     CIPSO level:    the remote/CIPSO part of a level mapping
+ *     local category: the local part of a category mapping
+ *     CIPSO category: the remote/CIPSO part of a category mapping
+ *
+ *   (map type == CIPSO_V4_MAP_PASS)
+ *
+ *   +----------------+
+ *   | tags (32 bits) | ...
+ *   +----------------+
+ *
+ *   +-----------------+
+ *   | tag #X (8 bits) | ... repeated
+ *   +-----------------+
+ *
+ *     tags:           the number of CIPSO tag types
+ *     tag:            the tag number, tags listed first are given higher
+ *                     priority when sending packets
+ *
+ * o LISTALL:
+ *   This message is sent by an application to list the valid DOIs on the
+ *   system.  There is no payload and the kernel should respond with an ACK
+ *   or the following message.
+ *
+ *   +---------------------+------------------+-----------------------+
+ *   | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) |
+ *   +---------------------+------------------+-----------------------+
+ *
+ *   +-----------------------+
+ *   | map type #X (32 bits) | ...
+ *   +-----------------------+
+ *
+ *     DOI count:      the number of DOIs
+ *     DOI:            the DOI value
+ *     map type:       the DOI mapping table type (defined in the cipso_ipv4.h
+ *                     header as CIPSO_V4_MAP_*)
+ *
+ */
+
+/* NetLabel CIPSOv4 commands */
+enum {
+	NLBL_CIPSOV4_C_UNSPEC,
+	NLBL_CIPSOV4_C_ACK,
+	NLBL_CIPSOV4_C_ADD,
+	NLBL_CIPSOV4_C_REMOVE,
+	NLBL_CIPSOV4_C_LIST,
+	NLBL_CIPSOV4_C_LISTALL,
+	__NLBL_CIPSOV4_C_MAX,
+};
+#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1)
+
+/* NetLabel protocol functions */
+int netlbl_cipsov4_genl_init(void);
+
+#endif
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
new file mode 100644
index 0000000..0489a13
--- /dev/null
+++ b/net/netlabel/netlabel_domainhash.c
@@ -0,0 +1,513 @@
+/*
+ * NetLabel Domain Hash Table
+ *
+ * This file manages the domain hash table that NetLabel uses to determine
+ * which network labeling protocol to use for a given domain.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+#include "netlabel_mgmt.h"
+#include "netlabel_domainhash.h"
+
+struct netlbl_domhsh_tbl {
+	struct list_head *tbl;		/* array of @size bucket list heads */
+	u32 size;			/* bucket count, always a power of two */
+};
+
+/* Domain hash table, readers use RCU and writers take netlbl_domhsh_lock */
+/* XXX - updates should be so rare that having one spinlock for the entire
+ * hash table should be okay */
+static DEFINE_SPINLOCK(netlbl_domhsh_lock);
+static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL;
+
+/* Default domain mapping, optional fallback when no table entry matches */
+static DEFINE_SPINLOCK(netlbl_domhsh_def_lock);
+static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
+
+/*
+ * Domain Hash Table Helper Functions
+ */
+
+/**
+ * netlbl_domhsh_free_entry - Frees a domain hash table entry
+ * @entry: the RCU head embedded in the entry being freed
+ *
+ * Description:
+ * RCU callback, meant to be handed to call_rcu(), which releases the domain
+ * string and then the mapping entry itself once it is safe to free the
+ * memory.
+ *
+ */
+static void netlbl_domhsh_free_entry(struct rcu_head *entry)
+{
+	struct netlbl_dom_map *map = container_of(entry,
+						  struct netlbl_dom_map, rcu);
+
+	kfree(map->domain);
+	kfree(map);
+}
+
+/**
+ * netlbl_domhsh_hash - Hashing function for the domain hash table
+ * @key: the domain name to hash
+ *
+ * Description:
+ * Computes the hash of @key and reduces it to a bucket index for the
+ * current domain hash table.  The caller is responsible for calling the
+ * rcu_read_[un]lock() functions.
+ *
+ */
+static u32 netlbl_domhsh_hash(const char *key)
+{
+	const char *cur;
+	u32 hash = 0;
+
+	/* Rotate left by four and xor in each byte; adapted from
+	 * security/selinux/ss/symtab.c:symhash() */
+
+	for (cur = key; *cur != '\0'; cur++)
+		hash = (hash << 4 | (hash >> (8 * sizeof(u32) - 4))) ^ *cur;
+	/* the table size is a power of two so the mask picks the bucket */
+	return hash & (rcu_dereference(netlbl_domhsh)->size - 1);
+}
+
+/**
+ * netlbl_domhsh_search - Search for a domain entry
+ * @domain: the domain
+ * @def: return default if no match is found
+ *
+ * Description:
+ * Looks up @domain in the domain hash table and returns the matching valid
+ * entry.  When nothing matches and @def is non-zero the default mapping is
+ * returned instead, provided one is configured and valid.  Returns NULL in
+ * every other case.  The caller is responsible for the rcu hash table locks
+ * (i.e. the caller must call rcu_read_[un]lock()).
+ *
+ */
+static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def)
+{
+	u32 bkt;
+	struct netlbl_dom_map *cur;
+
+	if (domain != NULL) {
+		bkt = netlbl_domhsh_hash(domain);
+		list_for_each_entry_rcu(cur, &netlbl_domhsh->tbl[bkt], list)
+			if (cur->valid && strcmp(cur->domain, domain) == 0)
+				return cur;
+	}
+
+	/* no table hit, only consult the default mapping when asked to */
+	if (def == 0)
+		return NULL;
+	cur = rcu_dereference(netlbl_domhsh_def);
+	if (cur == NULL || !cur->valid)
+		return NULL;
+	return cur;
+}
+
+/*
+ * Domain Hash Table Functions
+ */
+
+/**
+ * netlbl_domhsh_init - Init for the domain hash
+ * @size: the number of bits to use for the hash buckets
+ *
+ * Description:
+ * Allocates a table of 2^@size empty buckets and publishes it as the domain
+ * hash table; called by netlbl_init() during initialization.  Returns zero
+ * on success, -EINVAL for a bad @size and -ENOMEM on allocation failure.
+ *
+ */
+int netlbl_domhsh_init(u32 size)
+{
+	u32 iter;
+	struct netlbl_domhsh_tbl *hsh_tbl;
+
+	if (size == 0 || size >= 8 * sizeof(u32))
+		return -EINVAL;
+
+	hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL);
+	if (hsh_tbl == NULL)
+		return -ENOMEM;
+	hsh_tbl->size = (u32)1 << size;
+	hsh_tbl->tbl = kcalloc(hsh_tbl->size,
+			       sizeof(struct list_head),
+			       GFP_KERNEL);
+	if (hsh_tbl->tbl == NULL) {
+		kfree(hsh_tbl);
+		return -ENOMEM;
+	}
+	for (iter = 0; iter < hsh_tbl->size; iter++)
+		INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);
+
+	rcu_read_lock();
+	spin_lock(&netlbl_domhsh_lock);
+	rcu_assign_pointer(netlbl_domhsh, hsh_tbl);
+	spin_unlock(&netlbl_domhsh_lock);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/**
+ * netlbl_domhsh_add - Adds an entry to the domain hash table
+ * @entry: the entry to add
+ *
+ * Description:
+ * Adds @entry to the domain hash table, or installs it as the default mapping
+ * when it has no domain, and updates the lower level protocol handler (i.e.
+ * CIPSO).  Returns zero on success, -EEXIST on duplicates, negative otherwise.
+ *
+ */
+int netlbl_domhsh_add(struct netlbl_dom_map *entry)
+{
+	int ret_val;
+	u32 bkt;
+
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = 0;
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
+						  entry->domain);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (ret_val != 0)
+		return ret_val;
+
+	entry->valid = 1;
+	INIT_RCU_HEAD(&entry->rcu);
+
+	ret_val = 0;
+	rcu_read_lock();
+	if (entry->domain != NULL) {
+		bkt = netlbl_domhsh_hash(entry->domain);
+		spin_lock(&netlbl_domhsh_lock);
+		if (netlbl_domhsh_search(entry->domain, 0) == NULL)
+			list_add_tail_rcu(&entry->list,
+					  &netlbl_domhsh->tbl[bkt]);
+		else
+			ret_val = -EEXIST;
+		spin_unlock(&netlbl_domhsh_lock);
+	} else {
+		/* no domain name, this is the default mapping */
+		INIT_LIST_HEAD(&entry->list);
+		spin_lock(&netlbl_domhsh_def_lock);
+		if (rcu_dereference(netlbl_domhsh_def) == NULL)
+			rcu_assign_pointer(netlbl_domhsh_def, entry);
+		else
+			ret_val = -EEXIST;
+		spin_unlock(&netlbl_domhsh_def_lock);
+	}
+	rcu_read_unlock();
+
+	if (ret_val != 0) {
+		switch (entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
+						       entry->domain) != 0)
+				BUG();
+			break;
+		}
+	}
+
+	return ret_val;
+}
+
+/**
+ * netlbl_domhsh_add_default - Adds the default entry to the domain hash table
+ * @entry: the entry to add, its domain field is expected to be NULL
+ *
+ * Description:
+ * Thin wrapper around netlbl_domhsh_add(), which installs an entry without a
+ * domain as the default mapping and updates the lower level protocol handler
+ * (i.e. CIPSO).  Returns zero on success, negative on failure.
+ *
+ */
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry)
+{
+	return netlbl_domhsh_add(entry);
+}
+
+/**
+ * netlbl_domhsh_remove - Removes an entry from the domain hash table
+ * @domain: the domain to remove, NULL selects the default mapping
+ *
+ * Description:
+ * Removes an entry from the domain hash table, notifies the lower level
+ * protocol handler (i.e. CIPSO) and frees the entry via call_rcu().  Returns
+ * zero on success, -ENOENT if no valid entry matches, negative on failure.
+ *
+ */
+int netlbl_domhsh_remove(const char *domain)
+{
+	int ret_val = -ENOENT;
+	struct netlbl_dom_map *entry;
+
+	rcu_read_lock();
+	if (domain != NULL)
+		entry = netlbl_domhsh_search(domain, 0);
+	else
+		entry = netlbl_domhsh_search(domain, 1);
+	if (entry == NULL)
+		goto remove_return;
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
+						     entry->domain);
+		if (ret_val != 0)
+			goto remove_return;
+		break;
+	}
+	ret_val = 0;
+	if (entry != rcu_dereference(netlbl_domhsh_def)) {
+		spin_lock(&netlbl_domhsh_lock);
+		if (entry->valid) {
+			entry->valid = 0;
+			list_del_rcu(&entry->list);
+		} else
+			ret_val = -ENOENT;
+		spin_unlock(&netlbl_domhsh_lock);
+	} else {
+		spin_lock(&netlbl_domhsh_def_lock);
+		if (entry->valid) {
+			entry->valid = 0;
+			rcu_assign_pointer(netlbl_domhsh_def, NULL);
+		} else
+			ret_val = -ENOENT;
+		spin_unlock(&netlbl_domhsh_def_lock);
+	}
+	if (ret_val == 0)
+		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
+
+remove_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
+/**
+ * netlbl_domhsh_remove_default - Removes the default entry from the table
+ *
+ * Description:
+ * Removes/resets the default mapping by calling netlbl_domhsh_remove() with a
+ * NULL domain, which also updates the lower level protocol handler (i.e.
+ * CIPSO).  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_domhsh_remove_default(void)
+{
+	return netlbl_domhsh_remove(NULL);
+}
+
+/**
+ * netlbl_domhsh_getentry - Get an entry from the domain hash table
+ * @domain: the domain name to search for
+ *
+ * Description:
+ * Search the domain hash table for an entry matching @domain and return a
+ * pointer to it (not a copy), falling back to the default mapping, or NULL.
+ * The caller is responsible for ensuring that rcu_read_[un]lock() is called.
+ *
+ */
+struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
+{
+	return netlbl_domhsh_search(domain, 1);
+}
+
+/**
+ * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff
+ * @headroom: number of bytes to reserve at the front of the sk_buff
+ *
+ * Description:
+ * Dump the domain hash table into a buffer suitable for returning to an
+ * application in response to a NetLabel management DOMAIN message.  This
+ * function may fail if entries are added to the hash table while the dump is
+ * in progress.  See netlabel.h for the DOMAIN message format.  Returns a
+ * pointer to a sk_buff on success, NULL on error.
+ *
+ */
+struct sk_buff *netlbl_domhsh_dump(size_t headroom)
+{
+	struct sk_buff *skb = NULL;
+	ssize_t buf_len;
+	u32 bkt_iter;
+	u32 dom_cnt = 0;
+	struct netlbl_domhsh_tbl *hsh_tbl;
+	struct netlbl_dom_map *list_iter;
+	ssize_t tmp_len;
+
+	buf_len = NETLBL_LEN_U32;
+	rcu_read_lock();
+	hsh_tbl = rcu_dereference(netlbl_domhsh);
+	for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
+		list_for_each_entry_rcu(list_iter,
+					&hsh_tbl->tbl[bkt_iter], list) {
+			buf_len += NETLBL_LEN_U32 +
+				nla_total_size(strlen(list_iter->domain) + 1);
+			switch (list_iter->type) {
+			case NETLBL_NLTYPE_UNLABELED:
+				break;
+			case NETLBL_NLTYPE_CIPSOV4:
+				buf_len += 2 * NETLBL_LEN_U32;
+				break;
+			}
+			dom_cnt++;
+		}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto dump_failure;
+
+	if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0)
+		goto dump_failure;
+	buf_len -= NETLBL_LEN_U32;
+	hsh_tbl = rcu_dereference(netlbl_domhsh);
+	for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
+		list_for_each_entry_rcu(list_iter,
+					&hsh_tbl->tbl[bkt_iter], list) {
+			tmp_len = nla_total_size(strlen(list_iter->domain) +
+						 1);
+			if (buf_len < NETLBL_LEN_U32 + tmp_len)
+				goto dump_failure;
+			if (nla_put_string(skb,
+					   NLA_STRING,
+					   list_iter->domain) != 0)
+				goto dump_failure;
+			if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0)
+				goto dump_failure;
+			buf_len -= NETLBL_LEN_U32 + tmp_len;
+			switch (list_iter->type) {
+			case NETLBL_NLTYPE_UNLABELED:
+				break;
+			case NETLBL_NLTYPE_CIPSOV4:
+				if (buf_len < 2 * NETLBL_LEN_U32)
+					goto dump_failure;
+				if (nla_put_u32(skb,
+				       NLA_U32,
+				       list_iter->type_def.cipsov4->type) != 0)
+					goto dump_failure;
+				if (nla_put_u32(skb,
+				       NLA_U32,
+				       list_iter->type_def.cipsov4->doi) != 0)
+					goto dump_failure;
+				buf_len -= 2 * NETLBL_LEN_U32;
+				break;
+			}
+		}
+	rcu_read_unlock();
+
+	return skb;
+
+dump_failure:
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return NULL;
+}
+
+/**
+ * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff
+ * @headroom: number of bytes to reserve at the front of the sk_buff
+ *
+ * Description:
+ * Dump the default domain mapping into a buffer suitable for returning to an
+ * application in response to a NetLabel management DEFDOMAIN message.  This
+ * function may fail if another process is changing the default domain mapping
+ * at the same time.  See netlabel.h for the DEFDOMAIN message format.
+ * Returns a pointer to a sk_buff on success, NULL on error.
+ *
+ */
+struct sk_buff *netlbl_domhsh_dump_default(size_t headroom)
+{
+	struct sk_buff *skb;
+	ssize_t buf_len;
+	struct netlbl_dom_map *entry;
+
+	buf_len = NETLBL_LEN_U32;
+	rcu_read_lock();
+	entry = rcu_dereference(netlbl_domhsh_def);
+	if (entry != NULL)
+		switch (entry->type) {
+		case NETLBL_NLTYPE_UNLABELED:
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			buf_len += 2 * NETLBL_LEN_U32;
+			break;
+		}
+
+	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
+	if (skb == NULL)
+		goto dump_default_failure;
+
+	if (entry != rcu_dereference(netlbl_domhsh_def))
+		goto dump_default_failure;
+	if (entry != NULL) {
+		if (nla_put_u32(skb, NLA_U32, entry->type) != 0)
+			goto dump_default_failure;
+		buf_len -= NETLBL_LEN_U32;
+		switch (entry->type) {
+		case NETLBL_NLTYPE_UNLABELED:
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			if (buf_len < 2 * NETLBL_LEN_U32)
+				goto dump_default_failure;
+			if (nla_put_u32(skb,
+					NLA_U32,
+					entry->type_def.cipsov4->type) != 0)
+				goto dump_default_failure;
+			if (nla_put_u32(skb,
+					NLA_U32,
+					entry->type_def.cipsov4->doi) != 0)
+				goto dump_default_failure;
+			buf_len -= 2 * NETLBL_LEN_U32;
+			break;
+		}
+	} else
+		nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE);
+	rcu_read_unlock();
+
+	return skb;
+
+dump_default_failure:
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return NULL;
+}
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
new file mode 100644
index 0000000..99a2287
--- /dev/null
+++ b/net/netlabel/netlabel_domainhash.h
@@ -0,0 +1,67 @@
+/*
+ * NetLabel Domain Hash Table
+ *
+ * This file manages the domain hash table that NetLabel uses to determine
+ * which network labeling protocol to use for a given domain.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_DOMAINHASH_H
+#define _NETLABEL_DOMAINHASH_H
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+
+/* Domain hash table size */
+/* XXX - currently this number is an uneducated guess */
+#define NETLBL_DOMHSH_BITSIZE       7
+
+/* Domain mapping definition struct */
+struct netlbl_dom_map {
+	char *domain;			/* domain name, NULL for the default */
+	u32 type;			/* NETLBL_NLTYPE_* protocol type */
+	union {
+		struct cipso_v4_doi *cipsov4;	/* NETLBL_NLTYPE_CIPSOV4 */
+	} type_def;
+
+	u32 valid;			/* cleared when the entry is removed */
+	struct list_head list;		/* hash bucket linkage */
+	struct rcu_head rcu;		/* used to free the entry via RCU */
+};
+
+/* init function */
+int netlbl_domhsh_init(u32 size);
+
+/* Manipulate the domain hash table */
+int netlbl_domhsh_add(struct netlbl_dom_map *entry);
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry);
+int netlbl_domhsh_remove_default(void);
+struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
+struct sk_buff *netlbl_domhsh_dump(size_t headroom);
+struct sk_buff *netlbl_domhsh_dump_default(size_t headroom);
+
+#endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
new file mode 100644
index 0000000..0fd8aaa
--- /dev/null
+++ b/net/netlabel/netlabel_kapi.c
@@ -0,0 +1,231 @@
+/*
+ * NetLabel Kernel API
+ *
+ * This file defines the kernel API for the NetLabel system.  The NetLabel
+ * system manages static and dynamic label mappings for network protocols such
+ * as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <net/ip.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+#include <asm/bug.h>
+
+#include "netlabel_domainhash.h"
+#include "netlabel_unlabeled.h"
+#include "netlabel_user.h"
+
+/*
+ * LSM Functions
+ */
+
+/**
+ * netlbl_socket_setattr - Label a socket using the correct protocol
+ * @sock: the socket to label
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Look up the mapping for the domain in @secattr and label the socket with
+ * the protocol it selects.  This function requires exclusive access to
+ * @sock->sk, which means it either needs to be in the process of being
+ * created or locked via lock_sock(sock->sk).  Returns zero on success,
+ * negative values on failure (-ENOENT if no usable mapping exists).
+ *
+ */
+int netlbl_socket_setattr(const struct socket *sock,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	/* -ENOENT covers both "no mapping" and "unknown mapping type" */
+	int rc = -ENOENT;
+	struct netlbl_dom_map *map;
+
+	/* the lookup result is only used inside the RCU read side
+	 * critical section, as netlbl_domhsh_getentry() requires */
+	rcu_read_lock();
+
+	map = netlbl_domhsh_getentry(secattr->domain);
+	if (map != NULL) {
+		if (map->type == NETLBL_NLTYPE_CIPSOV4) {
+			/* let the CIPSOv4 code attach the label */
+			rc = cipso_v4_socket_setattr(sock,
+						     map->type_def.cipsov4,
+						     secattr);
+		} else if (map->type == NETLBL_NLTYPE_UNLABELED) {
+			/* nothing to attach for unlabeled traffic */
+			rc = 0;
+		}
+	}
+
+	rcu_read_unlock();
+	return rc;
+}
+
+/**
+ * netlbl_socket_getattr - Determine the security attributes of a socket
+ * @sock: the socket
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Checks whether a NetLabel style label (CIPSOv4) has been applied to the
+ * socket and, if so, returns the parsed security attributes in @secattr.
+ * Otherwise the result of netlbl_unlabel_getattr() is returned.  Returns
+ * zero on success, negative values on failure.
+ *
+ */
+int netlbl_socket_getattr(const struct socket *sock,
+			  struct netlbl_lsm_secattr *secattr)
+{
+	/* a CIPSOv4 label on the socket takes precedence */
+	if (cipso_v4_socket_getattr(sock, secattr) == 0)
+		return 0;
+
+	/* no CIPSOv4 attributes could be obtained, fall back to the
+	 * unlabeled attributes; the CIPSOv4 error code is dropped */
+	return netlbl_unlabel_getattr(secattr);
+}
+
+/**
+ * netlbl_skbuff_getattr - Determine the security attributes of a packet
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Checks the packet for a recognized label (CIPSOv4) and, when one is found,
+ * returns the parsed security attributes in @secattr.  Otherwise the result
+ * of netlbl_unlabel_getattr() is returned.  Returns zero on success, negative
+ * values on failure.
+ *
+ */
+int netlbl_skbuff_getattr(const struct sk_buff *skb,
+			  struct netlbl_lsm_secattr *secattr)
+{
+	/* try the only labeling protocol handled here first */
+	if (cipso_v4_skbuff_getattr(skb, secattr) == 0)
+		return 0;
+
+	/* no CIPSOv4 attributes could be obtained, treat the packet as
+	 * unlabeled; the CIPSOv4 error code is dropped */
+	return netlbl_unlabel_getattr(secattr);
+}
+
+/**
+ * netlbl_skbuff_err - Handle a LSM error on a sk_buff
+ * @skb: the packet
+ * @error: the error code
+ *
+ * Description:
+ * Deal with a LSM problem when handling the packet in @skb, typically this is
+ * a permission denied problem (-EACCES).  Packets carrying a CIPSOv4 option
+ * are passed to cipso_v4_error(), nothing is done for any other packet.
+ *
+ */
+void netlbl_skbuff_err(struct sk_buff *skb, int error)
+{
+	if (CIPSO_V4_OPTEXIST(skb))
+		cipso_v4_error(skb, error, 0);
+}
+
+/**
+ * netlbl_cache_invalidate - Invalidate all of the NetLabel protocol caches
+ *
+ * Description:
+ * For all of the NetLabel protocols that support some form of label mapping
+ * cache, invalidate the cache.  Only CIPSOv4 is handled here and nothing is
+ * returned to the caller.
+ *
+ */
+void netlbl_cache_invalidate(void)
+{
+	cipso_v4_cache_invalidate();
+}
+
+/**
+ * netlbl_cache_add - Add an entry to a NetLabel protocol cache
+ * @skb: the packet
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Hand the LSM security attributes for the given packet to the label mapping
+ * cache of the protocol that labeled it.  Returns zero on success, -ENOMSG if
+ * there is nothing to cache or no cache to use, negative values on error.
+ *
+ */
+int netlbl_cache_add(const struct sk_buff *skb,
+		     const struct netlbl_lsm_secattr *secattr)
+{
+	/* caching needs both LSM cache data and a CIPSOv4 labeled packet */
+	if (secattr->cache.data != NULL) {
+		if (CIPSO_V4_OPTEXIST(skb))
+			return cipso_v4_cache_add(skb, secattr);
+	}
+
+	return -ENOMSG;
+}
+
+/*
+ * Setup Functions
+ */
+
+/**
+ * netlbl_init - Initialize NetLabel
+ *
+ * Description:
+ * Initialize NetLabel before first use; returns zero, any failure panic()s.
+ *
+ */
+static int __init netlbl_init(void)
+{
+	int ret_val;
+
+	printk(KERN_INFO "NetLabel: Initializing\n");
+	printk(KERN_INFO "NetLabel:  domain hash size = %u\n",
+	       (1 << NETLBL_DOMHSH_BITSIZE));
+	printk(KERN_INFO "NetLabel:  protocols ="
+	       " UNLABELED"
+	       " CIPSOv4"
+	       "\n");
+
+	ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE);
+	if (ret_val != 0)
+		goto init_failure;
+
+	ret_val = netlbl_netlink_init();
+	if (ret_val != 0)
+		goto init_failure;
+
+	ret_val = netlbl_unlabel_defconf();
+	if (ret_val != 0)
+		goto init_failure;
+	printk(KERN_INFO "NetLabel:  unlabeled traffic allowed by default\n");
+
+	return 0;
+
+init_failure:
+	panic("NetLabel: failed to initialize properly (%d)\n", ret_val);
+}
+
+subsys_initcall(netlbl_init);
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
new file mode 100644
index 0000000..85bc11a
--- /dev/null
+++ b/net/netlabel/netlabel_mgmt.c
@@ -0,0 +1,624 @@
+/*
+ * NetLabel Management Support
+ *
+ * This file defines the management functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <net/cipso_ipv4.h>
+
+#include "netlabel_domainhash.h"
+#include "netlabel_user.h"
+#include "netlabel_mgmt.h"
+
+/* NetLabel Generic NETLINK management family */
+static struct genl_family netlbl_mgmt_gnl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_MGMT_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = 0,
+};
+
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_mgmt_add - Handle an ADD message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADD message, add its domains to the hash table and
+ * reply with an ACK carrying the result.  See netlabel.h for the message
+ * format.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
+	int msg_len = netlbl_netlink_payload_len(skb);
+	u32 count;
+	struct netlbl_dom_map *entry = NULL;
+	u32 iter;
+	u32 tmp_val;
+	int tmp_size;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto add_failure;
+
+	if (msg_len < NETLBL_LEN_U32) {
+		ret_val = -EINVAL;
+		goto add_failure;
+	}
+	count = netlbl_getinc_u32(&msg_ptr, &msg_len);
+
+	for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) {
+		if (msg_len < NLA_HDRLEN) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+		if (entry == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		tmp_size = nla_len(msg_ptr);
+		if (tmp_size <= 0 || tmp_size > msg_len - NLA_HDRLEN) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		entry->domain = kmalloc(tmp_size, GFP_KERNEL);
+		if (entry->domain == NULL) {
+			ret_val = -ENOMEM;
+			goto add_failure;
+		}
+		nla_strlcpy(entry->domain, msg_ptr, tmp_size);
+		entry->domain[tmp_size - 1] = '\0';
+		msg_ptr = nla_next(msg_ptr, &msg_len);
+
+		if (msg_len < NETLBL_LEN_U32) {
+			ret_val = -EINVAL;
+			goto add_failure;
+		}
+		tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+		entry->type = tmp_val;
+		switch (tmp_val) {
+		case NETLBL_NLTYPE_UNLABELED:
+			ret_val = netlbl_domhsh_add(entry);
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			if (msg_len < NETLBL_LEN_U32) {
+				ret_val = -EINVAL;
+				goto add_failure;
+			}
+			tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+			/* RCU only protects the lookup and the add, the entry
+			 * is always deleted when the CIPSO DOI is deleted. */
+			rcu_read_lock();
+			entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
+			if (entry->type_def.cipsov4 == NULL) {
+				rcu_read_unlock();
+				ret_val = -EINVAL;
+				goto add_failure;
+			}
+			ret_val = netlbl_domhsh_add(entry);
+			rcu_read_unlock();
+			break;
+		default:
+			ret_val = -EINVAL;
+		}
+		if (ret_val != 0)
+			goto add_failure;
+	}
+
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				NETLBL_E_OK);
+	return 0;
+
+add_failure:
+	if (entry)
+		kfree(entry->domain);
+	kfree(entry);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_remove - Handle a REMOVE message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated REMOVE message, remove the listed domain mappings
+ * and ACK the result.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
+	int msg_len = netlbl_netlink_payload_len(skb);
+	u32 count;
+	u32 iter;
+	int tmp_size;
+	unsigned char *domain;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto remove_return;
+
+	if (msg_len < NETLBL_LEN_U32) {
+		ret_val = -EINVAL;
+		goto remove_return;
+	}
+	count = netlbl_getinc_u32(&msg_ptr, &msg_len);
+
+	for (iter = 0; iter < count && msg_len > 0; iter++) {
+		if (msg_len < NLA_HDRLEN) {
+			ret_val = -EINVAL;
+			goto remove_return;
+		}
+		tmp_size = nla_len(msg_ptr);
+		domain = nla_data(msg_ptr);
+		if (tmp_size <= 0 || tmp_size > msg_len - NLA_HDRLEN ||
+		    domain[tmp_size - 1] != '\0') {
+			ret_val = -EINVAL;
+			goto remove_return;
+		}
+		ret_val = netlbl_domhsh_remove(domain);
+		if (ret_val != 0)
+			goto remove_return;
+		msg_ptr = nla_next(msg_ptr, &msg_len);
+	}
+
+remove_return:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Answer a user generated LIST message with a kernel generated LIST message
+ * holding a dump of the domain hash table; an ACK carrying the error is sent
+ * instead on failure.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc = -ENOMEM;
+	struct sk_buff *reply;
+
+	/* the dump is asked to leave room in front of the payload for the
+	 * message header which is pushed below */
+	reply = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN));
+	if (reply != NULL) {
+		netlbl_netlink_hdr_push(reply,
+					info->snd_pid,
+					0,
+					netlbl_mgmt_gnl_family.id,
+					NLBL_MGMT_C_LIST);
+		rc = netlbl_netlink_snd(reply, info->snd_pid);
+		if (rc == 0)
+			return 0;
+	}
+
+	/* either there was no dump or it could not be sent, tell the
+	 * application why */
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-rc);
+	return rc;
+}
+
+/**
+ * netlbl_mgmt_adddef - Handle an ADDDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ADDDEF message, install the default domain mapping
+ * and ACK the result.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -EINVAL;
+	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
+	int msg_len = netlbl_netlink_payload_len(skb);
+	struct netlbl_dom_map *entry = NULL;
+	u32 tmp_val;
+
+	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (ret_val != 0)
+		goto adddef_failure;
+
+	if (msg_len < NETLBL_LEN_U32) {
+		ret_val = -EINVAL;
+		goto adddef_failure;
+	}
+	tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		ret_val = -ENOMEM;
+		goto adddef_failure;
+	}
+
+	entry->type = tmp_val;
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		ret_val = netlbl_domhsh_add_default(entry);
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		if (msg_len < NETLBL_LEN_U32) {
+			ret_val = -EINVAL;
+			goto adddef_failure;
+		}
+		tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
+		/* RCU only protects the lookup and the add, the entry is
+		 * always deleted when the CIPSO DOI is deleted. */
+		rcu_read_lock();
+		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
+		if (entry->type_def.cipsov4 == NULL) {
+			rcu_read_unlock();
+			ret_val = -EINVAL;
+			goto adddef_failure;
+		}
+		ret_val = netlbl_domhsh_add_default(entry);
+		rcu_read_unlock();
+		break;
+	default:
+		ret_val = -EINVAL;
+	}
+	if (ret_val != 0)
+		goto adddef_failure;
+
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				NETLBL_E_OK);
+	return 0;
+
+adddef_failure:
+	kfree(entry);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_removedef - Handle a REMOVEDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Handle a user generated REMOVEDEF message.  The request is refused unless
+ * the message carries CAP_NET_ADMIN, otherwise the default domain mapping is
+ * removed.  Either way an ACK holding the negated result is sent to the
+ * caller.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+
+	/* leave the domain hash alone unless the capability check passed */
+	if (rc == 0)
+		rc = netlbl_domhsh_remove_default();
+
+	/* success and failure share the ACK, it carries the negated result */
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-rc);
+	return rc;
+}
+
+/**
+ * netlbl_mgmt_listdef - Handle a LISTDEF message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Handle a user generated LISTDEF message by asking the domain hash for a
+ * dump of the default domain mapping and sending it back to the requester as
+ * a LISTDEF message; if that fails an ACK with the error is sent instead.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *reply;
+	int rc;
+
+	reply = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN));
+	if (reply == NULL) {
+		rc = -ENOMEM;
+		goto listdef_failure;
+	}
+	netlbl_netlink_hdr_push(reply,
+				info->snd_pid,
+				0,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_LISTDEF);
+	rc = netlbl_netlink_snd(reply, info->snd_pid);
+	if (rc == 0)
+		return 0;
+
+listdef_failure:
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-rc);
+	return rc;
+}
+
+/**
+ * netlbl_mgmt_modules - Handle a MODULES message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated MODULES message and respond with the list of
+ * NetLabel modules.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	size_t data_size;
+	u32 mod_count = 2;	/* unlabeled + cipsov4 */
+	struct sk_buff *ans_skb = NULL;
+
+	data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32;
+	ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
+	if (ans_skb == NULL)
+		goto modules_failure;
+
+	if (netlbl_netlink_hdr_put(ans_skb,
+				   info->snd_pid,
+				   0,
+				   netlbl_mgmt_gnl_family.id,
+				   NLBL_MGMT_C_MODULES) == NULL)
+		goto modules_failure;
+
+	ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count);
+	if (ret_val != 0)
+		goto modules_failure;
+	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED);
+	if (ret_val != 0)
+		goto modules_failure;
+	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4);
+	if (ret_val != 0)
+		goto modules_failure;
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	/* the unicast send consumes ans_skb, on error too: never free it */
+	ans_skb = NULL;
+	if (ret_val != 0)
+		goto modules_failure;
+
+	return 0;
+
+modules_failure:
+	kfree_skb(ans_skb);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+/**
+ * netlbl_mgmt_version - Handle a VERSION message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated VERSION message and reply with the NetLabel
+ * protocol version.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb = NULL;
+
+	ans_skb = netlbl_netlink_alloc_skb(0, GENL_HDRLEN + NETLBL_LEN_U32,
+					   GFP_KERNEL);
+	if (ans_skb == NULL)
+		goto version_failure;
+	if (netlbl_netlink_hdr_put(ans_skb,
+				   info->snd_pid,
+				   0,
+				   netlbl_mgmt_gnl_family.id,
+				   NLBL_MGMT_C_VERSION) == NULL)
+		goto version_failure;
+
+	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION);
+	if (ret_val != 0)
+		goto version_failure;
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	ans_skb = NULL;	/* consumed by the unicast send, on error too */
+	if (ret_val != 0)
+		goto version_failure;
+
+	return 0;
+
+version_failure:
+	kfree_skb(ans_skb);
+	netlbl_netlink_send_ack(info,
+				netlbl_mgmt_gnl_family.id,
+				NLBL_MGMT_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+
+/* NetLabel Generic NETLINK Command Definitions: one genl_ops per management
+ * command, each wiring up a doit handler only (no flags, no dumpit callback).
+ */
+
+static struct genl_ops netlbl_mgmt_genl_c_add = {
+	.cmd = NLBL_MGMT_C_ADD,
+	.flags = 0,
+	.doit = netlbl_mgmt_add,		/* add domain mappings */
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_remove = {
+	.cmd = NLBL_MGMT_C_REMOVE,
+	.flags = 0,
+	.doit = netlbl_mgmt_remove,		/* remove domain mappings */
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_list = {
+	.cmd = NLBL_MGMT_C_LIST,
+	.flags = 0,
+	.doit = netlbl_mgmt_list,		/* list the domain mappings */
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_adddef = {
+	.cmd = NLBL_MGMT_C_ADDDEF,
+	.flags = 0,
+	.doit = netlbl_mgmt_adddef,		/* set the default mapping */
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_removedef = {
+	.cmd = NLBL_MGMT_C_REMOVEDEF,
+	.flags = 0,
+	.doit = netlbl_mgmt_removedef,		/* remove the default mapping */
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_listdef = {
+	.cmd = NLBL_MGMT_C_LISTDEF,
+	.flags = 0,
+	.doit = netlbl_mgmt_listdef,		/* list the default mapping */
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_modules = {
+	.cmd = NLBL_MGMT_C_MODULES,
+	.flags = 0,
+	.doit = netlbl_mgmt_modules,		/* list the NetLabel modules */
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_mgmt_genl_c_version = {
+	.cmd = NLBL_MGMT_C_VERSION,
+	.flags = 0,
+	.doit = netlbl_mgmt_version,		/* report the protocol version */
+	.dumpit = NULL,
+};
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_mgmt_genl_init - Register the NetLabel management component
+ *
+ * Description:
+ * Register the NetLabel management family with the Generic NETLINK mechanism
+ * and then attach each of the management commands to it.  The registrations
+ * run in a fixed order and the first one to fail ends the sequence, its error
+ * being returned to the caller.  Whatever was registered before the failure
+ * is left in place by this function.  Returns zero on success, negative
+ * values on failure.
+ *
+ */
+int netlbl_mgmt_genl_init(void)
+{
+	int rc;
+
+	/* the family comes first, the commands are attached to it below */
+	rc = genl_register_family(&netlbl_mgmt_gnl_family);
+
+	/* domain mapping commands; every step is skipped once rc is set */
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_mgmt_gnl_family,
+				       &netlbl_mgmt_genl_c_add);
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_mgmt_gnl_family,
+				       &netlbl_mgmt_genl_c_remove);
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_mgmt_gnl_family,
+				       &netlbl_mgmt_genl_c_list);
+
+	/* default domain mapping commands */
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_mgmt_gnl_family,
+				       &netlbl_mgmt_genl_c_adddef);
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_mgmt_gnl_family,
+				       &netlbl_mgmt_genl_c_removedef);
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_mgmt_gnl_family,
+				       &netlbl_mgmt_genl_c_listdef);
+
+	/* informational commands */
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_mgmt_gnl_family,
+				       &netlbl_mgmt_genl_c_modules);
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_mgmt_gnl_family,
+				       &netlbl_mgmt_genl_c_version);
+
+	return rc;
+}
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
new file mode 100644
index 0000000..fd6c6ac
--- /dev/null
+++ b/net/netlabel/netlabel_mgmt.h
@@ -0,0 +1,246 @@
+/*
+ * NetLabel Management Support
+ *
+ * This file defines the management functions for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_MGMT_H
+#define _NETLABEL_MGMT_H
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the management interface,
+ * all of which are preceded by the nlmsghdr struct.
+ *
+ * o ACK:
+ *   Sent by the kernel in response to an application's message; applications
+ *   should never send this message.
+ *
+ *   +----------------------+-----------------------+
+ *   | seq number (32 bits) | return code (32 bits) |
+ *   +----------------------+-----------------------+
+ *
+ *     seq number:  the sequence number of the original message, taken from the
+ *                  nlmsghdr structure
+ *     return code: return value, based on errno values
+ *
+ * o ADD:
+ *   Sent by an application to add a domain mapping to the NetLabel system.
+ *   The kernel should respond with an ACK.
+ *
+ *   +-------------------+
+ *   | domains (32 bits) | ...
+ *   +-------------------+
+ *
+ *     domains: the number of domains in the message
+ *
+ *   +--------------------------+-------------------------+
+ *   | domain string (variable) | protocol type (32 bits) | ...
+ *   +--------------------------+-------------------------+
+ *
+ *   +-------------- ---- --- -- -
+ *   | mapping data                ... repeated
+ *   +-------------- ---- --- -- -
+ *
+ *     domain string: the domain string, NULL terminated
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +---------------+
+ *   | doi (32 bits) |
+ *   +---------------+
+ *
+ *     doi:  the CIPSO DOI value
+ *
+ * o REMOVE:
+ *   Sent by an application to remove a domain mapping from the NetLabel
+ *   system.  The kernel should ACK this message.
+ *
+ *   +-------------------+
+ *   | domains (32 bits) | ...
+ *   +-------------------+
+ *
+ *     domains: the number of domains in the message
+ *
+ *   +--------------------------+
+ *   | domain string (variable) | ...
+ *   +--------------------------+
+ *
+ *     domain string: the domain string, NULL terminated
+ *
+ * o LIST:
+ *   This message can be sent either from an application or by the kernel in
+ *   response to an application generated LIST message.  When sent by an
+ *   application there is no payload.  The kernel should respond to a LIST
+ *   message either with a LIST message on success or an ACK message on
+ *   failure.
+ *
+ *   +-------------------+
+ *   | domains (32 bits) | ...
+ *   +-------------------+
+ *
+ *     domains: the number of domains in the message
+ *
+ *   +--------------------------+
+ *   | domain string (variable) | ...
+ *   +--------------------------+
+ *
+ *   +-------------------------+-------------- ---- --- -- -
+ *   | protocol type (32 bits) | mapping data                ... repeated
+ *   +-------------------------+-------------- ---- --- -- -
+ *
+ *     domain string: the domain string, NULL terminated
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +----------------+---------------+
+ *   | type (32 bits) | doi (32 bits) |
+ *   +----------------+---------------+
+ *
+ *     type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
+ *           as CIPSO_V4_MAP_*)
+ *     doi:  the CIPSO DOI value
+ *
+ * o ADDDEF:
+ *   Sent by an application to set the default domain mapping for the NetLabel
+ *   system.  The kernel should respond with an ACK.
+ *
+ *   +-------------------------+-------------- ---- --- -- -
+ *   | protocol type (32 bits) | mapping data                ... repeated
+ *   +-------------------------+-------------- ---- --- -- -
+ *
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +---------------+
+ *   | doi (32 bits) |
+ *   +---------------+
+ *
+ *     doi:  the CIPSO DOI value
+ *
+ * o REMOVEDEF:
+ *   Sent by an application to remove the default domain mapping from the
+ *   NetLabel system, there is no payload.  The kernel should ACK this message.
+ *
+ * o LISTDEF:
+ *   This message can be sent either from an application or by the kernel in
+ *   response to an application generated LISTDEF message.  When sent by an
+ *   application there is no payload.  The kernel should respond to a
+ *   LISTDEF message either with a LISTDEF message on success or an ACK message
+ *   on failure.
+ *
+ *   +-------------------------+-------------- ---- --- -- -
+ *   | protocol type (32 bits) | mapping data                ... repeated
+ *   +-------------------------+-------------- ---- --- -- -
+ *
+ *     protocol type: the protocol type (defined by NETLBL_NLTYPE_*)
+ *     mapping data:  specific to the map type (see below)
+ *
+ *   NETLBL_NLTYPE_UNLABELED
+ *
+ *     No mapping data for this protocol type.
+ *
+ *   NETLBL_NLTYPE_CIPSOV4
+ *
+ *   +----------------+---------------+
+ *   | type (32 bits) | doi (32 bits) |
+ *   +----------------+---------------+
+ *
+ *     type: the CIPSO mapping table type (defined in the cipso_ipv4.h header
+ *           as CIPSO_V4_MAP_*)
+ *     doi:  the CIPSO DOI value
+ *
+ * o MODULES:
+ *   Sent by an application to request a list of configured NetLabel modules
+ *   in the kernel.  When sent by an application there is no payload.
+ *
+ *   +-------------------+
+ *   | modules (32 bits) | ...
+ *   +-------------------+
+ *
+ *     modules: the number of modules in the message, if this is an application
+ *              generated message and the value is zero then return a list of
+ *              the configured modules
+ *
+ *   +------------------+
+ *   | module (32 bits) | ... repeated
+ *   +------------------+
+ *
+ *     module: the module number as defined by NETLBL_NLTYPE_*
+ *
+ * o VERSION:
+ *   Sent by an application to request the NetLabel version string.  When sent
+ *   by an application there is no payload.  This message type is also used by
+ *   the kernel to respond to a VERSION request.
+ *
+ *   +-------------------+
+ *   | version (32 bits) |
+ *   +-------------------+
+ *
+ *     version: the protocol version number
+ *
+ */
+
+/* NetLabel Management commands, the payloads are described above */
+enum {
+	NLBL_MGMT_C_UNSPEC,
+	NLBL_MGMT_C_ACK,	/* kernel reply: seq number + return code */
+	NLBL_MGMT_C_ADD,	/* add domain mappings */
+	NLBL_MGMT_C_REMOVE,	/* remove domain mappings */
+	NLBL_MGMT_C_LIST,	/* list the domain mappings */
+	NLBL_MGMT_C_ADDDEF,	/* set the default domain mapping */
+	NLBL_MGMT_C_REMOVEDEF,	/* remove the default domain mapping */
+	NLBL_MGMT_C_LISTDEF,	/* list the default domain mapping */
+	NLBL_MGMT_C_MODULES,	/* list the configured NetLabel modules */
+	NLBL_MGMT_C_VERSION,	/* report the protocol version */
+	__NLBL_MGMT_C_MAX,	/* sentinel, keep last */
+};
+#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1)	/* highest command value */
+
+/* Register the management family and its commands with Generic NETLINK */
+int netlbl_mgmt_genl_init(void);
+
+#endif
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
new file mode 100644
index 0000000..785f496
--- /dev/null
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -0,0 +1,253 @@
+/*
+ * NetLabel Unlabeled Support
+ *
+ * This file defines functions for dealing with unlabeled packets for the
+ * NetLabel system.  The NetLabel system manages static and dynamic label
+ * mappings for network protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/socket.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <net/netlabel.h>
+#include <asm/bug.h>
+
+#include "netlabel_user.h"
+#include "netlabel_domainhash.h"
+#include "netlabel_unlabeled.h"
+
+/* Accept unlabeled packets flag: 1 = accept, 0 = reject (initial value) */
+static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0);
+
+/* NetLabel Generic NETLINK unlabeled family */
+static struct genl_family netlbl_unlabel_gnl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = NETLBL_NLTYPE_UNLABELED_NAME,
+	.version = NETLBL_PROTO_VERSION,
+	.maxattr = 0,
+};
+
+
+/*
+ * NetLabel Command Handlers
+ */
+
+/**
+ * netlbl_unlabel_accept - Handle an ACCEPT message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated ACCEPT message: senders lacking CAP_NET_ADMIN are
+ * turned away without an ACK, otherwise the accept flag is set from the
+ * payload and an ACK is sent.  Returns zero on success, negative on failure.
+ *
+ */
+static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *payload = netlbl_netlink_payload_data(skb);
+	u32 accept;
+	int rc;
+
+	rc = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
+	if (rc != 0)
+		return rc;
+
+	/* the payload has to be a single u32 holding either 0 or 1 */
+	if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32)
+		goto accept_invalid;
+	accept = nla_get_u32(payload);
+	if (accept != 0 && accept != 1)
+		goto accept_invalid;
+
+	atomic_set(&netlabel_unlabel_accept_flg, accept);
+	netlbl_netlink_send_ack(info, netlbl_unlabel_gnl_family.id,
+				NLBL_UNLABEL_C_ACK, NETLBL_E_OK);
+	return 0;
+
+accept_invalid:
+	netlbl_netlink_send_ack(info, netlbl_unlabel_gnl_family.id,
+				NLBL_UNLABEL_C_ACK, EINVAL);
+	return -EINVAL;
+}
+
+/**
+ * netlbl_unlabel_list - Handle a LIST message
+ * @skb: the NETLINK buffer
+ * @info: the Generic NETLINK info block
+ *
+ * Description:
+ * Process a user generated LIST message and respond with the accept flag, or
+ * with an ACK on error.  Returns zero on success, negative values on failure.
+ *
+ */
+static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret_val = -ENOMEM;
+	struct sk_buff *ans_skb;
+
+	ans_skb = netlbl_netlink_alloc_skb(0, GENL_HDRLEN + NETLBL_LEN_U32,
+					   GFP_KERNEL);
+	if (ans_skb == NULL)
+		goto list_failure;
+
+	if (netlbl_netlink_hdr_put(ans_skb,
+				   info->snd_pid,
+				   0,
+				   netlbl_unlabel_gnl_family.id,
+				   NLBL_UNLABEL_C_LIST) == NULL)
+		goto list_failure;
+
+	ret_val = nla_put_u32(ans_skb, NLA_U32,
+			      atomic_read(&netlabel_unlabel_accept_flg));
+	if (ret_val != 0)
+		goto list_failure;
+
+	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
+	ans_skb = NULL;	/* handed to the send, nothing left to free here */
+	if (ret_val != 0)
+		goto list_failure;
+
+	return 0;
+
+list_failure:
+	kfree_skb(ans_skb);	/* drops a reply that was never sent */
+	netlbl_netlink_send_ack(info,
+				netlbl_unlabel_gnl_family.id,
+				NLBL_UNLABEL_C_ACK,
+				-ret_val);
+	return ret_val;
+}
+
+
+/* NetLabel Generic NETLINK Command Definitions: one genl_ops per unlabeled
+ * command, each wiring up a doit handler only (no flags, no dumpit callback).
+ */
+
+static struct genl_ops netlbl_unlabel_genl_c_accept = {
+	.cmd = NLBL_UNLABEL_C_ACCEPT,
+	.flags = 0,
+	.doit = netlbl_unlabel_accept,		/* set the accept flag */
+	.dumpit = NULL,
+};
+
+static struct genl_ops netlbl_unlabel_genl_c_list = {
+	.cmd = NLBL_UNLABEL_C_LIST,
+	.flags = 0,
+	.doit = netlbl_unlabel_list,		/* report the accept flag */
+	.dumpit = NULL,
+};
+
+
+/*
+ * NetLabel Generic NETLINK Protocol Functions
+ */
+
+/**
+ * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component
+ *
+ * Description:
+ * Register the unlabeled packet NetLabel family with the Generic NETLINK
+ * mechanism and then attach the ACCEPT and LIST commands to it.  The steps
+ * run in that order and the first failure ends the sequence, its error being
+ * returned to the caller.  Whatever was registered before the failure is
+ * left in place by this function.  Returns zero on success, negative values
+ * on failure.
+ *
+ */
+int netlbl_unlabel_genl_init(void)
+{
+	int rc;
+
+	rc = genl_register_family(&netlbl_unlabel_gnl_family);
+
+	/* the commands are only attached once the family is registered */
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_unlabel_gnl_family,
+				       &netlbl_unlabel_genl_c_accept);
+	if (rc == 0)
+		rc = genl_register_ops(&netlbl_unlabel_gnl_family,
+				       &netlbl_unlabel_genl_c_list);
+
+	return rc;
+}
+
+/*
+ * NetLabel KAPI Hooks
+ */
+
+/**
+ * netlbl_unlabel_getattr - Get the security attributes of an unlabeled packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Report the security attributes of an unlabeled packet through @secattr,
+ * which is zeroed when unlabeled packets are accepted.  Returns zero on
+ * success and -ENOMSG while unlabeled packets are not being accepted.
+ */
+int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr)
+{
+	/* @secattr is left untouched unless the accept flag is set */
+	if (atomic_read(&netlabel_unlabel_accept_flg) != 1)
+		return -ENOMSG;
+
+	memset(secattr, 0, sizeof(*secattr));
+	return 0;
+}
+
+/**
+ * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets
+ *
+ * Description:
+ * Set the default NetLabel configuration: add an unlabeled default domain
+ * mapping and accept unlabeled packets.  Returns zero or a negative error.
+ *
+ */
+int netlbl_unlabel_defconf(void)
+{
+	int ret_val;
+	struct netlbl_dom_map *entry;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL)
+		return -ENOMEM;
+	entry->type = NETLBL_NLTYPE_UNLABELED;
+	ret_val = netlbl_domhsh_add_default(entry);
+	if (ret_val != 0) {
+		kfree(entry);	/* a rejected entry stays ours to free */
+		return ret_val;
+	}
+	atomic_set(&netlabel_unlabel_accept_flg, 1);
+	return 0;
+}
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
new file mode 100644
index 0000000..f300e54
--- /dev/null
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -0,0 +1,98 @@
+/*
+ * NetLabel Unlabeled Support
+ *
+ * This file defines functions for dealing with unlabeled packets for the
+ * NetLabel system.  The NetLabel system manages static and dynamic label
+ * mappings for network protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_UNLABELED_H
+#define _NETLABEL_UNLABELED_H
+
+#include <net/netlabel.h>
+
+/*
+ * The following NetLabel payloads are supported by the Unlabeled subsystem.
+ *
+ * o ACK:
+ *   Sent by the kernel in response to an application's message; applications
+ *   should never send this message.
+ *
+ *   +----------------------+-----------------------+
+ *   | seq number (32 bits) | return code (32 bits) |
+ *   +----------------------+-----------------------+
+ *
+ *     seq number:  the sequence number of the original message, taken from the
+ *                  nlmsghdr structure
+ *     return code: return value, based on errno values
+ *
+ * o ACCEPT
+ *   This message is sent from an application to specify if the kernel should
+ *   allow unlabeled packets to pass if they do not match any of the static
+ *   mappings defined in the unlabeled module.
+ *
+ *   +-----------------+
+ *   | allow (32 bits) |
+ *   +-----------------+
+ *
+ *     allow: if true (1) then allow the packets to pass, if false (0) then
+ *            reject the packets
+ *
+ * o LIST
+ *   This message can be sent either from an application or by the kernel in
+ *   response to an application generated LIST message.  When sent by an
+ *   application there is no payload.  The kernel should respond to a LIST
+ *   message either with a LIST message on success or an ACK message on
+ *   failure.
+ *
+ *   +-----------------------+
+ *   | accept flag (32 bits) |
+ *   +-----------------------+
+ *
+ *     accept flag: if true (1) then unlabeled packets are allowed to pass,
+ *                  if false (0) then unlabeled packets are rejected
+ *
+ */
+
+/* NetLabel Unlabeled commands, the payloads are described above */
+enum {
+	NLBL_UNLABEL_C_UNSPEC,
+	NLBL_UNLABEL_C_ACK,	/* kernel reply: seq number + return code */
+	NLBL_UNLABEL_C_ACCEPT,	/* set the accept flag */
+	NLBL_UNLABEL_C_LIST,	/* report the accept flag */
+	__NLBL_UNLABEL_C_MAX,	/* sentinel, keep last */
+};
+#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)	/* highest command */
+
+/* Register the unlabeled family and its commands with Generic NETLINK */
+int netlbl_unlabel_genl_init(void);
+
+/* Get the security attributes for an unlabeled incoming network packet */
+int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr);
+
+/* Set the default configuration to allow Unlabeled packets */
+int netlbl_unlabel_defconf(void);
+
+#endif
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
new file mode 100644
index 0000000..73cbe66
--- /dev/null
+++ b/net/netlabel/netlabel_user.c
@@ -0,0 +1,158 @@
+/*
+ * NetLabel NETLINK Interface
+ *
+ * This file defines the NETLINK interface for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/socket.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+#include <asm/bug.h>
+
+#include "netlabel_mgmt.h"
+#include "netlabel_unlabeled.h"
+#include "netlabel_cipso_v4.h"
+#include "netlabel_user.h"
+
+/*
+ * NetLabel NETLINK Setup Functions
+ */
+
+/**
+ * netlbl_netlink_init - Initialize the NETLINK communication channel
+ *
+ * Description:
+ * Call out to the NetLabel components so they can register their families and
+ * commands with the Generic NETLINK mechanism.  The management component is
+ * initialized first, followed by the CIPSOv4 and the unlabeled components;
+ * the first component to report an error ends the sequence and that error is
+ * handed back to the caller.  Returns zero on success and non-zero on
+ * failure.
+ *
+ */
+int netlbl_netlink_init(void)
+{
+	int rc;
+
+	/* management interface first */
+	rc = netlbl_mgmt_genl_init();
+
+	/* then the protocol components, skipped after an earlier failure */
+	if (rc == 0)
+		rc = netlbl_cipsov4_genl_init();
+	if (rc == 0)
+		rc = netlbl_unlabel_genl_init();
+
+	return rc;
+}
+
+/*
+ * NetLabel Common Protocol Functions
+ */
+
+/**
+ * netlbl_netlink_send_ack - Send an ACK message
+ * @info: the generic NETLINK information
+ * @genl_family: the generic NETLINK family ID value
+ * @ack_cmd: the generic NETLINK family ACK command value
+ * @ret_code: return code to use
+ *
+ * Description:
+ * Build an ACK message holding the sequence number of the message described
+ * by @info followed by @ret_code and send it to the sender of that message.
+ * The function is best effort: nothing is reported back to the caller if the
+ * ACK can not be allocated, assembled or sent.
+ *
+ */
+void netlbl_netlink_send_ack(const struct genl_info *info,
+			     u32 genl_family,
+			     u8 ack_cmd,
+			     u32 ret_code)
+{
+	struct sk_buff *ack_skb;
+
+	/* room for the generic header plus the two u32 payload fields */
+	ack_skb = netlbl_netlink_alloc_skb(0,
+					   GENL_HDRLEN + 2 * NETLBL_LEN_U32,
+					   GFP_KERNEL);
+	if (ack_skb == NULL)
+		return;
+
+	/* header first, then the sequence number and the return code; the
+	 * checks short-circuit so nothing is added after a failure */
+	if (netlbl_netlink_hdr_put(ack_skb,
+				   info->snd_pid,
+				   0,
+				   genl_family,
+				   ack_cmd) == NULL ||
+	    nla_put_u32(ack_skb, NLA_U32, info->snd_seq) != 0 ||
+	    nla_put_u32(ack_skb, NLA_U32, ret_code) != 0) {
+		kfree_skb(ack_skb);
+		return;
+	}
+
+	netlbl_netlink_snd(ack_skb, info->snd_pid);
+}
+
+/*
+ * NETLINK I/O Functions
+ */
+
+/**
+ * netlbl_netlink_snd - Send a NetLabel message
+ * @skb: NetLabel message
+ * @pid: destination PID
+ *
+ * Description:
+ * Sends a unicast NetLabel message over the NETLINK socket.  The result of
+ * genlmsg_unicast() is returned unchanged.
+ */
+int netlbl_netlink_snd(struct sk_buff *skb, u32 pid)
+{
+	return genlmsg_unicast(skb, pid);
+}
+
+/**
+ * netlbl_netlink_snd_multicast - Send a multicast NetLabel message
+ * @skb: NetLabel message
+ * @pid: sending PID
+ * @group: multicast group id
+ *
+ * Description:
+ * Sends a multicast NetLabel message over the NETLINK socket to all members
+ * of @group except @pid.  Returns the result of genlmsg_multicast().
+ *
+ */
+int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group)
+{
+	return genlmsg_multicast(skb, pid, group, GFP_KERNEL);
+}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
new file mode 100644
index 0000000..385a6c7
--- /dev/null
+++ b/net/netlabel/netlabel_user.h
@@ -0,0 +1,215 @@
+/*
+ * NetLabel NETLINK Interface
+ *
+ * This file defines the NETLINK interface for the NetLabel system.  The
+ * NetLabel system manages static and dynamic label mappings for network
+ * protocols such as CIPSO and RIPSO.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _NETLABEL_USER_H
+#define _NETLABEL_USER_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/capability.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/netlabel.h>
+
+/* NetLabel NETLINK helper functions */
+
+/**
+ * netlbl_netlink_cap_check - Check the NETLINK msg capabilities
+ * @skb: the NETLINK buffer
+ * @req_cap: the required capability
+ *
+ * Description:
+ * Check the NETLINK buffer's capabilities against the required capabilities.
+ * Returns zero on success, negative values on failure.
+ *
+ */
+static inline int netlbl_netlink_cap_check(const struct sk_buff *skb,
+					   kernel_cap_t req_cap)
+{
+	if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap))
+		return 0;
+	return -EPERM;
+}
+
+/**
+ * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on
+ * @nla: the attribute
+ * @rem_len: remaining length
+ *
+ * Description:
+ * Return a u8 value pointed to by @nla and advance it to the next attribute.
+ *
+ */
+static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len)
+{
+	u8 val = nla_get_u8(*nla);
+	*nla = nla_next(*nla, rem_len);
+	return val;
+}
+
+/**
+ * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on
+ * @nla: the attribute
+ * @rem_len: remaining length
+ *
+ * Description:
+ * Return a u16 value pointed to by @nla and advance it to the next attribute.
+ *
+ */
+static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len)
+{
+	u16 val = nla_get_u16(*nla);
+	*nla = nla_next(*nla, rem_len);
+	return val;
+}
+
+/**
+ * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on
+ * @nla: the attribute
+ * @rem_len: remaining length
+ *
+ * Description:
+ * Return a u32 value pointed to by @nla and advance it to the next attribute.
+ *
+ */
+static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len)
+{
+	u32 val = nla_get_u32(*nla);
+	*nla = nla_next(*nla, rem_len);
+	return val;
+}
+
+/**
+ * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff
+ * @skb: the packet
+ * @pid: the PID of the recipient
+ * @seq: the sequence number
+ * @type: the generic NETLINK message family type
+ * @cmd: command
+ *
+ * Description:
+ * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
+ * struct to the packet.  Returns a pointer to the start of the payload buffer
+ * on success or NULL on failure.
+ *
+ */
+static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
+					   u32 pid,
+					   u32 seq,
+					   int type,
+					   u8 cmd)
+{
+	return genlmsg_put(skb,
+			   pid,
+			   seq,
+			   type,
+			   0,
+			   0,
+			   cmd,
+			   NETLBL_PROTO_VERSION);
+}
+
+/**
+ * netlbl_netlink_hdr_push - Push the NETLINK headers onto the front of a sk_buff
+ * @skb: the packet
+ * @pid: the PID of the receipient
+ * @seq: the sequence number
+ * @type: the generic NETLINK message family type
+ * @cmd: command
+ *
+ * Description:
+ * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr
+ * struct to the packet.
+ *
+ */
+static inline void netlbl_netlink_hdr_push(struct sk_buff *skb,
+					   u32 pid,
+					   u32 seq,
+					   int type,
+					   u8 cmd)
+
+{
+	struct nlmsghdr *nlh;
+	struct genlmsghdr *hdr;
+
+	nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN));
+	nlh->nlmsg_type = type;
+	nlh->nlmsg_len = skb->len;
+	nlh->nlmsg_flags = 0;
+	nlh->nlmsg_pid = pid;
+	nlh->nlmsg_seq = seq;
+
+	hdr = nlmsg_data(nlh);
+	hdr->cmd = cmd;
+	hdr->version = NETLBL_PROTO_VERSION;
+	hdr->reserved = 0;
+}
+
+/**
+ * netlbl_netlink_payload_len - Return the length of the payload
+ * @skb: the NETLINK buffer
+ *
+ * Description:
+ * This function returns the length of the NetLabel payload.
+ *
+ */
+static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb)
+{
+	return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN;
+}
+
+/**
+ * netlbl_netlink_payload_data - Returns a pointer to the start of the payload
+ * @skb: the NETLINK buffer
+ *
+ * Description:
+ * This function returns a pointer to the start of the NetLabel payload.
+ *
+ */
+static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb)
+{
+  return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) +
+	  GENL_HDRLEN;
+}
+
+/* NetLabel common protocol functions */
+
+void netlbl_netlink_send_ack(const struct genl_info *info,
+			     u32 genl_family,
+			     u8 ack_cmd,
+			     u32 ret_code);
+
+/* NetLabel NETLINK I/O functions */
+
+int netlbl_netlink_init(void);
+int netlbl_netlink_snd(struct sk_buff *skb, u32 pid);
+int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group);
+
+#endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 8b85036..d56e0d2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1147,7 +1147,7 @@
 	if (len > sk->sk_sndbuf - 32)
 		goto out;
 	err = -ENOBUFS;
-	skb = alloc_skb(len, GFP_KERNEL);
+	skb = nlmsg_new(len, GFP_KERNEL);
 	if (skb==NULL)
 		goto out;
 
@@ -1341,19 +1341,18 @@
 	struct netlink_callback *cb;
 	struct sk_buff *skb;
 	struct nlmsghdr *nlh;
-	int len;
+	int len, err = -ENOBUFS;
 	
 	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
 	if (!skb)
-		return -ENOBUFS;
+		goto errout;
 
 	spin_lock(&nlk->cb_lock);
 
 	cb = nlk->cb;
 	if (cb == NULL) {
-		spin_unlock(&nlk->cb_lock);
-		kfree_skb(skb);
-		return -EINVAL;
+		err = -EINVAL;
+		goto errout_skb;
 	}
 
 	len = cb->dump(skb, cb);
@@ -1365,8 +1364,12 @@
 		return 0;
 	}
 
-	nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
-	memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
+	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
+	if (!nlh)
+		goto errout_skb;
+
+	memcpy(nlmsg_data(nlh), &len, sizeof(len));
+
 	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, skb->len);
 
@@ -1378,8 +1381,11 @@
 	netlink_destroy_callback(cb);
 	return 0;
 
-nlmsg_failure:
-	return -ENOBUFS;
+errout_skb:
+	spin_unlock(&nlk->cb_lock);
+	kfree_skb(skb);
+errout:
+	return err;
 }
 
 int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1431,11 +1437,11 @@
 	int size;
 
 	if (err == 0)
-		size = NLMSG_SPACE(sizeof(struct nlmsgerr));
+		size = nlmsg_total_size(sizeof(*errmsg));
 	else
-		size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len));
+		size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh));
 
-	skb = alloc_skb(size, GFP_KERNEL);
+	skb = nlmsg_new(size, GFP_KERNEL);
 	if (!skb) {
 		struct sock *sk;
 
@@ -1451,16 +1457,15 @@
 
 	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
 			  NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
-	errmsg = NLMSG_DATA(rep);
+	errmsg = nlmsg_data(rep);
 	errmsg->error = err;
-	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
+	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
 	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
 }
 
 static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 						     struct nlmsghdr *, int *))
 {
-	unsigned int total_len;
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -1470,8 +1475,6 @@
 		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
 			return 0;
 
-		total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
-
 		if (cb(skb, nlh, &err) < 0) {
 			/* Not an error, but we have to interrupt processing
 			 * here. Note: that in this case we do not pull
@@ -1483,7 +1486,7 @@
 		} else if (nlh->nlmsg_flags & NLM_F_ACK)
 			netlink_ack(skb, nlh, 0);
 
-		skb_pull(skb, total_len);
+		netlink_queue_skip(nlh, skb);
 	}
 
 	return 0;
@@ -1546,6 +1549,38 @@
 	skb_pull(skb, msglen);
 }
 
+/**
+ * nlmsg_notify - send a notification netlink message
+ * @sk: netlink socket to use
+ * @skb: notification message
+ * @pid: destination netlink pid for reports or 0
+ * @group: destination multicast group or 0
+ * @report: 1 to report back, 0 to disable
+ * @flags: allocation flags
+ */
+int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
+		 unsigned int group, int report, gfp_t flags)
+{
+	int err = 0;
+
+	if (group) {
+		int exclude_pid = 0;
+
+		if (report) {
+			atomic_inc(&skb->users);
+			exclude_pid = pid;
+		}
+
+		/* errors reported via destination sk->sk_err */
+		nlmsg_multicast(sk, skb, exclude_pid, group, flags);
+	}
+
+	if (report)
+		err = nlmsg_unicast(sk, skb, pid);
+
+	return err;
+}
+
 #ifdef CONFIG_PROC_FS
 struct nl_seq_iter {
 	int link;
@@ -1727,8 +1762,6 @@
 	.owner	= THIS_MODULE,	/* for consistency 8) */
 };
 
-extern void netlink_skb_parms_too_large(void);
-
 static int __init netlink_proto_init(void)
 {
 	struct sk_buff *dummy_skb;
@@ -1740,8 +1773,7 @@
 	if (err != 0)
 		goto out;
 
-	if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb))
-		netlink_skb_parms_too_large();
+	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
 
 	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
 	if (!nl_table)
@@ -1799,4 +1831,4 @@
 EXPORT_SYMBOL(netlink_set_nonroot);
 EXPORT_SYMBOL(netlink_unicast);
 EXPORT_SYMBOL(netlink_unregister_notifier);
-
+EXPORT_SYMBOL(nlmsg_notify);
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index dddbd15..0041395 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -20,7 +20,6 @@
 	[NLA_U16]	= sizeof(u16),
 	[NLA_U32]	= sizeof(u32),
 	[NLA_U64]	= sizeof(u64),
-	[NLA_STRING]	= 1,
 	[NLA_NESTED]	= NLA_HDRLEN,
 };
 
@@ -28,7 +27,7 @@
 			struct nla_policy *policy)
 {
 	struct nla_policy *pt;
-	int minlen = 0;
+	int minlen = 0, attrlen = nla_len(nla);
 
 	if (nla->nla_type <= 0 || nla->nla_type > maxtype)
 		return 0;
@@ -37,16 +36,46 @@
 
 	BUG_ON(pt->type > NLA_TYPE_MAX);
 
-	if (pt->minlen)
-		minlen = pt->minlen;
-	else if (pt->type != NLA_UNSPEC)
-		minlen = nla_attr_minlen[pt->type];
+	switch (pt->type) {
+	case NLA_FLAG:
+		if (attrlen > 0)
+			return -ERANGE;
+		break;
 
-	if (pt->type == NLA_FLAG && nla_len(nla) > 0)
-		return -ERANGE;
+	case NLA_NUL_STRING:
+		if (pt->len)
+			minlen = min_t(int, attrlen, pt->len + 1);
+		else
+			minlen = attrlen;
 
-	if (nla_len(nla) < minlen)
-		return -ERANGE;
+		if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL)
+			return -EINVAL;
+		/* fall through */
+
+	case NLA_STRING:
+		if (attrlen < 1)
+			return -ERANGE;
+
+		if (pt->len) {
+			char *buf = nla_data(nla);
+
+			if (buf[attrlen - 1] == '\0')
+				attrlen--;
+
+			if (attrlen > pt->len)
+				return -ERANGE;
+		}
+		break;
+
+	default:
+		if (pt->len)
+			minlen = pt->len;
+		else if (pt->type != NLA_UNSPEC)
+			minlen = nla_attr_minlen[pt->type];
+
+		if (attrlen < minlen)
+			return -ERANGE;
+	}
 
 	return 0;
 }
@@ -255,6 +284,26 @@
 }
 
 /**
+ * __nla_reserve_nohdr - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @attrlen: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the payload.
+ */
+void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+	void *start;
+
+	start = skb_put(skb, NLA_ALIGN(attrlen));
+	memset(start, 0, NLA_ALIGN(attrlen));
+
+	return start;
+}
+
+/**
  * nla_reserve - reserve room for attribute on the skb
  * @skb: socket buffer to reserve room on
  * @attrtype: attribute type
@@ -275,6 +324,24 @@
 }
 
 /**
+ * nla_reserve_nohdr - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @attrlen: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+		return NULL;
+
+	return __nla_reserve_nohdr(skb, attrlen);
+}
+
+/**
  * __nla_put - Add a netlink attribute to a socket buffer
  * @skb: socket buffer to add attribute to
  * @attrtype: attribute type
@@ -293,6 +360,22 @@
 	memcpy(nla_data(nla), data, attrlen);
 }
 
+/**
+ * __nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * The caller is responsible to ensure that the skb provides enough
+ * tailroom for the attribute payload.
+ */
+void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+	void *start;
+
+	start = __nla_reserve_nohdr(skb, attrlen);
+	memcpy(start, data, attrlen);
+}
 
 /**
  * nla_put - Add a netlink attribute to a socket buffer
@@ -313,15 +396,36 @@
 	return 0;
 }
 
+/**
+ * nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Returns -1 if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+		return -1;
+
+	__nla_put_nohdr(skb, attrlen, data);
+	return 0;
+}
 
 EXPORT_SYMBOL(nla_validate);
 EXPORT_SYMBOL(nla_parse);
 EXPORT_SYMBOL(nla_find);
 EXPORT_SYMBOL(nla_strlcpy);
 EXPORT_SYMBOL(__nla_reserve);
+EXPORT_SYMBOL(__nla_reserve_nohdr);
 EXPORT_SYMBOL(nla_reserve);
+EXPORT_SYMBOL(nla_reserve_nohdr);
 EXPORT_SYMBOL(__nla_put);
+EXPORT_SYMBOL(__nla_put_nohdr);
 EXPORT_SYMBOL(nla_put);
+EXPORT_SYMBOL(nla_put_nohdr);
 EXPORT_SYMBOL(nla_memcpy);
 EXPORT_SYMBOL(nla_memcmp);
 EXPORT_SYMBOL(nla_strcmp);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a298f77..49bc2db 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -387,7 +387,10 @@
 static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
 			  u32 flags, struct sk_buff *skb, u8 cmd)
 {
+	struct nlattr *nla_ops;
+	struct genl_ops *ops;
 	void *hdr;
+	int idx = 1;
 
 	hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd,
 			  family->version);
@@ -396,6 +399,37 @@
 
 	NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name);
 	NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id);
+	NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version);
+	NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize);
+	NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr);
+
+	nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS);
+	if (nla_ops == NULL)
+		goto nla_put_failure;
+
+	list_for_each_entry(ops, &family->ops_list, ops_list) {
+		struct nlattr *nest;
+
+		nest = nla_nest_start(skb, idx++);
+		if (nest == NULL)
+			goto nla_put_failure;
+
+		NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd);
+		NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags);
+
+		if (ops->policy)
+			NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY);
+
+		if (ops->doit)
+			NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT);
+
+		if (ops->dumpit)
+			NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT);
+
+		nla_nest_end(skb, nest);
+	}
+
+	nla_nest_end(skb, nla_ops);
 
 	return genlmsg_end(skb, hdr);
 
@@ -411,6 +445,9 @@
 	int chains_to_skip = cb->args[0];
 	int fams_to_skip = cb->args[1];
 
+	if (chains_to_skip != 0)
+		genl_lock();
+
 	for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
 		if (i < chains_to_skip)
 			continue;
@@ -428,6 +465,9 @@
 	}
 
 errout:
+	if (chains_to_skip != 0)
+		genl_unlock();
+
 	cb->args[0] = i;
 	cb->args[1] = n;
 
@@ -440,7 +480,7 @@
 	struct sk_buff *skb;
 	int err;
 
-	skb = nlmsg_new(NLMSG_GOODSIZE);
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb == NULL)
 		return ERR_PTR(-ENOBUFS);
 
@@ -455,7 +495,8 @@
 
 static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = {
 	[CTRL_ATTR_FAMILY_ID]	= { .type = NLA_U16 },
-	[CTRL_ATTR_FAMILY_NAME]	= { .type = NLA_STRING },
+	[CTRL_ATTR_FAMILY_NAME]	= { .type = NLA_NUL_STRING,
+				    .len = GENL_NAMSIZ - 1 },
 };
 
 static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
@@ -470,12 +511,9 @@
 	}
 
 	if (info->attrs[CTRL_ATTR_FAMILY_NAME]) {
-		char name[GENL_NAMSIZ];
+		char *name;
 
-		if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME],
-				GENL_NAMSIZ) >= GENL_NAMSIZ)
-			goto errout;
-
+		name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]);
 		res = genl_family_find_byname(name);
 	}
 
@@ -510,7 +548,7 @@
 		if (IS_ERR(msg))
 			return PTR_ERR(msg);
 
-		genlmsg_multicast(msg, 0, GENL_ID_CTRL);
+		genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL);
 		break;
 	}
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4172a52..f4ccb90 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -427,21 +427,24 @@
 }
 #endif
 
-static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
+static inline int run_filter(struct sk_buff *skb, struct sock *sk,
+							unsigned *snaplen)
 {
 	struct sk_filter *filter;
+	int err = 0;
 
-	bh_lock_sock(sk);
-	filter = sk->sk_filter;
-	/*
-	 * Our caller already checked that filter != NULL but we need to
-	 * verify that under bh_lock_sock() to be safe
-	 */
-	if (likely(filter != NULL))
-		res = sk_run_filter(skb, filter->insns, filter->len);
-	bh_unlock_sock(sk);
+	rcu_read_lock_bh();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter != NULL) {
+		err = sk_run_filter(skb, filter->insns, filter->len);
+		if (!err)
+			err = -EPERM;
+		else if (*snaplen > err)
+			*snaplen = err;
+	}
+	rcu_read_unlock_bh();
 
-	return res;
+	return err;
 }
 
 /*
@@ -491,13 +494,8 @@
 
 	snaplen = skb->len;
 
-	if (sk->sk_filter) {
-		unsigned res = run_filter(skb, sk, snaplen);
-		if (res == 0)
-			goto drop_n_restore;
-		if (snaplen > res)
-			snaplen = res;
-	}
+	if (run_filter(skb, sk, &snaplen) < 0)
+		goto drop_n_restore;
 
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 	    (unsigned)sk->sk_rcvbuf)
@@ -586,20 +584,15 @@
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
 			skb_pull(skb, skb->nh.raw - skb->data);
-			if (skb->ip_summed == CHECKSUM_HW)
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				status |= TP_STATUS_CSUMNOTREADY;
 		}
 	}
 
 	snaplen = skb->len;
 
-	if (sk->sk_filter) {
-		unsigned res = run_filter(skb, sk, snaplen);
-		if (res == 0)
-			goto drop_n_restore;
-		if (snaplen > res)
-			snaplen = res;
-	}
+	if (run_filter(skb, sk, &snaplen) < 0)
+		goto drop_n_restore;
 
 	if (sk->sk_type == SOCK_DGRAM) {
 		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a2587b5..835070e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -33,16 +33,230 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
-#if 0 /* control */
-#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
-#else
-#define DPRINTK(format, args...)
+void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+{
+	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
+	struct tcf_common **p1p;
+
+	for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
+		if (*p1p == p) {
+			write_lock_bh(hinfo->lock);
+			*p1p = p->tcfc_next;
+			write_unlock_bh(hinfo->lock);
+#ifdef CONFIG_NET_ESTIMATOR
+			gen_kill_estimator(&p->tcfc_bstats,
+					   &p->tcfc_rate_est);
 #endif
-#if 0 /* data */
-#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args)
-#else
-#define D2PRINTK(format, args...)
+			kfree(p);
+			return;
+		}
+	}
+	BUG_TRAP(0);
+}
+EXPORT_SYMBOL(tcf_hash_destroy);
+
+int tcf_hash_release(struct tcf_common *p, int bind,
+		     struct tcf_hashinfo *hinfo)
+{
+	int ret = 0;
+
+	if (p) {
+		if (bind)
+			p->tcfc_bindcnt--;
+
+		p->tcfc_refcnt--;
+	       	if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
+			tcf_hash_destroy(p, hinfo);
+			ret = 1;
+		}
+	}
+	return ret;
+}
+EXPORT_SYMBOL(tcf_hash_release);
+
+static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
+			   struct tc_action *a, struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p;
+	int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
+	struct rtattr *r ;
+
+	read_lock(hinfo->lock);
+
+	s_i = cb->args[0];
+
+	for (i = 0; i < (hinfo->hmask + 1); i++) {
+		p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+
+		for (; p; p = p->tcfc_next) {
+			index++;
+			if (index < s_i)
+				continue;
+			a->priv = p;
+			a->order = n_i;
+			r = (struct rtattr*) skb->tail;
+			RTA_PUT(skb, a->order, 0, NULL);
+			err = tcf_action_dump_1(skb, a, 0, 0);
+			if (err < 0) {
+				index--;
+				skb_trim(skb, (u8*)r - skb->data);
+				goto done;
+			}
+			r->rta_len = skb->tail - (u8*)r;
+			n_i++;
+			if (n_i >= TCA_ACT_MAX_PRIO)
+				goto done;
+		}
+	}
+done:
+	read_unlock(hinfo->lock);
+	if (n_i)
+		cb->args[0] += n_i;
+	return n_i;
+
+rtattr_failure:
+	skb_trim(skb, (u8*)r - skb->data);
+	goto done;
+}
+
+static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
+			  struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p, *s_p;
+	struct rtattr *r ;
+	int i= 0, n_i = 0;
+
+	r = (struct rtattr*) skb->tail;
+	RTA_PUT(skb, a->order, 0, NULL);
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
+	for (i = 0; i < (hinfo->hmask + 1); i++) {
+		p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+
+		while (p != NULL) {
+			s_p = p->tcfc_next;
+			if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
+				 module_put(a->ops->owner);
+			n_i++;
+			p = s_p;
+		}
+	}
+	RTA_PUT(skb, TCA_FCNT, 4, &n_i);
+	r->rta_len = skb->tail - (u8*)r;
+
+	return n_i;
+rtattr_failure:
+	skb_trim(skb, (u8*)r - skb->data);
+	return -EINVAL;
+}
+
+int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+		       int type, struct tc_action *a)
+{
+	struct tcf_hashinfo *hinfo = a->ops->hinfo;
+
+	if (type == RTM_DELACTION) {
+		return tcf_del_walker(skb, a, hinfo);
+	} else if (type == RTM_GETACTION) {
+		return tcf_dump_walker(skb, cb, a, hinfo);
+	} else {
+		printk("tcf_generic_walker: unknown action %d\n", type);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(tcf_generic_walker);
+
+struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p;
+
+	read_lock(hinfo->lock);
+	for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
+	     p = p->tcfc_next) {
+		if (p->tcfc_index == index)
+			break;
+	}
+	read_unlock(hinfo->lock);
+
+	return p;
+}
+EXPORT_SYMBOL(tcf_hash_lookup);
+
+u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo)
+{
+	u32 val = *idx_gen;
+
+	do {
+		if (++val == 0)
+			val = 1;
+	} while (tcf_hash_lookup(val, hinfo));
+
+	return (*idx_gen = val);
+}
+EXPORT_SYMBOL(tcf_hash_new_index);
+
+int tcf_hash_search(struct tc_action *a, u32 index)
+{
+	struct tcf_hashinfo *hinfo = a->ops->hinfo;
+	struct tcf_common *p = tcf_hash_lookup(index, hinfo);
+
+	if (p) {
+		a->priv = p;
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(tcf_hash_search);
+
+struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
+				  struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p = NULL;
+	if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
+		if (bind) {
+			p->tcfc_bindcnt++;
+			p->tcfc_refcnt++;
+		}
+		a->priv = p;
+	}
+	return p;
+}
+EXPORT_SYMBOL(tcf_hash_check);
+
+struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
+{
+	struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+
+	if (unlikely(!p))
+		return p;
+	p->tcfc_refcnt = 1;
+	if (bind)
+		p->tcfc_bindcnt = 1;
+
+	spin_lock_init(&p->tcfc_lock);
+	p->tcfc_stats_lock = &p->tcfc_lock;
+	p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
+	p->tcfc_tm.install = jiffies;
+	p->tcfc_tm.lastuse = jiffies;
+#ifdef CONFIG_NET_ESTIMATOR
+	if (est)
+		gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
+				  p->tcfc_stats_lock, est);
 #endif
+	a->priv = (void *) p;
+	return p;
+}
+EXPORT_SYMBOL(tcf_hash_create);
+
+void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+{
+	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
+
+	write_lock_bh(hinfo->lock);
+	p->tcfc_next = hinfo->htab[h];
+	hinfo->htab[h] = p;
+	write_unlock_bh(hinfo->lock);
+}
+EXPORT_SYMBOL(tcf_hash_insert);
 
 static struct tc_action_ops *act_base = NULL;
 static DEFINE_RWLOCK(act_mod_lock);
@@ -155,9 +369,6 @@
 
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
-		D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n",
-		         skb, skb->input_dev ? skb->input_dev->name : "xxx",
-		         skb->dev->name);
 		ret = TC_ACT_OK;
 		goto exec_done;
 	}
@@ -187,8 +398,6 @@
 
 	for (a = act; a; a = act) {
 		if (a->ops && a->ops->cleanup) {
-			DPRINTK("tcf_action_destroy destroying %p next %p\n",
-			        a, a->next);
 			if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
 				module_put(a->ops->owner);
 			act = act->next;
@@ -331,7 +540,6 @@
 	if (*err != ACT_P_CREATED)
 		module_put(a_o->owner);
 	a->ops = a_o;
-	DPRINTK("tcf_action_init_1: successfull %s\n", act_name);
 
 	*err = 0;
 	return a;
@@ -392,12 +600,12 @@
 	if (compat_mode) {
 		if (a->type == TCA_OLD_COMPAT)
 			err = gnet_stats_start_copy_compat(skb, 0,
-				TCA_STATS, TCA_XSTATS, h->stats_lock, &d);
+				TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d);
 		else
 			return 0;
 	} else
 		err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
-			h->stats_lock, &d);
+			h->tcf_stats_lock, &d);
 
 	if (err < 0)
 		goto errout;
@@ -406,11 +614,11 @@
 		if (a->ops->get_stats(skb, a) < 0)
 			goto errout;
 
-	if (gnet_stats_copy_basic(&d, &h->bstats) < 0 ||
+	if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
 #ifdef CONFIG_NET_ESTIMATOR
-	    gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 ||
 #endif
-	    gnet_stats_copy_queue(&d, &h->qstats) < 0)
+	    gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
 		goto errout;
 
 	if (gnet_stats_finish_copy(&d) < 0)
@@ -459,7 +667,6 @@
 act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
 {
 	struct sk_buff *skb;
-	int err = 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
@@ -468,10 +675,8 @@
 		kfree_skb(skb);
 		return -EINVAL;
 	}
-	err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
-	if (err > 0)
-		err = 0;
-	return err;
+
+	return rtnl_unicast(skb, pid);
 }
 
 static struct tc_action *
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e75a147..6cff566 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -34,48 +34,43 @@
 #include <linux/tc_act/tc_gact.h>
 #include <net/tc_act/tc_gact.h>
 
-/* use generic hash table */
-#define MY_TAB_SIZE	16
-#define MY_TAB_MASK	15
-
-static u32 idx_gen;
-static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE];
+#define GACT_TAB_MASK	15
+static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1];
+static u32 gact_idx_gen;
 static DEFINE_RWLOCK(gact_lock);
 
-/* ovewrride the defaults */
-#define tcf_st		tcf_gact
-#define tc_st		tc_gact
-#define tcf_t_lock	gact_lock
-#define tcf_ht		tcf_gact_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
+static struct tcf_hashinfo gact_hash_info = {
+	.htab	=	tcf_gact_ht,
+	.hmask	=	GACT_TAB_MASK,
+	.lock	=	&gact_lock,
+};
 
 #ifdef CONFIG_GACT_PROB
-static int gact_net_rand(struct tcf_gact *p)
+static int gact_net_rand(struct tcf_gact *gact)
 {
-	if (net_random()%p->pval)
-		return p->action;
-	return p->paction;
+	if (net_random() % gact->tcfg_pval)
+		return gact->tcf_action;
+	return gact->tcfg_paction;
 }
 
-static int gact_determ(struct tcf_gact *p)
+static int gact_determ(struct tcf_gact *gact)
 {
-	if (p->bstats.packets%p->pval)
-		return p->action;
-	return p->paction;
+	if (gact->tcf_bstats.packets % gact->tcfg_pval)
+		return gact->tcf_action;
+	return gact->tcfg_paction;
 }
 
-typedef int (*g_rand)(struct tcf_gact *p);
+typedef int (*g_rand)(struct tcf_gact *gact);
 static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
-#endif
+#endif /* CONFIG_GACT_PROB */
 
 static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
                          struct tc_action *a, int ovr, int bind)
 {
 	struct rtattr *tb[TCA_GACT_MAX];
 	struct tc_gact *parm;
-	struct tcf_gact *p;
+	struct tcf_gact *gact;
+	struct tcf_common *pc;
 	int ret = 0;
 
 	if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0)
@@ -94,105 +89,106 @@
 		return -EOPNOTSUPP;
 #endif
 
-	p = tcf_hash_check(parm->index, a, ovr, bind);
-	if (p == NULL) {
-		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
+	pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
+	if (!pc) {
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
+				     bind, &gact_idx_gen, &gact_hash_info);
+		if (unlikely(!pc))
 			return -ENOMEM;
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
-			tcf_hash_release(p, bind);
+			tcf_hash_release(pc, bind, &gact_hash_info);
 			return -EEXIST;
 		}
 	}
 
-	spin_lock_bh(&p->lock);
-	p->action = parm->action;
+	gact = to_gact(pc);
+
+	spin_lock_bh(&gact->tcf_lock);
+	gact->tcf_action = parm->action;
 #ifdef CONFIG_GACT_PROB
 	if (tb[TCA_GACT_PROB-1] != NULL) {
 		struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]);
-		p->paction = p_parm->paction;
-		p->pval    = p_parm->pval;
-		p->ptype   = p_parm->ptype;
+		gact->tcfg_paction = p_parm->paction;
+		gact->tcfg_pval    = p_parm->pval;
+		gact->tcfg_ptype   = p_parm->ptype;
 	}
 #endif
-	spin_unlock_bh(&p->lock);
+	spin_unlock_bh(&gact->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
+		tcf_hash_insert(pc, &gact_hash_info);
 	return ret;
 }
 
-static int
-tcf_gact_cleanup(struct tc_action *a, int bind)
+static int tcf_gact_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_gact *p = PRIV(a, gact);
+	struct tcf_gact *gact = a->priv;
 
-	if (p != NULL)
-		return tcf_hash_release(p, bind);
+	if (gact)
+		return tcf_hash_release(&gact->common, bind, &gact_hash_info);
 	return 0;
 }
 
-static int
-tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 {
-	struct tcf_gact *p = PRIV(a, gact);
+	struct tcf_gact *gact = a->priv;
 	int action = TC_ACT_SHOT;
 
-	spin_lock(&p->lock);
+	spin_lock(&gact->tcf_lock);
 #ifdef CONFIG_GACT_PROB
-	if (p->ptype && gact_rand[p->ptype] != NULL)
-		action = gact_rand[p->ptype](p);
+	if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
+		action = gact_rand[gact->tcfg_ptype](gact);
 	else
-		action = p->action;
+		action = gact->tcf_action;
 #else
-	action = p->action;
+	action = gact->tcf_action;
 #endif
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	gact->tcf_bstats.bytes += skb->len;
+	gact->tcf_bstats.packets++;
 	if (action == TC_ACT_SHOT)
-		p->qstats.drops++;
-	p->tm.lastuse = jiffies;
-	spin_unlock(&p->lock);
+		gact->tcf_qstats.drops++;
+	gact->tcf_tm.lastuse = jiffies;
+	spin_unlock(&gact->tcf_lock);
 
 	return action;
 }
 
-static int
-tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	unsigned char *b = skb->tail;
 	struct tc_gact opt;
-	struct tcf_gact *p = PRIV(a, gact);
+	struct tcf_gact *gact = a->priv;
 	struct tcf_t t;
 
-	opt.index = p->index;
-	opt.refcnt = p->refcnt - ref;
-	opt.bindcnt = p->bindcnt - bind;
-	opt.action = p->action;
+	opt.index = gact->tcf_index;
+	opt.refcnt = gact->tcf_refcnt - ref;
+	opt.bindcnt = gact->tcf_bindcnt - bind;
+	opt.action = gact->tcf_action;
 	RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
 #ifdef CONFIG_GACT_PROB
-	if (p->ptype) {
+	if (gact->tcfg_ptype) {
 		struct tc_gact_p p_opt;
-		p_opt.paction = p->paction;
-		p_opt.pval = p->pval;
-		p_opt.ptype = p->ptype;
+		p_opt.paction = gact->tcfg_paction;
+		p_opt.pval = gact->tcfg_pval;
+		p_opt.ptype = gact->tcfg_ptype;
 		RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
 	}
 #endif
-	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	t.expires = jiffies_to_clock_t(p->tm.expires);
+	t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
 	RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
 	return skb->len;
 
-      rtattr_failure:
+rtattr_failure:
 	skb_trim(skb, b - skb->data);
 	return -1;
 }
 
 static struct tc_action_ops act_gact_ops = {
 	.kind		=	"gact",
+	.hinfo		=	&gact_hash_info,
 	.type		=	TCA_ACT_GACT,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
@@ -208,8 +204,7 @@
 MODULE_DESCRIPTION("Generic Classifier actions");
 MODULE_LICENSE("GPL");
 
-static int __init
-gact_init_module(void)
+static int __init gact_init_module(void)
 {
 #ifdef CONFIG_GACT_PROB
 	printk("GACT probability on\n");
@@ -219,8 +214,7 @@
 	return tcf_register_action(&act_gact_ops);
 }
 
-static void __exit
-gact_cleanup_module(void)
+static void __exit gact_cleanup_module(void)
 {
 	tcf_unregister_action(&act_gact_ops);
 }
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d799e01..d8c9310 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -38,25 +38,19 @@
 
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-/* use generic hash table */
-#define MY_TAB_SIZE     16
-#define MY_TAB_MASK     15
 
-static u32 idx_gen;
-static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE];
-/* ipt hash table lock */
+#define IPT_TAB_MASK     15
+static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1];
+static u32 ipt_idx_gen;
 static DEFINE_RWLOCK(ipt_lock);
 
-/* ovewrride the defaults */
-#define tcf_st		tcf_ipt
-#define tcf_t_lock	ipt_lock
-#define tcf_ht		tcf_ipt_ht
+static struct tcf_hashinfo ipt_hash_info = {
+	.htab	=	tcf_ipt_ht,
+	.hmask	=	IPT_TAB_MASK,
+	.lock	=	&ipt_lock,
+};
 
-#define CONFIG_NET_ACT_INIT
-#include <net/pkt_act.h>
-
-static int
-ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
+static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
 {
 	struct ipt_target *target;
 	int ret = 0;
@@ -65,7 +59,6 @@
 	if (!target)
 		return -ENOENT;
 
-	DPRINTK("ipt_init_target: found %s\n", target->name);
 	t->u.kernel.target = target;
 
 	ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
@@ -76,10 +69,7 @@
 	if (t->u.kernel.target->checkentry
 	    && !t->u.kernel.target->checkentry(table, NULL,
 		    			       t->u.kernel.target, t->data,
-					       t->u.target_size - sizeof(*t),
 					       hook)) {
-		DPRINTK("ipt_init_target: check failed for `%s'.\n",
-			t->u.kernel.target->name);
 		module_put(t->u.kernel.target->me);
 		ret = -EINVAL;
 	}
@@ -87,40 +77,37 @@
 	return ret;
 }
 
-static void
-ipt_destroy_target(struct ipt_entry_target *t)
+static void ipt_destroy_target(struct ipt_entry_target *t)
 {
 	if (t->u.kernel.target->destroy)
-		t->u.kernel.target->destroy(t->u.kernel.target, t->data,
-		                            t->u.target_size - sizeof(*t));
+		t->u.kernel.target->destroy(t->u.kernel.target, t->data);
         module_put(t->u.kernel.target->me);
 }
 
-static int
-tcf_ipt_release(struct tcf_ipt *p, int bind)
+static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
 {
 	int ret = 0;
-	if (p) {
+	if (ipt) {
 		if (bind)
-			p->bindcnt--;
-		p->refcnt--;
-		if (p->bindcnt <= 0 && p->refcnt <= 0) {
-			ipt_destroy_target(p->t);
-			kfree(p->tname);
-			kfree(p->t);
-			tcf_hash_destroy(p);
+			ipt->tcf_bindcnt--;
+		ipt->tcf_refcnt--;
+		if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) {
+			ipt_destroy_target(ipt->tcfi_t);
+			kfree(ipt->tcfi_tname);
+			kfree(ipt->tcfi_t);
+			tcf_hash_destroy(&ipt->common, &ipt_hash_info);
 			ret = ACT_P_DELETED;
 		}
 	}
 	return ret;
 }
 
-static int
-tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
-             int ovr, int bind)
+static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
+			struct tc_action *a, int ovr, int bind)
 {
 	struct rtattr *tb[TCA_IPT_MAX];
-	struct tcf_ipt *p;
+	struct tcf_ipt *ipt;
+	struct tcf_common *pc;
 	struct ipt_entry_target *td, *t;
 	char *tname;
 	int ret = 0, err;
@@ -144,49 +131,51 @@
 	    RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
 		index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);
 
-	p = tcf_hash_check(index, a, ovr, bind);
-	if (p == NULL) {
-		p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
+	pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
+	if (!pc) {
+		pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
+				     &ipt_idx_gen, &ipt_hash_info);
+		if (unlikely(!pc))
 			return -ENOMEM;
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
-			tcf_ipt_release(p, bind);
+			tcf_ipt_release(to_ipt(pc), bind);
 			return -EEXIST;
 		}
 	}
+	ipt = to_ipt(pc);
 
 	hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);
 
 	err = -ENOMEM;
 	tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
-	if (tname == NULL)
+	if (unlikely(!tname))
 		goto err1;
 	if (tb[TCA_IPT_TABLE - 1] == NULL ||
 	    rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
 		strcpy(tname, "mangle");
 
 	t = kmalloc(td->u.target_size, GFP_KERNEL);
-	if (t == NULL)
+	if (unlikely(!t))
 		goto err2;
 	memcpy(t, td, td->u.target_size);
 
 	if ((err = ipt_init_target(t, tname, hook)) < 0)
 		goto err3;
 
-	spin_lock_bh(&p->lock);
+	spin_lock_bh(&ipt->tcf_lock);
 	if (ret != ACT_P_CREATED) {
-		ipt_destroy_target(p->t);
-		kfree(p->tname);
-		kfree(p->t);
+		ipt_destroy_target(ipt->tcfi_t);
+		kfree(ipt->tcfi_tname);
+		kfree(ipt->tcfi_t);
 	}
-	p->tname = tname;
-	p->t     = t;
-	p->hook  = hook;
-	spin_unlock_bh(&p->lock);
+	ipt->tcfi_tname = tname;
+	ipt->tcfi_t     = t;
+	ipt->tcfi_hook  = hook;
+	spin_unlock_bh(&ipt->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
+		tcf_hash_insert(pc, &ipt_hash_info);
 	return ret;
 
 err3:
@@ -194,33 +183,32 @@
 err2:
 	kfree(tname);
 err1:
-	kfree(p);
+	kfree(pc);
 	return err;
 }
 
-static int
-tcf_ipt_cleanup(struct tc_action *a, int bind)
+static int tcf_ipt_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_ipt *p = PRIV(a, ipt);
-	return tcf_ipt_release(p, bind);
+	struct tcf_ipt *ipt = a->priv;
+	return tcf_ipt_release(ipt, bind);
 }
 
-static int
-tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
+		   struct tcf_result *res)
 {
 	int ret = 0, result = 0;
-	struct tcf_ipt *p = PRIV(a, ipt);
+	struct tcf_ipt *ipt = a->priv;
 
 	if (skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 			return TC_ACT_UNSPEC;
 	}
 
-	spin_lock(&p->lock);
+	spin_lock(&ipt->tcf_lock);
 
-	p->tm.lastuse = jiffies;
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	ipt->tcf_tm.lastuse = jiffies;
+	ipt->tcf_bstats.bytes += skb->len;
+	ipt->tcf_bstats.packets++;
 
 	/* yes, we have to worry about both in and out dev
 	 worry later - danger - this API seems to have changed
@@ -229,16 +217,17 @@
 	/* iptables targets take a double skb pointer in case the skb
 	 * needs to be replaced. We don't own the skb, so this must not
 	 * happen. The pskb_expand_head above should make sure of this */
-	ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook,
-					    p->t->u.kernel.target, p->t->data,
-					    NULL);
+	ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
+						   ipt->tcfi_hook,
+						   ipt->tcfi_t->u.kernel.target,
+						   ipt->tcfi_t->data);
 	switch (ret) {
 	case NF_ACCEPT:
 		result = TC_ACT_OK;
 		break;
 	case NF_DROP:
 		result = TC_ACT_SHOT;
-		p->qstats.drops++;
+		ipt->tcf_qstats.drops++;
 		break;
 	case IPT_CONTINUE:
 		result = TC_ACT_PIPE;
@@ -249,53 +238,46 @@
 		result = TC_POLICE_OK;
 		break;
 	}
-	spin_unlock(&p->lock);
+	spin_unlock(&ipt->tcf_lock);
 	return result;
 
 }
 
-static int
-tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
+	unsigned char *b = skb->tail;
+	struct tcf_ipt *ipt = a->priv;
 	struct ipt_entry_target *t;
 	struct tcf_t tm;
 	struct tc_cnt c;
-	unsigned char *b = skb->tail;
-	struct tcf_ipt *p = PRIV(a, ipt);
 
 	/* for simple targets kernel size == user size
 	** user name = target name
 	** for foolproof you need to not assume this
 	*/
 
-	t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC);
-	if (t == NULL)
+	t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
+	if (unlikely(!t))
 		goto rtattr_failure;
 
-	c.bindcnt = p->bindcnt - bind;
-	c.refcnt = p->refcnt - ref;
-	memcpy(t, p->t, p->t->u.user.target_size);
-	strcpy(t->u.user.name, p->t->u.kernel.target->name);
+	c.bindcnt = ipt->tcf_bindcnt - bind;
+	c.refcnt = ipt->tcf_refcnt - ref;
+	memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size);
+	strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
 
-	DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname,
-		strlen(p->tname));
-	DPRINTK("\tdump target name %s size %d size user %d "
-	        "data[0] %x data[1] %x\n", p->t->u.kernel.target->name,
-	        p->t->u.target_size, p->t->u.user.target_size,
-	        p->t->data[0], p->t->data[1]);
-	RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t);
-	RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index);
-	RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook);
+	RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
+	RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index);
+	RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook);
 	RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
-	RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname);
-	tm.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	tm.expires = jiffies_to_clock_t(p->tm.expires);
+	RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname);
+	tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
+	tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
+	tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
 	RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
 	kfree(t);
 	return skb->len;
 
-      rtattr_failure:
+rtattr_failure:
 	skb_trim(skb, b - skb->data);
 	kfree(t);
 	return -1;
@@ -303,6 +285,7 @@
 
 static struct tc_action_ops act_ipt_ops = {
 	.kind		=	"ipt",
+	.hinfo		=	&ipt_hash_info,
 	.type		=	TCA_ACT_IPT,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
@@ -318,14 +301,12 @@
 MODULE_DESCRIPTION("Iptables target actions");
 MODULE_LICENSE("GPL");
 
-static int __init
-ipt_init_module(void)
+static int __init ipt_init_module(void)
 {
 	return tcf_register_action(&act_ipt_ops);
 }
 
-static void __exit
-ipt_cleanup_module(void)
+static void __exit ipt_cleanup_module(void)
 {
 	tcf_unregister_action(&act_ipt_ops);
 }
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fc56204..4838972 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -39,46 +39,39 @@
 #include <linux/etherdevice.h>
 #include <linux/if_arp.h>
 
-
-/* use generic hash table */
-#define MY_TAB_SIZE     8
-#define MY_TAB_MASK     (MY_TAB_SIZE - 1)
-static u32 idx_gen;
-static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE];
+#define MIRRED_TAB_MASK     7
+static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
+static u32 mirred_idx_gen;
 static DEFINE_RWLOCK(mirred_lock);
 
-/* ovewrride the defaults */
-#define tcf_st		tcf_mirred
-#define tc_st		tc_mirred
-#define tcf_t_lock	mirred_lock
-#define tcf_ht		tcf_mirred_ht
+static struct tcf_hashinfo mirred_hash_info = {
+	.htab	=	tcf_mirred_ht,
+	.hmask	=	MIRRED_TAB_MASK,
+	.lock	=	&mirred_lock,
+};
 
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
-
-static inline int
-tcf_mirred_release(struct tcf_mirred *p, int bind)
+static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
 {
-	if (p) {
+	if (m) {
 		if (bind)
-			p->bindcnt--;
-		p->refcnt--;
-		if(!p->bindcnt && p->refcnt <= 0) {
-			dev_put(p->dev);
-			tcf_hash_destroy(p);
+			m->tcf_bindcnt--;
+		m->tcf_refcnt--;
+		if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
+			dev_put(m->tcfm_dev);
+			tcf_hash_destroy(&m->common, &mirred_hash_info);
 			return 1;
 		}
 	}
 	return 0;
 }
 
-static int
-tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
-                int ovr, int bind)
+static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est,
+			   struct tc_action *a, int ovr, int bind)
 {
 	struct rtattr *tb[TCA_MIRRED_MAX];
 	struct tc_mirred *parm;
-	struct tcf_mirred *p;
+	struct tcf_mirred *m;
+	struct tcf_common *pc;
 	struct net_device *dev = NULL;
 	int ret = 0;
 	int ok_push = 0;
@@ -110,64 +103,62 @@
 		}
 	}
 
-	p = tcf_hash_check(parm->index, a, ovr, bind);
-	if (p == NULL) {
+	pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info);
+	if (!pc) {
 		if (!parm->ifindex)
 			return -EINVAL;
-		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
+				     &mirred_idx_gen, &mirred_hash_info);
+		if (unlikely(!pc))
 			return -ENOMEM;
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
-			tcf_mirred_release(p, bind);
+			tcf_mirred_release(to_mirred(pc), bind);
 			return -EEXIST;
 		}
 	}
+	m = to_mirred(pc);
 
-	spin_lock_bh(&p->lock);
-	p->action = parm->action;
-	p->eaction = parm->eaction;
+	spin_lock_bh(&m->tcf_lock);
+	m->tcf_action = parm->action;
+	m->tcfm_eaction = parm->eaction;
 	if (parm->ifindex) {
-		p->ifindex = parm->ifindex;
+		m->tcfm_ifindex = parm->ifindex;
 		if (ret != ACT_P_CREATED)
-			dev_put(p->dev);
-		p->dev = dev;
+			dev_put(m->tcfm_dev);
+		m->tcfm_dev = dev;
 		dev_hold(dev);
-		p->ok_push = ok_push;
+		m->tcfm_ok_push = ok_push;
 	}
-	spin_unlock_bh(&p->lock);
+	spin_unlock_bh(&m->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
+		tcf_hash_insert(pc, &mirred_hash_info);
 
-	DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s "
-	        "ifindex %d\n", parm->index, parm->action, parm->eaction,
-	        dev->name, parm->ifindex);
 	return ret;
 }
 
-static int
-tcf_mirred_cleanup(struct tc_action *a, int bind)
+static int tcf_mirred_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_mirred *p = PRIV(a, mirred);
+	struct tcf_mirred *m = a->priv;
 
-	if (p != NULL)
-		return tcf_mirred_release(p, bind);
+	if (m)
+		return tcf_mirred_release(m, bind);
 	return 0;
 }
 
-static int
-tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
+		      struct tcf_result *res)
 {
-	struct tcf_mirred *p = PRIV(a, mirred);
+	struct tcf_mirred *m = a->priv;
 	struct net_device *dev;
 	struct sk_buff *skb2 = NULL;
 	u32 at = G_TC_AT(skb->tc_verd);
 
-	spin_lock(&p->lock);
+	spin_lock(&m->tcf_lock);
 
-	dev = p->dev;
-	p->tm.lastuse = jiffies;
+	dev = m->tcfm_dev;
+	m->tcf_tm.lastuse = jiffies;
 
 	if (!(dev->flags&IFF_UP) ) {
 		if (net_ratelimit())
@@ -176,10 +167,10 @@
 bad_mirred:
 		if (skb2 != NULL)
 			kfree_skb(skb2);
-		p->qstats.overlimits++;
-		p->bstats.bytes += skb->len;
-		p->bstats.packets++;
-		spin_unlock(&p->lock);
+		m->tcf_qstats.overlimits++;
+		m->tcf_bstats.bytes += skb->len;
+		m->tcf_bstats.packets++;
+		spin_unlock(&m->tcf_lock);
 		/* should we be asking for packet to be dropped?
 		 * may make sense for redirect case only
 		*/
@@ -189,59 +180,59 @@
 	skb2 = skb_clone(skb, GFP_ATOMIC);
 	if (skb2 == NULL)
 		goto bad_mirred;
-	if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) {
+	if (m->tcfm_eaction != TCA_EGRESS_MIRROR &&
+	    m->tcfm_eaction != TCA_EGRESS_REDIR) {
 		if (net_ratelimit())
-			printk("tcf_mirred unknown action %d\n", p->eaction);
+			printk("tcf_mirred unknown action %d\n",
+			       m->tcfm_eaction);
 		goto bad_mirred;
 	}
 
-	p->bstats.bytes += skb2->len;
-	p->bstats.packets++;
+	m->tcf_bstats.bytes += skb2->len;
+	m->tcf_bstats.packets++;
 	if (!(at & AT_EGRESS))
-		if (p->ok_push)
+		if (m->tcfm_ok_push)
 			skb_push(skb2, skb2->dev->hard_header_len);
 
 	/* mirror is always swallowed */
-	if (p->eaction != TCA_EGRESS_MIRROR)
+	if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
 		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
 
 	skb2->dev = dev;
 	skb2->input_dev = skb->dev;
 	dev_queue_xmit(skb2);
-	spin_unlock(&p->lock);
-	return p->action;
+	spin_unlock(&m->tcf_lock);
+	return m->tcf_action;
 }
 
-static int
-tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	unsigned char *b = skb->tail;
+	struct tcf_mirred *m = a->priv;
 	struct tc_mirred opt;
-	struct tcf_mirred *p = PRIV(a, mirred);
 	struct tcf_t t;
 
-	opt.index = p->index;
-	opt.action = p->action;
-	opt.refcnt = p->refcnt - ref;
-	opt.bindcnt = p->bindcnt - bind;
-	opt.eaction = p->eaction;
-	opt.ifindex = p->ifindex;
-	DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n",
-	         p->index, p->action, p->eaction, p->ifindex);
+	opt.index = m->tcf_index;
+	opt.action = m->tcf_action;
+	opt.refcnt = m->tcf_refcnt - ref;
+	opt.bindcnt = m->tcf_bindcnt - bind;
+	opt.eaction = m->tcfm_eaction;
+	opt.ifindex = m->tcfm_ifindex;
 	RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
-	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	t.expires = jiffies_to_clock_t(p->tm.expires);
+	t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
 	RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
 	return skb->len;
 
-      rtattr_failure:
+rtattr_failure:
 	skb_trim(skb, b - skb->data);
 	return -1;
 }
 
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
+	.hinfo		=	&mirred_hash_info,
 	.type		=	TCA_ACT_MIRRED,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
@@ -257,15 +248,13 @@
 MODULE_DESCRIPTION("Device Mirror/redirect actions");
 MODULE_LICENSE("GPL");
 
-static int __init
-mirred_init_module(void)
+static int __init mirred_init_module(void)
 {
 	printk("Mirror/redirect action on\n");
 	return tcf_register_action(&act_mirred_ops);
 }
 
-static void __exit
-mirred_cleanup_module(void)
+static void __exit mirred_cleanup_module(void)
 {
 	tcf_unregister_action(&act_mirred_ops);
 }
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index f257475..8ac65c2 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -33,32 +33,25 @@
 #include <linux/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_pedit.h>
 
-
-#define PEDIT_DEB 1
-
-/* use generic hash table */
-#define MY_TAB_SIZE     16
-#define MY_TAB_MASK     15
-static u32 idx_gen;
-static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE];
+#define PEDIT_TAB_MASK	15
+static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1];
+static u32 pedit_idx_gen;
 static DEFINE_RWLOCK(pedit_lock);
 
-#define tcf_st		tcf_pedit
-#define tc_st		tc_pedit
-#define tcf_t_lock	pedit_lock
-#define tcf_ht		tcf_pedit_ht
+static struct tcf_hashinfo pedit_hash_info = {
+	.htab	=	tcf_pedit_ht,
+	.hmask	=	PEDIT_TAB_MASK,
+	.lock	=	&pedit_lock,
+};
 
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
-
-static int
-tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,
-               int ovr, int bind)
+static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est,
+			  struct tc_action *a, int ovr, int bind)
 {
 	struct rtattr *tb[TCA_PEDIT_MAX];
 	struct tc_pedit *parm;
 	int ret = 0;
 	struct tcf_pedit *p;
+	struct tcf_common *pc;
 	struct tc_pedit_key *keys = NULL;
 	int ksize;
 
@@ -73,54 +66,56 @@
 	if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize)
 		return -EINVAL;
 
-	p = tcf_hash_check(parm->index, a, ovr, bind);
-	if (p == NULL) {
+	pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info);
+	if (!pc) {
 		if (!parm->nkeys)
 			return -EINVAL;
-		p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind);
-		if (p == NULL)
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
+				     &pedit_idx_gen, &pedit_hash_info);
+		if (unlikely(!pc))
 			return -ENOMEM;
+		p = to_pedit(pc);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (keys == NULL) {
-			kfree(p);
+			kfree(pc);
 			return -ENOMEM;
 		}
 		ret = ACT_P_CREATED;
 	} else {
+		p = to_pedit(pc);
 		if (!ovr) {
-			tcf_hash_release(p, bind);
+			tcf_hash_release(pc, bind, &pedit_hash_info);
 			return -EEXIST;
 		}
-		if (p->nkeys && p->nkeys != parm->nkeys) {
+		if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
 			keys = kmalloc(ksize, GFP_KERNEL);
 			if (keys == NULL)
 				return -ENOMEM;
 		}
 	}
 
-	spin_lock_bh(&p->lock);
-	p->flags = parm->flags;
-	p->action = parm->action;
+	spin_lock_bh(&p->tcf_lock);
+	p->tcfp_flags = parm->flags;
+	p->tcf_action = parm->action;
 	if (keys) {
-		kfree(p->keys);
-		p->keys = keys;
-		p->nkeys = parm->nkeys;
+		kfree(p->tcfp_keys);
+		p->tcfp_keys = keys;
+		p->tcfp_nkeys = parm->nkeys;
 	}
-	memcpy(p->keys, parm->keys, ksize);
-	spin_unlock_bh(&p->lock);
+	memcpy(p->tcfp_keys, parm->keys, ksize);
+	spin_unlock_bh(&p->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(p);
+		tcf_hash_insert(pc, &pedit_hash_info);
 	return ret;
 }
 
-static int
-tcf_pedit_cleanup(struct tc_action *a, int bind)
+static int tcf_pedit_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_pedit *p = PRIV(a, pedit);
+	struct tcf_pedit *p = a->priv;
 
-	if (p != NULL) {
-		struct tc_pedit_key *keys = p->keys;
-		if (tcf_hash_release(p, bind)) {
+	if (p) {
+		struct tc_pedit_key *keys = p->tcfp_keys;
+		if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) {
 			kfree(keys);
 			return 1;
 		}
@@ -128,30 +123,30 @@
 	return 0;
 }
 
-static int
-tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
+		     struct tcf_result *res)
 {
-	struct tcf_pedit *p = PRIV(a, pedit);
+	struct tcf_pedit *p = a->priv;
 	int i, munged = 0;
 	u8 *pptr;
 
 	if (!(skb->tc_verd & TC_OK2MUNGE)) {
 		/* should we set skb->cloned? */
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
-			return p->action;
+			return p->tcf_action;
 		}
 	}
 
 	pptr = skb->nh.raw;
 
-	spin_lock(&p->lock);
+	spin_lock(&p->tcf_lock);
 
-	p->tm.lastuse = jiffies;
+	p->tcf_tm.lastuse = jiffies;
 
-	if (p->nkeys > 0) {
-		struct tc_pedit_key *tkey = p->keys;
+	if (p->tcfp_nkeys > 0) {
+		struct tc_pedit_key *tkey = p->tcfp_keys;
 
-		for (i = p->nkeys; i > 0; i--, tkey++) {
+		for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
 			u32 *ptr;
 			int offset = tkey->off;
 
@@ -169,7 +164,8 @@
 				printk("offset must be on 32 bit boundaries\n");
 				goto bad;
 			}
-			if (skb->len < 0 || (offset > 0 && offset > skb->len)) {
+			if (skb->len < 0 ||
+			    (offset > 0 && offset > skb->len)) {
 				printk("offset %d cant exceed pkt length %d\n",
 				       offset, skb->len);
 				goto bad;
@@ -185,63 +181,47 @@
 			skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
 		goto done;
 	} else {
-		printk("pedit BUG: index %d\n",p->index);
+		printk("pedit BUG: index %d\n", p->tcf_index);
 	}
 
 bad:
-	p->qstats.overlimits++;
+	p->tcf_qstats.overlimits++;
 done:
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
-	spin_unlock(&p->lock);
-	return p->action;
+	p->tcf_bstats.bytes += skb->len;
+	p->tcf_bstats.packets++;
+	spin_unlock(&p->tcf_lock);
+	return p->tcf_action;
 }
 
-static int
-tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref)
+static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
+			  int bind, int ref)
 {
 	unsigned char *b = skb->tail;
+	struct tcf_pedit *p = a->priv;
 	struct tc_pedit *opt;
-	struct tcf_pedit *p = PRIV(a, pedit);
 	struct tcf_t t;
 	int s; 
 		
-	s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key);
+	s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
 
 	/* netlink spinlocks held above us - must use ATOMIC */
 	opt = kzalloc(s, GFP_ATOMIC);
-	if (opt == NULL)
+	if (unlikely(!opt))
 		return -ENOBUFS;
 
-	memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key));
-	opt->index = p->index;
-	opt->nkeys = p->nkeys;
-	opt->flags = p->flags;
-	opt->action = p->action;
-	opt->refcnt = p->refcnt - ref;
-	opt->bindcnt = p->bindcnt - bind;
-
-
-#ifdef PEDIT_DEB
-	{                
-		/* Debug - get rid of later */
-		int i;
-		struct tc_pedit_key *key = opt->keys;
-
-		for (i=0; i<opt->nkeys; i++, key++) {
-			printk( "\n key #%d",i);
-			printk( "  at %d: val %08x mask %08x",
-			(unsigned int)key->off,
-			(unsigned int)key->val,
-			(unsigned int)key->mask);
-		}
-	}
-#endif
+	memcpy(opt->keys, p->tcfp_keys,
+	       p->tcfp_nkeys * sizeof(struct tc_pedit_key));
+	opt->index = p->tcf_index;
+	opt->nkeys = p->tcfp_nkeys;
+	opt->flags = p->tcfp_flags;
+	opt->action = p->tcf_action;
+	opt->refcnt = p->tcf_refcnt - ref;
+	opt->bindcnt = p->tcf_bindcnt - bind;
 
 	RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
-	t.install = jiffies_to_clock_t(jiffies - p->tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse);
-	t.expires = jiffies_to_clock_t(p->tm.expires);
+	t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
 	RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
 	kfree(opt);
 	return skb->len;
@@ -252,9 +232,9 @@
 	return -1;
 }
 
-static
-struct tc_action_ops act_pedit_ops = {
+static struct tc_action_ops act_pedit_ops = {
 	.kind		=	"pedit",
+	.hinfo		=	&pedit_hash_info,
 	.type		=	TCA_ACT_PEDIT,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
@@ -270,14 +250,12 @@
 MODULE_DESCRIPTION("Generic Packet Editor actions");
 MODULE_LICENSE("GPL");
 
-static int __init
-pedit_init_module(void)
+static int __init pedit_init_module(void)
 {
 	return tcf_register_action(&act_pedit_ops);
 }
 
-static void __exit
-pedit_cleanup_module(void)
+static void __exit pedit_cleanup_module(void)
 {
 	tcf_unregister_action(&act_pedit_ops);
 }
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index da905d7..fed47b6 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -32,43 +32,27 @@
 #include <net/sock.h>
 #include <net/act_api.h>
 
-#define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
-#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
-#define PRIV(a) ((struct tcf_police *) (a)->priv)
+#define L2T(p,L)   ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
+#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
 
-/* use generic hash table */
-#define MY_TAB_SIZE     16
-#define MY_TAB_MASK     15
-static u32 idx_gen;
-static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
-/* Policer hash table lock */
+#define POL_TAB_MASK     15
+static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
+static u32 police_idx_gen;
 static DEFINE_RWLOCK(police_lock);
 
+static struct tcf_hashinfo police_hash_info = {
+	.htab	=	tcf_police_ht,
+	.hmask	=	POL_TAB_MASK,
+	.lock	=	&police_lock,
+};
+
 /* Each policer is serialized by its individual spinlock */
 
-static __inline__ unsigned tcf_police_hash(u32 index)
-{
-	return index&0xF;
-}
-
-static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
-{
-	struct tcf_police *p;
-
-	read_lock(&police_lock);
-	for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
-		if (p->index == index)
-			break;
-	}
-	read_unlock(&police_lock);
-	return p;
-}
-
 #ifdef CONFIG_NET_CLS_ACT
 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
                               int type, struct tc_action *a)
 {
-	struct tcf_police *p;
+	struct tcf_common *p;
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 	struct rtattr *r;
 
@@ -76,10 +60,10 @@
 
 	s_i = cb->args[0];
 
-	for (i = 0; i < MY_TAB_SIZE; i++) {
-		p = tcf_police_ht[tcf_police_hash(i)];
+	for (i = 0; i < (POL_TAB_MASK + 1); i++) {
+		p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
 
-		for (; p; p = p->next) {
+		for (; p; p = p->tcfc_next) {
 			index++;
 			if (index < s_i)
 				continue;
@@ -110,48 +94,26 @@
 	skb_trim(skb, (u8*)r - skb->data);
 	goto done;
 }
-
-static inline int
-tcf_act_police_hash_search(struct tc_action *a, u32 index)
-{
-	struct tcf_police *p = tcf_police_lookup(index);
-
-	if (p != NULL) {
-		a->priv = p;
-		return 1;
-	} else {
-		return 0;
-	}
-}
 #endif
 
-static inline u32 tcf_police_new_index(void)
-{
-	do {
-		if (++idx_gen == 0)
-			idx_gen = 1;
-	} while (tcf_police_lookup(idx_gen));
-
-	return idx_gen;
-}
-
 void tcf_police_destroy(struct tcf_police *p)
 {
-	unsigned h = tcf_police_hash(p->index);
-	struct tcf_police **p1p;
+	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
+	struct tcf_common **p1p;
 	
-	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
-		if (*p1p == p) {
+	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
+		if (*p1p == &p->common) {
 			write_lock_bh(&police_lock);
-			*p1p = p->next;
+			*p1p = p->tcf_next;
 			write_unlock_bh(&police_lock);
 #ifdef CONFIG_NET_ESTIMATOR
-			gen_kill_estimator(&p->bstats, &p->rate_est);
+			gen_kill_estimator(&p->tcf_bstats,
+					   &p->tcf_rate_est);
 #endif
-			if (p->R_tab)
-				qdisc_put_rtab(p->R_tab);
-			if (p->P_tab)
-				qdisc_put_rtab(p->P_tab);
+			if (p->tcfp_R_tab)
+				qdisc_put_rtab(p->tcfp_R_tab);
+			if (p->tcfp_P_tab)
+				qdisc_put_rtab(p->tcfp_P_tab);
 			kfree(p);
 			return;
 		}
@@ -167,7 +129,7 @@
 	int ret = 0, err;
 	struct rtattr *tb[TCA_POLICE_MAX];
 	struct tc_police *parm;
-	struct tcf_police *p;
+	struct tcf_police *police;
 	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
 
 	if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
@@ -185,27 +147,32 @@
 	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
 		return -EINVAL;
 
-	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
-		a->priv = p;
-		if (bind) {
-			p->bindcnt += 1;
-			p->refcnt += 1;
+	if (parm->index) {
+		struct tcf_common *pc;
+
+		pc = tcf_hash_lookup(parm->index, &police_hash_info);
+		if (pc != NULL) {
+			a->priv = pc;
+			police = to_police(pc);
+			if (bind) {
+				police->tcf_bindcnt += 1;
+				police->tcf_refcnt += 1;
+			}
+			if (ovr)
+				goto override;
+			return ret;
 		}
-		if (ovr)
-			goto override;
-		return ret;
 	}
 
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
-	if (p == NULL)
+	police = kzalloc(sizeof(*police), GFP_KERNEL);
+	if (police == NULL)
 		return -ENOMEM;
-
 	ret = ACT_P_CREATED;
-	p->refcnt = 1;
-	spin_lock_init(&p->lock);
-	p->stats_lock = &p->lock;
+	police->tcf_refcnt = 1;
+	spin_lock_init(&police->tcf_lock);
+	police->tcf_stats_lock = &police->tcf_lock;
 	if (bind)
-		p->bindcnt = 1;
+		police->tcf_bindcnt = 1;
 override:
 	if (parm->rate.rate) {
 		err = -ENOMEM;
@@ -215,67 +182,71 @@
 		if (parm->peakrate.rate) {
 			P_tab = qdisc_get_rtab(&parm->peakrate,
 					       tb[TCA_POLICE_PEAKRATE-1]);
-			if (p->P_tab == NULL) {
+			if (P_tab == NULL) {
 				qdisc_put_rtab(R_tab);
 				goto failure;
 			}
 		}
 	}
 	/* No failure allowed after this point */
-	spin_lock_bh(&p->lock);
+	spin_lock_bh(&police->tcf_lock);
 	if (R_tab != NULL) {
-		qdisc_put_rtab(p->R_tab);
-		p->R_tab = R_tab;
+		qdisc_put_rtab(police->tcfp_R_tab);
+		police->tcfp_R_tab = R_tab;
 	}
 	if (P_tab != NULL) {
-		qdisc_put_rtab(p->P_tab);
-		p->P_tab = P_tab;
+		qdisc_put_rtab(police->tcfp_P_tab);
+		police->tcfp_P_tab = P_tab;
 	}
 
 	if (tb[TCA_POLICE_RESULT-1])
-		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
-	p->toks = p->burst = parm->burst;
-	p->mtu = parm->mtu;
-	if (p->mtu == 0) {
-		p->mtu = ~0;
-		if (p->R_tab)
-			p->mtu = 255<<p->R_tab->rate.cell_log;
+		police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+	police->tcfp_toks = police->tcfp_burst = parm->burst;
+	police->tcfp_mtu = parm->mtu;
+	if (police->tcfp_mtu == 0) {
+		police->tcfp_mtu = ~0;
+		if (police->tcfp_R_tab)
+			police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
 	}
-	if (p->P_tab)
-		p->ptoks = L2T_P(p, p->mtu);
-	p->action = parm->action;
+	if (police->tcfp_P_tab)
+		police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
+	police->tcf_action = parm->action;
 
 #ifdef CONFIG_NET_ESTIMATOR
 	if (tb[TCA_POLICE_AVRATE-1])
-		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+		police->tcfp_ewma_rate =
+			*(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
 	if (est)
-		gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+		gen_replace_estimator(&police->tcf_bstats,
+				      &police->tcf_rate_est,
+				      police->tcf_stats_lock, est);
 #endif
 
-	spin_unlock_bh(&p->lock);
+	spin_unlock_bh(&police->tcf_lock);
 	if (ret != ACT_P_CREATED)
 		return ret;
 
-	PSCHED_GET_TIME(p->t_c);
-	p->index = parm->index ? : tcf_police_new_index();
-	h = tcf_police_hash(p->index);
+	PSCHED_GET_TIME(police->tcfp_t_c);
+	police->tcf_index = parm->index ? parm->index :
+		tcf_hash_new_index(&police_idx_gen, &police_hash_info);
+	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
 	write_lock_bh(&police_lock);
-	p->next = tcf_police_ht[h];
-	tcf_police_ht[h] = p;
+	police->tcf_next = tcf_police_ht[h];
+	tcf_police_ht[h] = &police->common;
 	write_unlock_bh(&police_lock);
 
-	a->priv = p;
+	a->priv = police;
 	return ret;
 
 failure:
 	if (ret == ACT_P_CREATED)
-		kfree(p);
+		kfree(police);
 	return err;
 }
 
 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
 {
-	struct tcf_police *p = PRIV(a);
+	struct tcf_police *p = a->priv;
 
 	if (p != NULL)
 		return tcf_police_release(p, bind);
@@ -285,86 +256,87 @@
 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
                           struct tcf_result *res)
 {
+	struct tcf_police *police = a->priv;
 	psched_time_t now;
-	struct tcf_police *p = PRIV(a);
 	long toks;
 	long ptoks = 0;
 
-	spin_lock(&p->lock);
+	spin_lock(&police->tcf_lock);
 
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	police->tcf_bstats.bytes += skb->len;
+	police->tcf_bstats.packets++;
 
 #ifdef CONFIG_NET_ESTIMATOR
-	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
-		p->qstats.overlimits++;
-		spin_unlock(&p->lock);
-		return p->action;
+	if (police->tcfp_ewma_rate &&
+	    police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
+		police->tcf_qstats.overlimits++;
+		spin_unlock(&police->tcf_lock);
+		return police->tcf_action;
 	}
 #endif
 
-	if (skb->len <= p->mtu) {
-		if (p->R_tab == NULL) {
-			spin_unlock(&p->lock);
-			return p->result;
+	if (skb->len <= police->tcfp_mtu) {
+		if (police->tcfp_R_tab == NULL) {
+			spin_unlock(&police->tcf_lock);
+			return police->tcfp_result;
 		}
 
 		PSCHED_GET_TIME(now);
 
-		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
-
-		if (p->P_tab) {
-			ptoks = toks + p->ptoks;
-			if (ptoks > (long)L2T_P(p, p->mtu))
-				ptoks = (long)L2T_P(p, p->mtu);
-			ptoks -= L2T_P(p, skb->len);
+		toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
+					 police->tcfp_burst);
+		if (police->tcfp_P_tab) {
+			ptoks = toks + police->tcfp_ptoks;
+			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
+				ptoks = (long)L2T_P(police, police->tcfp_mtu);
+			ptoks -= L2T_P(police, skb->len);
 		}
-		toks += p->toks;
-		if (toks > (long)p->burst)
-			toks = p->burst;
-		toks -= L2T(p, skb->len);
-
+		toks += police->tcfp_toks;
+		if (toks > (long)police->tcfp_burst)
+			toks = police->tcfp_burst;
+		toks -= L2T(police, skb->len);
 		if ((toks|ptoks) >= 0) {
-			p->t_c = now;
-			p->toks = toks;
-			p->ptoks = ptoks;
-			spin_unlock(&p->lock);
-			return p->result;
+			police->tcfp_t_c = now;
+			police->tcfp_toks = toks;
+			police->tcfp_ptoks = ptoks;
+			spin_unlock(&police->tcf_lock);
+			return police->tcfp_result;
 		}
 	}
 
-	p->qstats.overlimits++;
-	spin_unlock(&p->lock);
-	return p->action;
+	police->tcf_qstats.overlimits++;
+	spin_unlock(&police->tcf_lock);
+	return police->tcf_action;
 }
 
 static int
 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	unsigned char	 *b = skb->tail;
+	struct tcf_police *police = a->priv;
 	struct tc_police opt;
-	struct tcf_police *p = PRIV(a);
 
-	opt.index = p->index;
-	opt.action = p->action;
-	opt.mtu = p->mtu;
-	opt.burst = p->burst;
-	opt.refcnt = p->refcnt - ref;
-	opt.bindcnt = p->bindcnt - bind;
-	if (p->R_tab)
-		opt.rate = p->R_tab->rate;
+	opt.index = police->tcf_index;
+	opt.action = police->tcf_action;
+	opt.mtu = police->tcfp_mtu;
+	opt.burst = police->tcfp_burst;
+	opt.refcnt = police->tcf_refcnt - ref;
+	opt.bindcnt = police->tcf_bindcnt - bind;
+	if (police->tcfp_R_tab)
+		opt.rate = police->tcfp_R_tab->rate;
 	else
 		memset(&opt.rate, 0, sizeof(opt.rate));
-	if (p->P_tab)
-		opt.peakrate = p->P_tab->rate;
+	if (police->tcfp_P_tab)
+		opt.peakrate = police->tcfp_P_tab->rate;
 	else
 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
 	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
-	if (p->result)
-		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+	if (police->tcfp_result)
+		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
+			&police->tcfp_result);
 #ifdef CONFIG_NET_ESTIMATOR
-	if (p->ewma_rate)
-		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+	if (police->tcfp_ewma_rate)
+		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
 #endif
 	return skb->len;
 
@@ -379,13 +351,14 @@
 
 static struct tc_action_ops act_police_ops = {
 	.kind		=	"police",
+	.hinfo		=	&police_hash_info,
 	.type		=	TCA_ID_POLICE,
 	.capab		=	TCA_CAP_NONE,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_act_police,
 	.dump		=	tcf_act_police_dump,
 	.cleanup	=	tcf_act_police_cleanup,
-	.lookup		=	tcf_act_police_hash_search,
+	.lookup		=	tcf_hash_search,
 	.init		=	tcf_act_police_locate,
 	.walk		=	tcf_act_police_walker
 };
@@ -407,10 +380,39 @@
 
 #else /* CONFIG_NET_CLS_ACT */
 
-struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
+static struct tcf_common *tcf_police_lookup(u32 index)
 {
-	unsigned h;
-	struct tcf_police *p;
+	struct tcf_hashinfo *hinfo = &police_hash_info;
+	struct tcf_common *p;
+
+	read_lock(hinfo->lock);
+	for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
+	     p = p->tcfc_next) {
+		if (p->tcfc_index == index)
+			break;
+	}
+	read_unlock(hinfo->lock);
+
+	return p;
+}
+
+/* Pick the next non-zero index that has no entry in the police hash table. */
+static u32 tcf_police_new_index(void)
+{
+	u32 candidate = police_idx_gen;
+
+	do {
+		candidate = (candidate + 1) ? (candidate + 1) : 1;
+	} while (tcf_police_lookup(candidate));
+
+	police_idx_gen = candidate;
+	return candidate;
+}
+
+struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
+{
+	unsigned int h;
+	struct tcf_police *police;
 	struct rtattr *tb[TCA_POLICE_MAX];
 	struct tc_police *parm;
 
@@ -423,149 +425,158 @@
 
 	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
 
-	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
-		p->refcnt++;
-		return p;
-	}
+	if (parm->index) {
+		struct tcf_common *pc;
 
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
-	if (p == NULL)
+		pc = tcf_police_lookup(parm->index);
+		if (pc) {
+			police = to_police(pc);
+			police->tcf_refcnt++;
+			return police;
+		}
+	}
+	police = kzalloc(sizeof(*police), GFP_KERNEL);
+	if (unlikely(!police))
 		return NULL;
 
-	p->refcnt = 1;
-	spin_lock_init(&p->lock);
-	p->stats_lock = &p->lock;
+	police->tcf_refcnt = 1;
+	spin_lock_init(&police->tcf_lock);
+	police->tcf_stats_lock = &police->tcf_lock;
 	if (parm->rate.rate) {
-		p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
-		if (p->R_tab == NULL)
+		police->tcfp_R_tab =
+			qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+		if (police->tcfp_R_tab == NULL)
 			goto failure;
 		if (parm->peakrate.rate) {
-			p->P_tab = qdisc_get_rtab(&parm->peakrate,
-			                          tb[TCA_POLICE_PEAKRATE-1]);
-			if (p->P_tab == NULL)
+			police->tcfp_P_tab =
+				qdisc_get_rtab(&parm->peakrate,
+					       tb[TCA_POLICE_PEAKRATE-1]);
+			if (police->tcfp_P_tab == NULL)
 				goto failure;
 		}
 	}
 	if (tb[TCA_POLICE_RESULT-1]) {
 		if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
 			goto failure;
-		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+		police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
 	}
 #ifdef CONFIG_NET_ESTIMATOR
 	if (tb[TCA_POLICE_AVRATE-1]) {
 		if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
 			goto failure;
-		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+		police->tcfp_ewma_rate =
+			*(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
 	}
 #endif
-	p->toks = p->burst = parm->burst;
-	p->mtu = parm->mtu;
-	if (p->mtu == 0) {
-		p->mtu = ~0;
-		if (p->R_tab)
-			p->mtu = 255<<p->R_tab->rate.cell_log;
+	police->tcfp_toks = police->tcfp_burst = parm->burst;
+	police->tcfp_mtu = parm->mtu;
+	if (police->tcfp_mtu == 0) {
+		police->tcfp_mtu = ~0;
+		if (police->tcfp_R_tab)
+			police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
 	}
-	if (p->P_tab)
-		p->ptoks = L2T_P(p, p->mtu);
-	PSCHED_GET_TIME(p->t_c);
-	p->index = parm->index ? : tcf_police_new_index();
-	p->action = parm->action;
+	if (police->tcfp_P_tab)
+		police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
+	PSCHED_GET_TIME(police->tcfp_t_c);
+	police->tcf_index = parm->index ? parm->index :
+		tcf_police_new_index();
+	police->tcf_action = parm->action;
 #ifdef CONFIG_NET_ESTIMATOR
 	if (est)
-		gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+		gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est,
+				  police->tcf_stats_lock, est);
 #endif
-	h = tcf_police_hash(p->index);
+	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
 	write_lock_bh(&police_lock);
-	p->next = tcf_police_ht[h];
-	tcf_police_ht[h] = p;
+	police->tcf_next = tcf_police_ht[h];
+	tcf_police_ht[h] = &police->common;
 	write_unlock_bh(&police_lock);
-	return p;
+	return police;
 
 failure:
-	if (p->R_tab)
-		qdisc_put_rtab(p->R_tab);
-	kfree(p);
+	if (police->tcfp_R_tab)
+		qdisc_put_rtab(police->tcfp_R_tab);
+	kfree(police);
 	return NULL;
 }
 
-int tcf_police(struct sk_buff *skb, struct tcf_police *p)
+int tcf_police(struct sk_buff *skb, struct tcf_police *police)
 {
 	psched_time_t now;
 	long toks;
 	long ptoks = 0;
 
-	spin_lock(&p->lock);
+	spin_lock(&police->tcf_lock);
 
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	police->tcf_bstats.bytes += skb->len;
+	police->tcf_bstats.packets++;
 
 #ifdef CONFIG_NET_ESTIMATOR
-	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
-		p->qstats.overlimits++;
-		spin_unlock(&p->lock);
-		return p->action;
+	if (police->tcfp_ewma_rate &&
+	    police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
+		police->tcf_qstats.overlimits++;
+		spin_unlock(&police->tcf_lock);
+		return police->tcf_action;
 	}
 #endif
-
-	if (skb->len <= p->mtu) {
-		if (p->R_tab == NULL) {
-			spin_unlock(&p->lock);
-			return p->result;
+	if (skb->len <= police->tcfp_mtu) {
+		if (police->tcfp_R_tab == NULL) {
+			spin_unlock(&police->tcf_lock);
+			return police->tcfp_result;
 		}
 
 		PSCHED_GET_TIME(now);
-
-		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
-
-		if (p->P_tab) {
-			ptoks = toks + p->ptoks;
-			if (ptoks > (long)L2T_P(p, p->mtu))
-				ptoks = (long)L2T_P(p, p->mtu);
-			ptoks -= L2T_P(p, skb->len);
+		toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
+					 police->tcfp_burst);
+		if (police->tcfp_P_tab) {
+			ptoks = toks + police->tcfp_ptoks;
+			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
+				ptoks = (long)L2T_P(police, police->tcfp_mtu);
+			ptoks -= L2T_P(police, skb->len);
 		}
-		toks += p->toks;
-		if (toks > (long)p->burst)
-			toks = p->burst;
-		toks -= L2T(p, skb->len);
-
+		toks += police->tcfp_toks;
+		if (toks > (long)police->tcfp_burst)
+			toks = police->tcfp_burst;
+		toks -= L2T(police, skb->len);
 		if ((toks|ptoks) >= 0) {
-			p->t_c = now;
-			p->toks = toks;
-			p->ptoks = ptoks;
-			spin_unlock(&p->lock);
-			return p->result;
+			police->tcfp_t_c = now;
+			police->tcfp_toks = toks;
+			police->tcfp_ptoks = ptoks;
+			spin_unlock(&police->tcf_lock);
+			return police->tcfp_result;
 		}
 	}
 
-	p->qstats.overlimits++;
-	spin_unlock(&p->lock);
-	return p->action;
+	police->tcf_qstats.overlimits++;
+	spin_unlock(&police->tcf_lock);
+	return police->tcf_action;
 }
 EXPORT_SYMBOL(tcf_police);
 
-int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
+int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb->tail;
 	struct tc_police opt;
 
-	opt.index = p->index;
-	opt.action = p->action;
-	opt.mtu = p->mtu;
-	opt.burst = p->burst;
-	if (p->R_tab)
-		opt.rate = p->R_tab->rate;
+	opt.index = police->tcf_index;
+	opt.action = police->tcf_action;
+	opt.mtu = police->tcfp_mtu;
+	opt.burst = police->tcfp_burst;
+	if (police->tcfp_R_tab)
+		opt.rate = police->tcfp_R_tab->rate;
 	else
 		memset(&opt.rate, 0, sizeof(opt.rate));
-	if (p->P_tab)
-		opt.peakrate = p->P_tab->rate;
+	if (police->tcfp_P_tab)
+		opt.peakrate = police->tcfp_P_tab->rate;
 	else
 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
 	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
-	if (p->result)
-		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+	if (police->tcfp_result)
+		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int),
+			&police->tcfp_result);
 #ifdef CONFIG_NET_ESTIMATOR
-	if (p->ewma_rate)
-		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+	if (police->tcfp_ewma_rate)
+		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate);
 #endif
 	return skb->len;
 
@@ -574,19 +585,20 @@
 	return -1;
 }
 
-int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
+int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police)
 {
 	struct gnet_dump d;
 	
 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-			TCA_XSTATS, p->stats_lock, &d) < 0)
+					 TCA_XSTATS, police->tcf_stats_lock,
+					 &d) < 0)
 		goto errout;
 	
-	if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
+	if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 ||
 #ifdef CONFIG_NET_ESTIMATOR
-	    gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 ||
 #endif
-	    gnet_stats_copy_queue(&d, &p->qstats) < 0)
+	    gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0)
 		goto errout;
 
 	if (gnet_stats_finish_copy(&d) < 0)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 17105c8..901571a 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -20,54 +20,175 @@
 
 #define TCA_ACT_SIMP 22
 
-/* XXX: Hide all these common elements under some macro 
- * probably
-*/
 #include <linux/tc_act/tc_defact.h>
 #include <net/tc_act/tc_defact.h>
 
-/* use generic hash table with 8 buckets */
-#define MY_TAB_SIZE     8
-#define MY_TAB_MASK     (MY_TAB_SIZE - 1)
-static u32 idx_gen;
-static struct tcf_defact *tcf_simp_ht[MY_TAB_SIZE];
+#define SIMP_TAB_MASK     7
+static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1];
+static u32 simp_idx_gen;
 static DEFINE_RWLOCK(simp_lock);
 
-/* override the defaults */
-#define tcf_st		tcf_defact
-#define tc_st		tc_defact
-#define tcf_t_lock	simp_lock
-#define tcf_ht		tcf_simp_ht
-
-#define CONFIG_NET_ACT_INIT 1
-#include <net/pkt_act.h>
-#include <net/act_generic.h>
+static struct tcf_hashinfo simp_hash_info = {	/* passed to the tcf_hash_*() helpers */
+	.htab	=	tcf_simp_ht,	/* SIMP_TAB_MASK + 1 buckets */
+	.hmask	=	SIMP_TAB_MASK,
+	.lock	=	&simp_lock,
+};
 
 static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
 {
-	struct tcf_defact *p = PRIV(a, defact);
+	struct tcf_defact *d = a->priv;
 
-	spin_lock(&p->lock);
-	p->tm.lastuse = jiffies;
-	p->bstats.bytes += skb->len;
-	p->bstats.packets++;
+	spin_lock(&d->tcf_lock);
+	d->tcf_tm.lastuse = jiffies;
+	d->tcf_bstats.bytes += skb->len;
+	d->tcf_bstats.packets++;
 
 	/* print policy string followed by _ then packet count 
 	 * Example if this was the 3rd packet and the string was "hello" 
 	 * then it would look like "hello_3" (without quotes) 
 	 **/
-	printk("simple: %s_%d\n", (char *)p->defdata, p->bstats.packets);
-	spin_unlock(&p->lock);
-	return p->action;
+	printk("simple: %s_%d\n",
+	       (char *)d->tcfd_defdata, d->tcf_bstats.packets);
+	spin_unlock(&d->tcf_lock);
+	return d->tcf_action;
+}
+
+/* Drop a reference (and a binding if @bind); 1 if @d was torn down, else 0. */
+static int tcf_simp_release(struct tcf_defact *d, int bind)
+{
+	if (!d)
+		return 0;
+	if (bind)
+		d->tcf_bindcnt--;
+	d->tcf_refcnt--;
+	if (d->tcf_bindcnt > 0 || d->tcf_refcnt > 0)
+		return 0;	/* still bound or referenced */
+
+	kfree(d->tcfd_defdata);
+	tcf_hash_destroy(&d->common, &simp_hash_info);
+	return 1;
+}
+/* Give @d a kmalloc'ed copy of the @datalen bytes at @defdata; 0 or -ENOMEM. */
+static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
+{
+	d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL);
+	if (unlikely(!d->tcfd_defdata))
+		return -ENOMEM;	/* tcfd_defdata is NULL now, tcfd_datalen untouched */
+	d->tcfd_datalen = datalen;
+	memcpy(d->tcfd_defdata, defdata, datalen);
+	return 0;
+}
+/* Replace @d's data; the old buffer is freed before the new one is allocated. */
+static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
+{
+	kfree(d->tcfd_defdata);
+	return alloc_defdata(d, datalen, defdata);	/* 0 or -ENOMEM; old data is gone either way */
+}
+
+/* Create a "simple" action or, with @ovr, update the data and action of an
+ * existing one.  TCA_DEF_PARMS and a non-empty TCA_DEF_DATA are required.
+ * Returns ACT_P_CREATED for a new action, 0 for an update, -errno on error. */
+static int tcf_simp_init(struct rtattr *rta, struct rtattr *est,
+			 struct tc_action *a, int ovr, int bind)
+{
+	struct rtattr *tb[TCA_DEF_MAX];
+	struct tc_defact *parm;
+	struct tcf_defact *d;
+	struct tcf_common *pc;
+	void *defdata;
+	u32 datalen = 0;
+	int ret = 0;
+
+	if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0)
+		return -EINVAL;
+	if (tb[TCA_DEF_PARMS - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm))
+		return -EINVAL;
+	/* Test the attribute itself: RTA_DATA() of a missing one is not NULL. */
+	if (tb[TCA_DEF_DATA - 1] == NULL)
+		return -EINVAL;
+
+	parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]);
+	defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]);
+	datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]);
+	if (datalen == 0)
+		return -EINVAL;
+
+	pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info);
+	if (!pc) {
+		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+				     &simp_idx_gen, &simp_hash_info);
+		if (unlikely(!pc))
+			return -ENOMEM;
+		d = to_defact(pc);
+		ret = alloc_defdata(d, datalen, defdata);
+		if (ret < 0) {
+			kfree(pc);
+			return ret;
+		}
+		ret = ACT_P_CREATED;
+	} else {
+		d = to_defact(pc);
+		if (!ovr) {
+			tcf_simp_release(d, bind);
+			return -EEXIST;
+		}
+		realloc_defdata(d, datalen, defdata);	/* NOTE(review): -ENOMEM is ignored here */
+	}
+
+	spin_lock_bh(&d->tcf_lock);
+	d->tcf_action = parm->action;
+	spin_unlock_bh(&d->tcf_lock);
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(pc, &simp_hash_info);
+	return ret;
+}
+
+/* ->cleanup hook of act_simp_ops: drop the tcf_defact hanging off a->priv.
+ * Returns tcf_simp_release()'s result, or 0 when a->priv is NULL. */
+static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_defact *defact = a->priv;
+
+	return defact ? tcf_simp_release(defact, bind) : 0;
+}
+
+/* ->dump hook: emit parms, data and timestamps; skb->len on success, else -1. */
+static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+				int bind, int ref)
+{
+	unsigned char *b = skb->tail;
+	struct tcf_defact *d = a->priv;
+	struct tc_defact opt = { .index = d->tcf_index };	/* other members start zeroed, not stack junk */
+	struct tcf_t t;
+
+	opt.refcnt = d->tcf_refcnt - ref;
+	opt.bindcnt = d->tcf_bindcnt - bind;
+	opt.action = d->tcf_action;
+	RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
+	RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
+	t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+	RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
 }
 
 static struct tc_action_ops act_simp_ops = {
-	.kind = "simple",
-	.type = TCA_ACT_SIMP,
-	.capab = TCA_CAP_NONE,
-	.owner = THIS_MODULE,
-	.act = tcf_simp,
-	tca_use_default_ops
+	.kind		=	"simple",
+	.hinfo		=	&simp_hash_info,
+	.type		=	TCA_ACT_SIMP,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_simp,
+	.dump		=	tcf_simp_dump,
+	.cleanup	=	tcf_simp_cleanup,
+	.init		=	tcf_simp_init,
+	.walk		=	tcf_generic_walker,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2005)");
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index e6973d9..e54acc6 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -50,6 +50,7 @@
 struct fw_head
 {
 	struct fw_filter *ht[HTSIZE];
+	u32 mask;
 };
 
 struct fw_filter
@@ -101,7 +102,7 @@
 	struct fw_filter *f;
 	int r;
 #ifdef CONFIG_NETFILTER
-	u32 id = skb->nfmark;
+	u32 id = skb->nfmark & head->mask;
 #else
 	u32 id = 0;
 #endif
@@ -209,7 +210,9 @@
 fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
 	struct rtattr **tb, struct rtattr **tca, unsigned long base)
 {
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct tcf_exts e;
+	u32 mask;
 	int err;
 
 	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map);
@@ -232,6 +235,15 @@
 	}
 #endif /* CONFIG_NET_CLS_IND */
 
+	if (tb[TCA_FW_MASK-1]) {
+		if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+			goto errout;
+		mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+		if (mask != head->mask)
+			goto errout;
+	} else if (head->mask != 0xFFFFFFFF)
+		goto errout;
+
 	tcf_exts_change(tp, &f->exts, &e);
 
 	return 0;
@@ -267,9 +279,17 @@
 		return -EINVAL;
 
 	if (head == NULL) {
+		u32 mask = 0xFFFFFFFF;
+		if (tb[TCA_FW_MASK-1]) {
+			if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+				return -EINVAL;
+			mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+		}
+
 		head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
 		if (head == NULL)
 			return -ENOBUFS;
+		head->mask = mask;
 
 		tcf_tree_lock(tp);
 		tp->root = head;
@@ -330,6 +350,7 @@
 static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
+	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f = (struct fw_filter*)fh;
 	unsigned char	 *b = skb->tail;
 	struct rtattr *rta;
@@ -351,6 +372,8 @@
 	if (strlen(f->indev))
 		RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev);
 #endif /* CONFIG_NET_CLS_IND */
+	if (head->mask != 0xFFFFFFFF)
+		RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask);
 
 	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
 		goto rtattr_failure;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 880a339..bb3ddd4 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1,4 +1,4 @@
-/* vim: ts=8 sw=8
+/*
  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
  *
  *		This program is free software; you can redistribute it and/or
@@ -68,218 +68,165 @@
     one less than their parent.
 */
 
-#define HTB_HSIZE 16	/* classid hash size */
-#define HTB_EWMAC 2	/* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#undef HTB_DEBUG	/* compile debugging support (activated by tc tool) */
-#define HTB_RATECM 1    /* whether to use rate computer */
-#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
-#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
-#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
-#define HTB_VER 0x30011	/* major must be matched with number suplied by TC as version */
+#define HTB_HSIZE 16		/* classid hash size */
+#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
+#define HTB_RATECM 1		/* whether to use rate computer */
+#define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
+#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
 #error "Mismatched sch_htb.c and pkt_sch.h"
 #endif
 
-/* debugging support; S is subsystem, these are defined:
-  0 - netlink messages
-  1 - enqueue
-  2 - drop & requeue
-  3 - dequeue main
-  4 - dequeue one prio DRR part
-  5 - dequeue class accounting
-  6 - class overlimit status computation
-  7 - hint tree
-  8 - event queue
- 10 - rate estimator
- 11 - classifier 
- 12 - fast dequeue cache
-
- L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
- q->debug uint32 contains 16 2-bit fields one for subsystem starting
- from LSB
- */
-#ifdef HTB_DEBUG
-#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
-#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
-	printk(KERN_DEBUG FMT,##ARG)
-#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
-#define HTB_PASSQ q,
-#define HTB_ARGQ struct htb_sched *q,
-#define static
-#undef __inline__
-#define __inline__
-#undef inline
-#define inline
-#define HTB_CMAGIC 0xFEFAFEF1
-#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
-		if ((N)->rb_color == -1) break; \
-		rb_erase(N,R); \
-		(N)->rb_color = -1; } while (0)
-#else
-#define HTB_DBG_COND(S,L) (0)
-#define HTB_DBG(S,L,FMT,ARG...)
-#define HTB_PASSQ
-#define HTB_ARGQ
-#define HTB_CHCL(cl)
-#define htb_safe_rb_erase(N,R) rb_erase(N,R)
-#endif
-
-
 /* used internaly to keep status of single class */
 enum htb_cmode {
-    HTB_CANT_SEND,		/* class can't send and can't borrow */
-    HTB_MAY_BORROW,		/* class can't send but may borrow */
-    HTB_CAN_SEND		/* class can send */
+	HTB_CANT_SEND,		/* class can't send and can't borrow */
+	HTB_MAY_BORROW,		/* class can't send but may borrow */
+	HTB_CAN_SEND		/* class can send */
 };
 
 /* interior & leaf nodes; props specific to leaves are marked L: */
-struct htb_class
-{
-#ifdef HTB_DEBUG
-	unsigned magic;
-#endif
-    /* general class parameters */
-    u32 classid;
-    struct gnet_stats_basic bstats;
-    struct gnet_stats_queue qstats;
-    struct gnet_stats_rate_est rate_est;
-    struct tc_htb_xstats xstats;/* our special stats */
-    int refcnt;			/* usage count of this class */
+struct htb_class {
+	/* general class parameters */
+	u32 classid;
+	struct gnet_stats_basic bstats;
+	struct gnet_stats_queue qstats;
+	struct gnet_stats_rate_est rate_est;
+	struct tc_htb_xstats xstats;	/* our special stats */
+	int refcnt;		/* usage count of this class */
 
 #ifdef HTB_RATECM
-    /* rate measurement counters */
-    unsigned long rate_bytes,sum_bytes;
-    unsigned long rate_packets,sum_packets;
+	/* rate measurement counters */
+	unsigned long rate_bytes, sum_bytes;
+	unsigned long rate_packets, sum_packets;
 #endif
 
-    /* topology */
-    int level;			/* our level (see above) */
-    struct htb_class *parent;	/* parent class */
-    struct list_head hlist;	/* classid hash list item */
-    struct list_head sibling;	/* sibling list item */
-    struct list_head children;	/* children list */
+	/* topology */
+	int level;		/* our level (see above) */
+	struct htb_class *parent;	/* parent class */
+	struct hlist_node hlist;	/* classid hash list item */
+	struct list_head sibling;	/* sibling list item */
+	struct list_head children;	/* children list */
 
-    union {
-	    struct htb_class_leaf {
-		    struct Qdisc *q;
-		    int prio;
-		    int aprio;	
-		    int quantum;
-		    int deficit[TC_HTB_MAXDEPTH];
-		    struct list_head drop_list;
-	    } leaf;
-	    struct htb_class_inner {
-		    struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
-		    struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
-            /* When class changes from state 1->2 and disconnects from 
-               parent's feed then we lost ptr value and start from the
-              first child again. Here we store classid of the
-              last valid ptr (used when ptr is NULL). */
-              u32 last_ptr_id[TC_HTB_NUMPRIO];
-	    } inner;
-    } un;
-    struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
-    struct rb_node pq_node;		 /* node for event queue */
-    unsigned long pq_key;	/* the same type as jiffies global */
-    
-    int prio_activity;		/* for which prios are we active */
-    enum htb_cmode cmode;	/* current mode of the class */
+	union {
+		struct htb_class_leaf {
+			struct Qdisc *q;
+			int prio;
+			int aprio;
+			int quantum;
+			int deficit[TC_HTB_MAXDEPTH];
+			struct list_head drop_list;
+		} leaf;
+		struct htb_class_inner {
+			struct rb_root feed[TC_HTB_NUMPRIO];	/* feed trees */
+			struct rb_node *ptr[TC_HTB_NUMPRIO];	/* current class ptr */
+			/* When class changes from state 1->2 and disconnects from
+			   parent's feed then we lost ptr value and start from the
+			   first child again. Here we store classid of the
+			   last valid ptr (used when ptr is NULL). */
+			u32 last_ptr_id[TC_HTB_NUMPRIO];
+		} inner;
+	} un;
+	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
+	struct rb_node pq_node;	/* node for event queue */
+	unsigned long pq_key;	/* the same type as jiffies global */
 
-    /* class attached filters */
-    struct tcf_proto *filter_list;
-    int filter_cnt;
+	int prio_activity;	/* for which prios are we active */
+	enum htb_cmode cmode;	/* current mode of the class */
 
-    int warned;		/* only one warning about non work conserving .. */
+	/* class attached filters */
+	struct tcf_proto *filter_list;
+	int filter_cnt;
 
-    /* token bucket parameters */
-    struct qdisc_rate_table *rate;	/* rate table of the class itself */
-    struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
-    long buffer,cbuffer;		/* token bucket depth/rate */
-    psched_tdiff_t mbuffer;		/* max wait time */
-    long tokens,ctokens;		/* current number of tokens */
-    psched_time_t t_c;			/* checkpoint time */
+	int warned;		/* only one warning about non work conserving .. */
+
+	/* token bucket parameters */
+	struct qdisc_rate_table *rate;	/* rate table of the class itself */
+	struct qdisc_rate_table *ceil;	/* ceiling rate (limits borrows too) */
+	long buffer, cbuffer;	/* token bucket depth/rate */
+	psched_tdiff_t mbuffer;	/* max wait time */
+	long tokens, ctokens;	/* current number of tokens */
+	psched_time_t t_c;	/* checkpoint time */
 };
 
 /* TODO: maybe compute rate when size is too large .. or drop ? */
-static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
-	int size)
-{ 
-    int slot = size >> rate->rate.cell_log;
-    if (slot > 255) {
-	cl->xstats.giants++;
-	slot = 255;
-    }
-    return rate->data[slot];
+static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
+			   int size)
+{
+	int slot = size >> rate->rate.cell_log;
+	if (slot > 255) {
+		cl->xstats.giants++;
+		slot = 255;
+	}
+	return rate->data[slot];
 }
 
-struct htb_sched
-{
-    struct list_head root;			/* root classes list */
-    struct list_head hash[HTB_HSIZE];		/* hashed by classid */
-    struct list_head drops[TC_HTB_NUMPRIO];	/* active leaves (for drops) */
-    
-    /* self list - roots of self generating tree */
-    struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-    int row_mask[TC_HTB_MAXDEPTH];
-    struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
-    u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+struct htb_sched {
+	struct list_head root;	/* root classes list */
+	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
+	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
-    /* self wait list - roots of wait PQs per row */
-    struct rb_root wait_pq[TC_HTB_MAXDEPTH];
+	/* self list - roots of self generating tree */
+	struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+	int row_mask[TC_HTB_MAXDEPTH];
+	struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+	u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 
-    /* time of nearest event per level (row) */
-    unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
+	/* self wait list - roots of wait PQs per row */
+	struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 
-    /* cached value of jiffies in dequeue */
-    unsigned long jiffies;
+	/* time of nearest event per level (row) */
+	unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
 
-    /* whether we hit non-work conserving class during this dequeue; we use */
-    int nwc_hit;	/* this to disable mindelay complaint in dequeue */
+	/* cached value of jiffies in dequeue */
+	unsigned long jiffies;
 
-    int defcls;		/* class where unclassified flows go to */
-    u32 debug;		/* subsystem debug levels */
+	/* whether we hit non-work conserving class during this dequeue; we use */
+	int nwc_hit;		/* this to disable mindelay complaint in dequeue */
 
-    /* filters for qdisc itself */
-    struct tcf_proto *filter_list;
-    int filter_cnt;
+	int defcls;		/* class where unclassified flows go to */
 
-    int rate2quantum;		/* quant = rate / rate2quantum */
-    psched_time_t now;		/* cached dequeue time */
-    struct timer_list timer;	/* send delay timer */
+	/* filters for qdisc itself */
+	struct tcf_proto *filter_list;
+	int filter_cnt;
+
+	int rate2quantum;	/* quant = rate / rate2quantum */
+	psched_time_t now;	/* cached dequeue time */
+	struct timer_list timer;	/* send delay timer */
 #ifdef HTB_RATECM
-    struct timer_list rttim;	/* rate computer timer */
-    int recmp_bucket;		/* which hash bucket to recompute next */
+	struct timer_list rttim;	/* rate computer timer */
+	int recmp_bucket;	/* which hash bucket to recompute next */
 #endif
-    
-    /* non shaped skbs; let them go directly thru */
-    struct sk_buff_head direct_queue;
-    int direct_qlen;  /* max qlen of above */
 
-    long direct_pkts;
+	/* non shaped skbs; let them go directly thru */
+	struct sk_buff_head direct_queue;
+	int direct_qlen;	/* max qlen of above */
+
+	long direct_pkts;
 };
 
 /* compute hash of size HTB_HSIZE for given handle */
-static __inline__ int htb_hash(u32 h) 
+static inline int htb_hash(u32 h)
 {
 #if HTB_HSIZE != 16
- #error "Declare new hash for your HTB_HSIZE"
+#error "Declare new hash for your HTB_HSIZE"
 #endif
-    h ^= h>>8;	/* stolen from cbq_hash */
-    h ^= h>>4;
-    return h & 0xf;
+	h ^= h >> 8;		/* stolen from cbq_hash */
+	h ^= h >> 4;
+	return h & 0xf;
 }
 
 /* find class in global hash table using given handle */
-static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
+static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct list_head *p;
-	if (TC_H_MAJ(handle) != sch->handle) 
+	struct hlist_node *p;
+	struct htb_class *cl;
+
+	if (TC_H_MAJ(handle) != sch->handle)
 		return NULL;
-	
-	list_for_each (p,q->hash+htb_hash(handle)) {
-		struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+
+	hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
 		if (cl->classid == handle)
 			return cl;
 	}
@@ -304,7 +251,8 @@
 	return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
 }
 
-static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
+				      int *qerr)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl;
@@ -316,8 +264,8 @@
 	   note that nfmark can be used too by attaching filter fw with no
 	   rules in it */
 	if (skb->priority == sch->handle)
-		return HTB_DIRECT;  /* X:0 (direct flow) selected */
-	if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) 
+		return HTB_DIRECT;	/* X:0 (direct flow) selected */
+	if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
 		return cl;
 
 	*qerr = NET_XMIT_BYPASS;
@@ -326,7 +274,7 @@
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
 		case TC_ACT_QUEUED:
-		case TC_ACT_STOLEN: 
+		case TC_ACT_STOLEN:
 			*qerr = NET_XMIT_SUCCESS;
 		case TC_ACT_SHOT:
 			return NULL;
@@ -335,97 +283,44 @@
 		if (result == TC_POLICE_SHOT)
 			return HTB_DIRECT;
 #endif
-		if ((cl = (void*)res.class) == NULL) {
+		if ((cl = (void *)res.class) == NULL) {
 			if (res.classid == sch->handle)
-				return HTB_DIRECT;  /* X:0 (direct flow) */
-			if ((cl = htb_find(res.classid,sch)) == NULL)
-				break; /* filter selected invalid classid */
+				return HTB_DIRECT;	/* X:0 (direct flow) */
+			if ((cl = htb_find(res.classid, sch)) == NULL)
+				break;	/* filter selected invalid classid */
 		}
 		if (!cl->level)
-			return cl; /* we hit leaf; return it */
+			return cl;	/* we hit leaf; return it */
 
 		/* we have got inner class; apply inner filter chain */
 		tcf = cl->filter_list;
 	}
 	/* classification failed; try to use default class */
-	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
+	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 	if (!cl || cl->level)
-		return HTB_DIRECT; /* bad default .. this is safe bet */
+		return HTB_DIRECT;	/* bad default .. this is safe bet */
 	return cl;
 }
 
-#ifdef HTB_DEBUG
-static void htb_next_rb_node(struct rb_node **n);
-#define HTB_DUMTREE(root,memb) if(root) { \
-	struct rb_node *n = (root)->rb_node; \
-	while (n->rb_left) n = n->rb_left; \
-	while (n) { \
-		struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
-		printk(" %x",cl->classid); htb_next_rb_node (&n); \
-	} }
-
-static void htb_debug_dump (struct htb_sched *q)
-{
-	int i,p;
-	printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
-	/* rows */
-	for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
-		printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
-		for (p=0;p<TC_HTB_NUMPRIO;p++) {
-			if (!q->row[i][p].rb_node) continue;
-			printk(" p%d:",p);
-			HTB_DUMTREE(q->row[i]+p,node[p]);
-		}
-		printk("\n");
-	}
-	/* classes */
-	for (i = 0; i < HTB_HSIZE; i++) {
-		struct list_head *l;
-		list_for_each (l,q->hash+i) {
-			struct htb_class *cl = list_entry(l,struct htb_class,hlist);
-			long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-			printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
-					"pa=%x f:",
-				cl->classid,cl->cmode,cl->tokens,cl->ctokens,
-				cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
-				cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
-			if (cl->level)
-			for (p=0;p<TC_HTB_NUMPRIO;p++) {
-				if (!cl->un.inner.feed[p].rb_node) continue;
-				printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
-				HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
-			}
-			printk("\n");
-		}
-	}
-}
-#endif
 /**
  * htb_add_to_id_tree - adds class to the round robin list
  *
  * Routine adds class to the list (actually tree) sorted by classid.
  * Make sure that class is not already on such list for given prio.
  */
-static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
-		struct htb_class *cl,int prio)
+static void htb_add_to_id_tree(struct rb_root *root,
+			       struct htb_class *cl, int prio)
 {
 	struct rb_node **p = &root->rb_node, *parent = NULL;
-	HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
-#ifdef HTB_DEBUG
-	if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
-	HTB_CHCL(cl);
-	if (*p) {
-		struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
-		HTB_CHCL(x);
-	}
-#endif
+
 	while (*p) {
-		struct htb_class *c; parent = *p;
+		struct htb_class *c;
+		parent = *p;
 		c = rb_entry(parent, struct htb_class, node[prio]);
-		HTB_CHCL(c);
+
 		if (cl->classid > c->classid)
 			p = &parent->rb_right;
-		else 
+		else
 			p = &parent->rb_left;
 	}
 	rb_link_node(&cl->node[prio], parent, p);
@@ -439,17 +334,11 @@
  * change its mode in cl->pq_key microseconds. Make sure that class is not
  * already in the queue.
  */
-static void htb_add_to_wait_tree (struct htb_sched *q,
-		struct htb_class *cl,long delay,int debug_hint)
+static void htb_add_to_wait_tree(struct htb_sched *q,
+				 struct htb_class *cl, long delay)
 {
 	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
-	HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
-#ifdef HTB_DEBUG
-	if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
-	HTB_CHCL(cl);
-	if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
-		printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
-#endif
+
 	cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
 	if (cl->pq_key == q->jiffies)
 		cl->pq_key++;
@@ -457,13 +346,14 @@
 	/* update the nearest event cache */
 	if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
 		q->near_ev_cache[cl->level] = cl->pq_key;
-	
+
 	while (*p) {
-		struct htb_class *c; parent = *p;
+		struct htb_class *c;
+		parent = *p;
 		c = rb_entry(parent, struct htb_class, pq_node);
 		if (time_after_eq(cl->pq_key, c->pq_key))
 			p = &parent->rb_right;
-		else 
+		else
 			p = &parent->rb_left;
 	}
 	rb_link_node(&cl->pq_node, parent, p);
@@ -476,7 +366,7 @@
  * When we are past last key we return NULL.
  * Average complexity is 2 steps per call.
  */
-static void htb_next_rb_node(struct rb_node **n)
+static inline void htb_next_rb_node(struct rb_node **n)
 {
 	*n = rb_next(*n);
 }
@@ -487,42 +377,51 @@
  * The class is added to row at priorities marked in mask.
  * It does nothing if mask == 0.
  */
-static inline void htb_add_class_to_row(struct htb_sched *q, 
-		struct htb_class *cl,int mask)
+static inline void htb_add_class_to_row(struct htb_sched *q,
+					struct htb_class *cl, int mask)
 {
-	HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
-			cl->classid,mask,q->row_mask[cl->level]);
-	HTB_CHCL(cl);
 	q->row_mask[cl->level] |= mask;
 	while (mask) {
 		int prio = ffz(~mask);
 		mask &= ~(1 << prio);
-		htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
+		htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
 	}
 }
 
+/* If this triggers, it is a bug in this code, but it need not be fatal */
+static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
+{
+	if (RB_EMPTY_NODE(rb)) {
+		WARN_ON(1);
+	} else {
+		rb_erase(rb, root);
+		RB_CLEAR_NODE(rb);
+	}
+}
+
+
 /**
  * htb_remove_class_from_row - removes class from its row
  *
  * The class is removed from row at priorities marked in mask.
  * It does nothing if mask == 0.
  */
-static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
-		struct htb_class *cl,int mask)
+static inline void htb_remove_class_from_row(struct htb_sched *q,
+						 struct htb_class *cl, int mask)
 {
 	int m = 0;
-	HTB_CHCL(cl);
+
 	while (mask) {
 		int prio = ffz(~mask);
+
 		mask &= ~(1 << prio);
-		if (q->ptr[cl->level][prio] == cl->node+prio)
-			htb_next_rb_node(q->ptr[cl->level]+prio);
-		htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
-		if (!q->row[cl->level][prio].rb_node) 
+		if (q->ptr[cl->level][prio] == cl->node + prio)
+			htb_next_rb_node(q->ptr[cl->level] + prio);
+
+		htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
+		if (!q->row[cl->level][prio].rb_node)
 			m |= 1 << prio;
 	}
-	HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
-			cl->classid,mask,q->row_mask[cl->level],m);
 	q->row_mask[cl->level] &= ~m;
 }
 
@@ -533,34 +432,31 @@
  * for priorities it is participating on. cl->cmode must be new 
  * (activated) mode. It does nothing if cl->prio_activity == 0.
  */
-static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
+static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 {
 	struct htb_class *p = cl->parent;
-	long m,mask = cl->prio_activity;
-	HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
-	HTB_CHCL(cl);
+	long m, mask = cl->prio_activity;
 
 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
-		HTB_CHCL(p);
-		m = mask; while (m) {
+		m = mask;
+		while (m) {
 			int prio = ffz(~m);
 			m &= ~(1 << prio);
-			
+
 			if (p->un.inner.feed[prio].rb_node)
 				/* parent already has its feed in use so that
 				   reset bit in mask as parent is already ok */
 				mask &= ~(1 << prio);
-			
-			htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
+
+			htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
 		}
-		HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
-				p->classid,p->prio_activity,mask,p->cmode);
 		p->prio_activity |= mask;
-		cl = p; p = cl->parent;
-		HTB_CHCL(cl);
+		cl = p;
+		p = cl->parent;
+
 	}
 	if (cl->cmode == HTB_CAN_SEND && mask)
-		htb_add_class_to_row(q,cl,mask);
+		htb_add_class_to_row(q, cl, mask);
 }
 
 /**
@@ -573,39 +469,52 @@
 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 {
 	struct htb_class *p = cl->parent;
-	long m,mask = cl->prio_activity;
-	HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
-	HTB_CHCL(cl);
+	long m, mask = cl->prio_activity;
 
 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
-		m = mask; mask = 0; 
+		m = mask;
+		mask = 0;
 		while (m) {
 			int prio = ffz(~m);
 			m &= ~(1 << prio);
-			
-			if (p->un.inner.ptr[prio] == cl->node+prio) {
+
+			if (p->un.inner.ptr[prio] == cl->node + prio) {
 				/* we are removing child which is pointed to from
 				   parent feed - forget the pointer but remember
 				   classid */
 				p->un.inner.last_ptr_id[prio] = cl->classid;
 				p->un.inner.ptr[prio] = NULL;
 			}
-			
-			htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);
-			
-			if (!p->un.inner.feed[prio].rb_node) 
+
+			htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
+
+			if (!p->un.inner.feed[prio].rb_node)
 				mask |= 1 << prio;
 		}
-		HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
-				p->classid,p->prio_activity,mask,p->cmode);
+
 		p->prio_activity &= ~mask;
-		cl = p; p = cl->parent;
-		HTB_CHCL(cl);
+		cl = p;
+		p = cl->parent;
+
 	}
-	if (cl->cmode == HTB_CAN_SEND && mask) 
-		htb_remove_class_from_row(q,cl,mask);
+	if (cl->cmode == HTB_CAN_SEND && mask)
+		htb_remove_class_from_row(q, cl, mask);
 }
 
+#if HTB_HYSTERESIS
+static inline long htb_lowater(const struct htb_class *cl)
+{
+	return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
+}
+static inline long htb_hiwater(const struct htb_class *cl)
+{
+	return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
+}
+#else
+#define htb_lowater(cl)	(0)
+#define htb_hiwater(cl)	(0)
+#endif
+
 /**
  * htb_class_mode - computes and returns current class mode
  *
@@ -617,28 +526,21 @@
  * 0 .. -cl->{c,}buffer range. It is meant to limit number of
  * mode transitions per time unit. The speed gain is about 1/6.
  */
-static __inline__ enum htb_cmode 
-htb_class_mode(struct htb_class *cl,long *diff)
+static inline enum htb_cmode
+htb_class_mode(struct htb_class *cl, long *diff)
 {
-    long toks;
+	long toks;
 
-    if ((toks = (cl->ctokens + *diff)) < (
-#if HTB_HYSTERESIS
-	    cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
-#endif
-       	    0)) {
-	    *diff = -toks;
-	    return HTB_CANT_SEND;
-    }
-    if ((toks = (cl->tokens + *diff)) >= (
-#if HTB_HYSTERESIS
-	    cl->cmode == HTB_CAN_SEND ? -cl->buffer :
-#endif
-	    0))
-	    return HTB_CAN_SEND;
+	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
+		*diff = -toks;
+		return HTB_CANT_SEND;
+	}
 
-    *diff = -toks;
-    return HTB_MAY_BORROW;
+	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
+		return HTB_CAN_SEND;
+
+	*diff = -toks;
+	return HTB_MAY_BORROW;
 }
 
 /**
@@ -650,24 +552,21 @@
  * be different from old one and cl->pq_key has to be valid if changing
  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
  */
-static void 
+static void
 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
-{ 
-	enum htb_cmode new_mode = htb_class_mode(cl,diff);
-	
-	HTB_CHCL(cl);
-	HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);
+{
+	enum htb_cmode new_mode = htb_class_mode(cl, diff);
 
 	if (new_mode == cl->cmode)
-		return;	
-	
-	if (cl->prio_activity) { /* not necessary: speed optimization */
-		if (cl->cmode != HTB_CANT_SEND) 
-			htb_deactivate_prios(q,cl);
+		return;
+
+	if (cl->prio_activity) {	/* not necessary: speed optimization */
+		if (cl->cmode != HTB_CANT_SEND)
+			htb_deactivate_prios(q, cl);
 		cl->cmode = new_mode;
-		if (new_mode != HTB_CANT_SEND) 
-			htb_activate_prios(q,cl);
-	} else 
+		if (new_mode != HTB_CANT_SEND)
+			htb_activate_prios(q, cl);
+	} else
 		cl->cmode = new_mode;
 }
 
@@ -678,14 +577,15 @@
  * for the prio. It can be called on already active leaf safely.
  * It also adds leaf into droplist.
  */
-static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 {
 	BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
-	HTB_CHCL(cl);
+
 	if (!cl->prio_activity) {
 		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
-		htb_activate_prios(q,cl);
-		list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
+		htb_activate_prios(q, cl);
+		list_add_tail(&cl->un.leaf.drop_list,
+			      q->drops + cl->un.leaf.aprio);
 	}
 }
 
@@ -695,120 +595,120 @@
  * Make sure that leaf is active. In the other words it can't be called
  * with non-active leaf. It also removes class from the drop list.
  */
-static __inline__ void 
-htb_deactivate(struct htb_sched *q,struct htb_class *cl)
+static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 {
 	BUG_TRAP(cl->prio_activity);
-	HTB_CHCL(cl);
-	htb_deactivate_prios(q,cl);
+
+	htb_deactivate_prios(q, cl);
 	cl->prio_activity = 0;
 	list_del_init(&cl->un.leaf.drop_list);
 }
 
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-    int ret;
-    struct htb_sched *q = qdisc_priv(sch);
-    struct htb_class *cl = htb_classify(skb,sch,&ret);
+	int ret;
+	struct htb_sched *q = qdisc_priv(sch);
+	struct htb_class *cl = htb_classify(skb, sch, &ret);
 
-    if (cl == HTB_DIRECT) {
-	/* enqueue to helper queue */
-	if (q->direct_queue.qlen < q->direct_qlen) {
-	    __skb_queue_tail(&q->direct_queue, skb);
-	    q->direct_pkts++;
-	} else {
-	    kfree_skb(skb);
-	    sch->qstats.drops++;
-	    return NET_XMIT_DROP;
-	}
+	if (cl == HTB_DIRECT) {
+		/* enqueue to helper queue */
+		if (q->direct_queue.qlen < q->direct_qlen) {
+			__skb_queue_tail(&q->direct_queue, skb);
+			q->direct_pkts++;
+		} else {
+			kfree_skb(skb);
+			sch->qstats.drops++;
+			return NET_XMIT_DROP;
+		}
 #ifdef CONFIG_NET_CLS_ACT
-    } else if (!cl) {
-	if (ret == NET_XMIT_BYPASS)
-		sch->qstats.drops++;
-	kfree_skb (skb);
-	return ret;
+	} else if (!cl) {
+		if (ret == NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
 #endif
-    } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-	sch->qstats.drops++;
-	cl->qstats.drops++;
-	return NET_XMIT_DROP;
-    } else {
-	cl->bstats.packets++; cl->bstats.bytes += skb->len;
-	htb_activate (q,cl);
-    }
+	} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
+		   NET_XMIT_SUCCESS) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+		return NET_XMIT_DROP;
+	} else {
+		cl->bstats.packets++;
+		cl->bstats.bytes += skb->len;
+		htb_activate(q, cl);
+	}
 
-    sch->q.qlen++;
-    sch->bstats.packets++; sch->bstats.bytes += skb->len;
-    HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
-    return NET_XMIT_SUCCESS;
+	sch->q.qlen++;
+	sch->bstats.packets++;
+	sch->bstats.bytes += skb->len;
+	return NET_XMIT_SUCCESS;
 }
 
 /* TODO: requeuing packet charges it to policers again !! */
 static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
-    struct htb_sched *q = qdisc_priv(sch);
-    int ret =  NET_XMIT_SUCCESS;
-    struct htb_class *cl = htb_classify(skb,sch, &ret);
-    struct sk_buff *tskb;
+	struct htb_sched *q = qdisc_priv(sch);
+	int ret = NET_XMIT_SUCCESS;
+	struct htb_class *cl = htb_classify(skb, sch, &ret);
+	struct sk_buff *tskb;
 
-    if (cl == HTB_DIRECT || !cl) {
-	/* enqueue to helper queue */
-	if (q->direct_queue.qlen < q->direct_qlen && cl) {
-	    __skb_queue_head(&q->direct_queue, skb);
-	} else {
-            __skb_queue_head(&q->direct_queue, skb);
-            tskb = __skb_dequeue_tail(&q->direct_queue);
-            kfree_skb (tskb);
-            sch->qstats.drops++;
-            return NET_XMIT_CN;	
-	}
-    } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-	sch->qstats.drops++;
-	cl->qstats.drops++;
-	return NET_XMIT_DROP;
-    } else 
-	    htb_activate (q,cl);
+	if (cl == HTB_DIRECT || !cl) {
+		/* enqueue to helper queue */
+		if (q->direct_queue.qlen < q->direct_qlen && cl) {
+			__skb_queue_head(&q->direct_queue, skb);
+		} else {
+			__skb_queue_head(&q->direct_queue, skb);
+			tskb = __skb_dequeue_tail(&q->direct_queue);
+			kfree_skb(tskb);
+			sch->qstats.drops++;
+			return NET_XMIT_CN;
+		}
+	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+		   NET_XMIT_SUCCESS) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+		return NET_XMIT_DROP;
+	} else
+		htb_activate(q, cl);
 
-    sch->q.qlen++;
-    sch->qstats.requeues++;
-    HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
-    return NET_XMIT_SUCCESS;
+	sch->q.qlen++;
+	sch->qstats.requeues++;
+	return NET_XMIT_SUCCESS;
 }
 
 static void htb_timer(unsigned long arg)
 {
-    struct Qdisc *sch = (struct Qdisc*)arg;
-    sch->flags &= ~TCQ_F_THROTTLED;
-    wmb();
-    netif_schedule(sch->dev);
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	sch->flags &= ~TCQ_F_THROTTLED;
+	wmb();
+	netif_schedule(sch->dev);
 }
 
 #ifdef HTB_RATECM
 #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
 static void htb_rate_timer(unsigned long arg)
 {
-	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct Qdisc *sch = (struct Qdisc *)arg;
 	struct htb_sched *q = qdisc_priv(sch);
-	struct list_head *p;
+	struct hlist_node *p;
+	struct htb_class *cl;
+
 
 	/* lock queue so that we can muck with it */
-	HTB_QLOCK(sch);
-	HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);
+	spin_lock_bh(&sch->dev->queue_lock);
 
 	q->rttim.expires = jiffies + HZ;
 	add_timer(&q->rttim);
 
 	/* scan and recompute one bucket at time */
-	if (++q->recmp_bucket >= HTB_HSIZE) 
+	if (++q->recmp_bucket >= HTB_HSIZE)
 		q->recmp_bucket = 0;
-	list_for_each (p,q->hash+q->recmp_bucket) {
-		struct htb_class *cl = list_entry(p,struct htb_class,hlist);
-		HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
-				cl->classid,cl->sum_bytes,cl->sum_packets);
-		RT_GEN (cl->sum_bytes,cl->rate_bytes);
-		RT_GEN (cl->sum_packets,cl->rate_packets);
+
+	hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
+		RT_GEN(cl->sum_bytes, cl->rate_bytes);
+		RT_GEN(cl->sum_packets, cl->rate_packets);
 	}
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 }
 #endif
 
@@ -823,12 +723,11 @@
  * CAN_SEND) because we can use more precise clock that event queue here.
  * In such case we remove class from event queue first.
  */
-static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
-		int level,int bytes)
-{	
-	long toks,diff;
+static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
+			     int level, int bytes)
+{
+	long toks, diff;
 	enum htb_cmode old_mode;
-	HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);
 
 #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
 	if (toks > cl->B) toks = cl->B; \
@@ -837,47 +736,31 @@
 	cl->T = toks
 
 	while (cl) {
-		HTB_CHCL(cl);
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
-		if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
-			if (net_ratelimit())
-				printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
-				       cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-				       q->now.tv_sec * 1000000ULL + q->now.tv_usec,
-				       cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
-				       (unsigned long long) q->now,
-				       (unsigned long long) cl->t_c,
-#endif
-				       q->jiffies);
-			diff = 1000;
-		}
-#endif
+		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
 		if (cl->level >= level) {
-			if (cl->level == level) cl->xstats.lends++;
-			HTB_ACCNT (tokens,buffer,rate);
+			if (cl->level == level)
+				cl->xstats.lends++;
+			HTB_ACCNT(tokens, buffer, rate);
 		} else {
 			cl->xstats.borrows++;
-			cl->tokens += diff; /* we moved t_c; update tokens */
+			cl->tokens += diff;	/* we moved t_c; update tokens */
 		}
-		HTB_ACCNT (ctokens,cbuffer,ceil);
+		HTB_ACCNT(ctokens, cbuffer, ceil);
 		cl->t_c = q->now;
-		HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);
 
-		old_mode = cl->cmode; diff = 0;
-		htb_change_class_mode(q,cl,&diff);
+		old_mode = cl->cmode;
+		diff = 0;
+		htb_change_class_mode(q, cl, &diff);
 		if (old_mode != cl->cmode) {
 			if (old_mode != HTB_CAN_SEND)
-				htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
+				htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 			if (cl->cmode != HTB_CAN_SEND)
-				htb_add_to_wait_tree (q,cl,diff,1);
+				htb_add_to_wait_tree(q, cl, diff);
 		}
-		
 #ifdef HTB_RATECM
 		/* update rate counters */
-		cl->sum_bytes += bytes; cl->sum_packets++;
+		cl->sum_bytes += bytes;
+		cl->sum_packets++;
 #endif
 
 		/* update byte stats except for leaves which are already updated */
@@ -896,60 +779,46 @@
  * next pending event (0 for no event in pq).
  * Note: Aplied are events whose have cl->pq_key <= jiffies.
  */
-static long htb_do_events(struct htb_sched *q,int level)
+static long htb_do_events(struct htb_sched *q, int level)
 {
 	int i;
-	HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
-			level,q->wait_pq[level].rb_node,q->row_mask[level]);
+
 	for (i = 0; i < 500; i++) {
 		struct htb_class *cl;
 		long diff;
 		struct rb_node *p = q->wait_pq[level].rb_node;
-		if (!p) return 0;
-		while (p->rb_left) p = p->rb_left;
+		if (!p)
+			return 0;
+		while (p->rb_left)
+			p = p->rb_left;
 
 		cl = rb_entry(p, struct htb_class, pq_node);
 		if (time_after(cl->pq_key, q->jiffies)) {
-			HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
 			return cl->pq_key - q->jiffies;
 		}
-		htb_safe_rb_erase(p,q->wait_pq+level);
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer);
-#ifdef HTB_DEBUG
-		if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
-			if (net_ratelimit())
-				printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
-				       cl->classid, diff,
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-				       q->now.tv_sec * 1000000ULL + q->now.tv_usec,
-				       cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec,
-#else
-				       (unsigned long long) q->now,
-				       (unsigned long long) cl->t_c,
-#endif
-				       q->jiffies);
-			diff = 1000;
-		}
-#endif
-		htb_change_class_mode(q,cl,&diff);
+		htb_safe_rb_erase(p, q->wait_pq + level);
+		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+		htb_change_class_mode(q, cl, &diff);
 		if (cl->cmode != HTB_CAN_SEND)
-			htb_add_to_wait_tree (q,cl,diff,2);
+			htb_add_to_wait_tree(q, cl, diff);
 	}
 	if (net_ratelimit())
 		printk(KERN_WARNING "htb: too many events !\n");
-	return HZ/10;
+	return HZ / 10;
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
    is no such one exists. */
-static struct rb_node *
-htb_id_find_next_upper(int prio,struct rb_node *n,u32 id)
+static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
+					      u32 id)
 {
 	struct rb_node *r = NULL;
 	while (n) {
-		struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]);
-		if (id == cl->classid) return n;
-		
+		struct htb_class *cl =
+		    rb_entry(n, struct htb_class, node[prio]);
+		if (id == cl->classid)
+			return n;
+
 		if (id > cl->classid) {
 			n = n->rb_right;
 		} else {
@@ -965,49 +834,49 @@
  *
  * Find leaf where current feed pointers points to.
  */
-static struct htb_class *
-htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid)
+static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
+					 struct rb_node **pptr, u32 * pid)
 {
 	int i;
 	struct {
 		struct rb_node *root;
 		struct rb_node **pptr;
 		u32 *pid;
-	} stk[TC_HTB_MAXDEPTH],*sp = stk;
-	
+	} stk[TC_HTB_MAXDEPTH], *sp = stk;
+
 	BUG_TRAP(tree->rb_node);
 	sp->root = tree->rb_node;
 	sp->pptr = pptr;
 	sp->pid = pid;
 
 	for (i = 0; i < 65535; i++) {
-		HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid);
-		
-		if (!*sp->pptr && *sp->pid) { 
+		if (!*sp->pptr && *sp->pid) {
 			/* ptr was invalidated but id is valid - try to recover 
 			   the original or next ptr */
-			*sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid);
+			*sp->pptr =
+			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
 		}
-		*sp->pid = 0; /* ptr is valid now so that remove this hint as it
-			         can become out of date quickly */
-		if (!*sp->pptr) { /* we are at right end; rewind & go up */
+		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
+				   can become out of date quickly */
+		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
 			*sp->pptr = sp->root;
-			while ((*sp->pptr)->rb_left) 
+			while ((*sp->pptr)->rb_left)
 				*sp->pptr = (*sp->pptr)->rb_left;
 			if (sp > stk) {
 				sp--;
-				BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
-				htb_next_rb_node (sp->pptr);
+				BUG_TRAP(*sp->pptr);
+				if (!*sp->pptr)
+					return NULL;
+				htb_next_rb_node(sp->pptr);
 			}
 		} else {
 			struct htb_class *cl;
-			cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
-			HTB_CHCL(cl);
-			if (!cl->level) 
+			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
+			if (!cl->level)
 				return cl;
 			(++sp)->root = cl->un.inner.feed[prio].rb_node;
-			sp->pptr = cl->un.inner.ptr+prio;
-			sp->pid = cl->un.inner.last_ptr_id+prio;
+			sp->pptr = cl->un.inner.ptr + prio;
+			sp->pid = cl->un.inner.last_ptr_id + prio;
 		}
 	}
 	BUG_TRAP(0);
@@ -1016,21 +885,21 @@
 
 /* dequeues packet at given priority and level; call only if
    you are sure that there is active class at prio/level */
-static struct sk_buff *
-htb_dequeue_tree(struct htb_sched *q,int prio,int level)
+static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
+					int level)
 {
 	struct sk_buff *skb = NULL;
-	struct htb_class *cl,*start;
+	struct htb_class *cl, *start;
 	/* look initial class up in the row */
-	start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,
-			q->ptr[level]+prio,q->last_ptr_id[level]+prio);
-	
+	start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
+				     q->ptr[level] + prio,
+				     q->last_ptr_id[level] + prio);
+
 	do {
 next:
-		BUG_TRAP(cl); 
-		if (!cl) return NULL;
-		HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
-				prio,level,cl->classid,cl->un.leaf.deficit[level]);
+		BUG_TRAP(cl);
+		if (!cl)
+			return NULL;
 
 		/* class can be empty - it is unlikely but can be true if leaf
 		   qdisc drops packets in enqueue routine or if someone used
@@ -1038,64 +907,69 @@
 		   simply deactivate and skip such class */
 		if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 			struct htb_class *next;
-			htb_deactivate(q,cl);
+			htb_deactivate(q, cl);
 
 			/* row/level might become empty */
 			if ((q->row_mask[level] & (1 << prio)) == 0)
-				return NULL; 
-			
-			next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,
-					prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio);
+				return NULL;
 
-			if (cl == start) /* fix start if we just deleted it */
+			next = htb_lookup_leaf(q->row[level] + prio,
+					       prio, q->ptr[level] + prio,
+					       q->last_ptr_id[level] + prio);
+
+			if (cl == start)	/* fix start if we just deleted it */
 				start = next;
 			cl = next;
 			goto next;
 		}
-	
-		if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) 
+
+		skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
+		if (likely(skb != NULL))
 			break;
 		if (!cl->warned) {
-			printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
+			printk(KERN_WARNING
+			       "htb: class %X isn't work conserving ?!\n",
+			       cl->classid);
 			cl->warned = 1;
 		}
 		q->nwc_hit++;
-		htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
-		cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio,
-				q->last_ptr_id[level]+prio);
+		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+				  ptr[0]) + prio);
+		cl = htb_lookup_leaf(q->row[level] + prio, prio,
+				     q->ptr[level] + prio,
+				     q->last_ptr_id[level] + prio);
 
 	} while (cl != start);
 
 	if (likely(skb != NULL)) {
 		if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
-			HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
-				level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
 			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
-			htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
+			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+					  ptr[0]) + prio);
 		}
 		/* this used to be after charge_class but this constelation
 		   gives us slightly better performance */
 		if (!cl->un.leaf.q->q.qlen)
-			htb_deactivate (q,cl);
-		htb_charge_class (q,cl,level,skb->len);
+			htb_deactivate(q, cl);
+		htb_charge_class(q, cl, level, skb->len);
 	}
 	return skb;
 }
 
-static void htb_delay_by(struct Qdisc *sch,long delay)
+static void htb_delay_by(struct Qdisc *sch, long delay)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	if (delay <= 0) delay = 1;
-	if (unlikely(delay > 5*HZ)) {
+	if (delay <= 0)
+		delay = 1;
+	if (unlikely(delay > 5 * HZ)) {
 		if (net_ratelimit())
 			printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
-		delay = 5*HZ;
+		delay = 5 * HZ;
 	}
 	/* why don't use jiffies here ? because expires can be in past */
 	mod_timer(&q->timer, q->jiffies + delay);
 	sch->flags |= TCQ_F_THROTTLED;
 	sch->qstats.overlimits++;
-	HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
 }
 
 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
@@ -1104,22 +978,19 @@
 	struct htb_sched *q = qdisc_priv(sch);
 	int level;
 	long min_delay;
-#ifdef HTB_DEBUG
-	int evs_used = 0;
-#endif
 
 	q->jiffies = jiffies;
-	HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
-			sch->q.qlen);
 
 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
-	if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
+	skb = __skb_dequeue(&q->direct_queue);
+	if (skb != NULL) {
 		sch->flags &= ~TCQ_F_THROTTLED;
 		sch->q.qlen--;
 		return skb;
 	}
 
-	if (!sch->q.qlen) goto fin;
+	if (!sch->q.qlen)
+		goto fin;
 	PSCHED_GET_TIME(q->now);
 
 	min_delay = LONG_MAX;
@@ -1129,21 +1000,19 @@
 		int m;
 		long delay;
 		if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
-			delay = htb_do_events(q,level);
-			q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
-#ifdef HTB_DEBUG
-			evs_used++;
-#endif
+			delay = htb_do_events(q, level);
+			q->near_ev_cache[level] =
+			    q->jiffies + (delay ? delay : HZ);
 		} else
-			delay = q->near_ev_cache[level] - q->jiffies;	
-		
-		if (delay && min_delay > delay) 
+			delay = q->near_ev_cache[level] - q->jiffies;
+
+		if (delay && min_delay > delay)
 			min_delay = delay;
 		m = ~q->row_mask[level];
 		while (m != (int)(-1)) {
-			int prio = ffz (m);
+			int prio = ffz(m);
 			m |= 1 << prio;
-			skb = htb_dequeue_tree(q,prio,level);
+			skb = htb_dequeue_tree(q, prio, level);
 			if (likely(skb != NULL)) {
 				sch->q.qlen--;
 				sch->flags &= ~TCQ_F_THROTTLED;
@@ -1151,40 +1020,28 @@
 			}
 		}
 	}
-#ifdef HTB_DEBUG
-	if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
-		if (min_delay == LONG_MAX) {
-			printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
-					evs_used,q->jiffies,jiffies);
-			htb_debug_dump(q);
-		} else 
-			printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
-					"too small rate\n",min_delay);
-	}
-#endif
-	htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
+	htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
 fin:
-	HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
 	return skb;
 }
 
 /* try to drop from each class (by prio) until one succeed */
-static unsigned int htb_drop(struct Qdisc* sch)
+static unsigned int htb_drop(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	int prio;
 
 	for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
 		struct list_head *p;
-		list_for_each (p,q->drops+prio) {
+		list_for_each(p, q->drops + prio) {
 			struct htb_class *cl = list_entry(p, struct htb_class,
 							  un.leaf.drop_list);
 			unsigned int len;
-			if (cl->un.leaf.q->ops->drop && 
-				(len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+			if (cl->un.leaf.q->ops->drop &&
+			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
 				sch->q.qlen--;
 				if (!cl->un.leaf.q->q.qlen)
-					htb_deactivate (q,cl);
+					htb_deactivate(q, cl);
 				return len;
 			}
 		}
@@ -1194,29 +1051,25 @@
 
 /* reset all classes */
 /* always caled under BH & queue lock */
-static void htb_reset(struct Qdisc* sch)
+static void htb_reset(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	int i;
-	HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);
 
 	for (i = 0; i < HTB_HSIZE; i++) {
-		struct list_head *p;
-		list_for_each (p,q->hash+i) {
-			struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+		struct hlist_node *p;
+		struct htb_class *cl;
+
+		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
 			if (cl->level)
-				memset(&cl->un.inner,0,sizeof(cl->un.inner));
+				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 			else {
-				if (cl->un.leaf.q) 
+				if (cl->un.leaf.q)
 					qdisc_reset(cl->un.leaf.q);
 				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 			}
 			cl->prio_activity = 0;
 			cl->cmode = HTB_CAN_SEND;
-#ifdef HTB_DEBUG
-			cl->pq_node.rb_color = -1;
-			memset(cl->node,255,sizeof(cl->node));
-#endif
 
 		}
 	}
@@ -1224,12 +1077,12 @@
 	del_timer(&q->timer);
 	__skb_queue_purge(&q->direct_queue);
 	sch->q.qlen = 0;
-	memset(q->row,0,sizeof(q->row));
-	memset(q->row_mask,0,sizeof(q->row_mask));
-	memset(q->wait_pq,0,sizeof(q->wait_pq));
-	memset(q->ptr,0,sizeof(q->ptr));
+	memset(q->row, 0, sizeof(q->row));
+	memset(q->row_mask, 0, sizeof(q->row_mask));
+	memset(q->wait_pq, 0, sizeof(q->wait_pq));
+	memset(q->ptr, 0, sizeof(q->ptr));
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
-		INIT_LIST_HEAD(q->drops+i);
+		INIT_LIST_HEAD(q->drops + i);
 }
 
 static int htb_init(struct Qdisc *sch, struct rtattr *opt)
@@ -1238,36 +1091,31 @@
 	struct rtattr *tb[TCA_HTB_INIT];
 	struct tc_htb_glob *gopt;
 	int i;
-#ifdef HTB_DEBUG
-	printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
-			  HTB_VER >> 16,HTB_VER & 0xffff);
-#endif
 	if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) ||
-			tb[TCA_HTB_INIT-1] == NULL ||
-			RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
+	    tb[TCA_HTB_INIT - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) {
 		printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
 		return -EINVAL;
 	}
-	gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
+	gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]);
 	if (gopt->version != HTB_VER >> 16) {
-		printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
-				HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
+		printk(KERN_ERR
+		       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
 		return -EINVAL;
 	}
-	q->debug = gopt->debug;
-	HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
 
 	INIT_LIST_HEAD(&q->root);
 	for (i = 0; i < HTB_HSIZE; i++)
-		INIT_LIST_HEAD(q->hash+i);
+		INIT_HLIST_HEAD(q->hash + i);
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
-		INIT_LIST_HEAD(q->drops+i);
+		INIT_LIST_HEAD(q->drops + i);
 
 	init_timer(&q->timer);
 	skb_queue_head_init(&q->direct_queue);
 
 	q->direct_qlen = sch->dev->tx_queue_len;
-	if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
+	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
 		q->direct_qlen = 2;
 	q->timer.function = htb_timer;
 	q->timer.data = (unsigned long)sch;
@@ -1289,80 +1137,72 @@
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb->tail;
 	struct rtattr *rta;
 	struct tc_htb_glob gopt;
-	HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
-	HTB_QLOCK(sch);
+	spin_lock_bh(&sch->dev->queue_lock);
 	gopt.direct_pkts = q->direct_pkts;
 
-#ifdef HTB_DEBUG
-	if (HTB_DBG_COND(0,2))
-		htb_debug_dump(q);
-#endif
 	gopt.version = HTB_VER;
 	gopt.rate2quantum = q->rate2quantum;
 	gopt.defcls = q->defcls;
-	gopt.debug = q->debug;
-	rta = (struct rtattr*)b;
+	gopt.debug = 0;
+	rta = (struct rtattr *)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
 	rta->rta_len = skb->tail - b;
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	skb_trim(skb, skb->tail - skb->data);
 	return -1;
 }
 
 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
-	struct sk_buff *skb, struct tcmsg *tcm)
+			  struct sk_buff *skb, struct tcmsg *tcm)
 {
-#ifdef HTB_DEBUG
-	struct htb_sched *q = qdisc_priv(sch);
-#endif
-	struct htb_class *cl = (struct htb_class*)arg;
-	unsigned char	 *b = skb->tail;
+	struct htb_class *cl = (struct htb_class *)arg;
+	unsigned char *b = skb->tail;
 	struct rtattr *rta;
 	struct tc_htb_opt opt;
 
-	HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);
-
-	HTB_QLOCK(sch);
+	spin_lock_bh(&sch->dev->queue_lock);
 	tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->classid;
 	if (!cl->level && cl->un.leaf.q)
 		tcm->tcm_info = cl->un.leaf.q->handle;
 
-	rta = (struct rtattr*)b;
+	rta = (struct rtattr *)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 
-	memset (&opt,0,sizeof(opt));
+	memset(&opt, 0, sizeof(opt));
 
-	opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
-	opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
-	opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
-	opt.level = cl->level; 
+	opt.rate = cl->rate->rate;
+	opt.buffer = cl->buffer;
+	opt.ceil = cl->ceil->rate;
+	opt.cbuffer = cl->cbuffer;
+	opt.quantum = cl->un.leaf.quantum;
+	opt.prio = cl->un.leaf.prio;
+	opt.level = cl->level;
 	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 	rta->rta_len = skb->tail - b;
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
-	HTB_QUNLOCK(sch);
+	spin_unlock_bh(&sch->dev->queue_lock);
 	skb_trim(skb, b - skb->data);
 	return -1;
 }
 
 static int
-htb_dump_class_stats(struct Qdisc *sch, unsigned long arg,
-	struct gnet_dump *d)
+htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 
 #ifdef HTB_RATECM
-	cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
-	cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
+	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
+	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
 #endif
 
 	if (!cl->level && cl->un.leaf.q)
@@ -1379,21 +1219,22 @@
 }
 
 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
-	struct Qdisc **old)
+		     struct Qdisc **old)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 
 	if (cl && !cl->level) {
-		if (new == NULL && (new = qdisc_create_dflt(sch->dev, 
-					&pfifo_qdisc_ops)) == NULL)
-					return -ENOBUFS;
+		if (new == NULL && (new = qdisc_create_dflt(sch->dev,
+							    &pfifo_qdisc_ops))
+		    == NULL)
+			return -ENOBUFS;
 		sch_tree_lock(sch);
 		if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
 			if (cl->prio_activity)
-				htb_deactivate (qdisc_priv(sch),cl);
+				htb_deactivate(qdisc_priv(sch), cl);
 
 			/* TODO: is it correct ? Why CBQ doesn't do it ? */
-			sch->q.qlen -= (*old)->q.qlen;	
+			sch->q.qlen -= (*old)->q.qlen;
 			qdisc_reset(*old);
 		}
 		sch_tree_unlock(sch);
@@ -1402,20 +1243,16 @@
 	return -ENOENT;
 }
 
-static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
 {
-	struct htb_class *cl = (struct htb_class*)arg;
+	struct htb_class *cl = (struct htb_class *)arg;
 	return (cl && !cl->level) ? cl->un.leaf.q : NULL;
 }
 
 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
 {
-#ifdef HTB_DEBUG
-	struct htb_sched *q = qdisc_priv(sch);
-#endif
-	struct htb_class *cl = htb_find(classid,sch);
-	HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
-	if (cl) 
+	struct htb_class *cl = htb_find(classid, sch);
+	if (cl)
 		cl->refcnt++;
 	return (unsigned long)cl;
 }
@@ -1430,10 +1267,9 @@
 	}
 }
 
-static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
+static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
 	if (!cl->level) {
 		BUG_TRAP(cl->un.leaf.q);
 		sch->q.qlen -= cl->un.leaf.q->q.qlen;
@@ -1441,45 +1277,45 @@
 	}
 	qdisc_put_rtab(cl->rate);
 	qdisc_put_rtab(cl->ceil);
-	
-	htb_destroy_filters (&cl->filter_list);
-	
-	while (!list_empty(&cl->children)) 
-		htb_destroy_class (sch,list_entry(cl->children.next,
-					struct htb_class,sibling));
+
+	htb_destroy_filters(&cl->filter_list);
+
+	while (!list_empty(&cl->children))
+		htb_destroy_class(sch, list_entry(cl->children.next,
+						  struct htb_class, sibling));
 
 	/* note: this delete may happen twice (see htb_delete) */
-	list_del(&cl->hlist);
+	if (!hlist_unhashed(&cl->hlist))
+		hlist_del(&cl->hlist);
 	list_del(&cl->sibling);
-	
+
 	if (cl->prio_activity)
-		htb_deactivate (q,cl);
-	
+		htb_deactivate(q, cl);
+
 	if (cl->cmode != HTB_CAN_SEND)
-		htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
-	
+		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+
 	kfree(cl);
 }
 
 /* always caled under BH & queue lock */
-static void htb_destroy(struct Qdisc* sch)
+static void htb_destroy(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	HTB_DBG(0,1,"htb_destroy q=%p\n",q);
 
-	del_timer_sync (&q->timer);
+	del_timer_sync(&q->timer);
 #ifdef HTB_RATECM
-	del_timer_sync (&q->rttim);
+	del_timer_sync(&q->rttim);
 #endif
 	/* This line used to be after htb_destroy_class call below
 	   and surprisingly it worked in 2.4. But it must precede it 
 	   because filter need its target class alive to be able to call
 	   unbind_filter on it (without Oops). */
 	htb_destroy_filters(&q->filter_list);
-	
-	while (!list_empty(&q->root)) 
-		htb_destroy_class (sch,list_entry(q->root.next,
-					struct htb_class,sibling));
+
+	while (!list_empty(&q->root))
+		htb_destroy_class(sch, list_entry(q->root.next,
+						  struct htb_class, sibling));
 
 	__skb_queue_purge(&q->direct_queue);
 }
@@ -1487,24 +1323,25 @@
 static int htb_delete(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = (struct htb_class*)arg;
-	HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+	struct htb_class *cl = (struct htb_class *)arg;
 
 	// TODO: why don't allow to delete subtree ? references ? does
 	// tc subsys quarantee us that in htb_destroy it holds no class
 	// refs so that we can remove children safely there ?
 	if (!list_empty(&cl->children) || cl->filter_cnt)
 		return -EBUSY;
-	
+
 	sch_tree_lock(sch);
-	
+
 	/* delete from hash and active; remainder in destroy_class */
-	list_del_init(&cl->hlist);
+	if (!hlist_unhashed(&cl->hlist))
+		hlist_del_init(&cl->hlist);	/* leaves node unhashed */
+
 	if (cl->prio_activity)
-		htb_deactivate (q,cl);
+		htb_deactivate(q, cl);
 
 	if (--cl->refcnt == 0)
-		htb_destroy_class(sch,cl);
+		htb_destroy_class(sch, cl);
 
 	sch_tree_unlock(sch);
 	return 0;
@@ -1512,45 +1349,46 @@
 
 static void htb_put(struct Qdisc *sch, unsigned long arg)
 {
-#ifdef HTB_DEBUG
-	struct htb_sched *q = qdisc_priv(sch);
-#endif
-	struct htb_class *cl = (struct htb_class*)arg;
-	HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
+	struct htb_class *cl = (struct htb_class *)arg;
 
 	if (--cl->refcnt == 0)
-		htb_destroy_class(sch,cl);
+		htb_destroy_class(sch, cl);
 }
 
-static int htb_change_class(struct Qdisc *sch, u32 classid, 
-		u32 parentid, struct rtattr **tca, unsigned long *arg)
+static int htb_change_class(struct Qdisc *sch, u32 classid,
+			    u32 parentid, struct rtattr **tca,
+			    unsigned long *arg)
 {
 	int err = -EINVAL;
 	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = (struct htb_class*)*arg,*parent;
-	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct htb_class *cl = (struct htb_class *)*arg, *parent;
+	struct rtattr *opt = tca[TCA_OPTIONS - 1];
 	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
 	struct rtattr *tb[TCA_HTB_RTAB];
 	struct tc_htb_opt *hopt;
 
 	/* extract all subattrs from opt attr */
 	if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
-			tb[TCA_HTB_PARMS-1] == NULL ||
-			RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
+	    tb[TCA_HTB_PARMS - 1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt))
 		goto failure;
-	
-	parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
 
-	hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
-	HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
-	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
-	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
-	if (!rtab || !ctab) goto failure;
+	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
 
-	if (!cl) { /* new class */
+	hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]);
+
+	rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]);
+	ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]);
+	if (!rtab || !ctab)
+		goto failure;
+
+	if (!cl) {		/* new class */
 		struct Qdisc *new_q;
+		int prio;
+
 		/* check for valid classid */
-		if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
+		if (!classid || TC_H_MAJ(classid ^ sch->handle)
+		    || htb_find(classid, sch))
 			goto failure;
 
 		/* check maximal depth */
@@ -1561,15 +1399,16 @@
 		err = -ENOBUFS;
 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
 			goto failure;
-		
+
 		cl->refcnt = 1;
 		INIT_LIST_HEAD(&cl->sibling);
-		INIT_LIST_HEAD(&cl->hlist);
+		INIT_HLIST_NODE(&cl->hlist);
 		INIT_LIST_HEAD(&cl->children);
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
-#ifdef HTB_DEBUG
-		cl->magic = HTB_CMAGIC;
-#endif
+		RB_CLEAR_NODE(&cl->pq_node);
+
+		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
+			RB_CLEAR_NODE(&cl->node[prio]);
 
 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 		   so that can't be used inside of sch_tree_lock
@@ -1579,53 +1418,53 @@
 		if (parent && !parent->level) {
 			/* turn parent into inner node */
 			sch->q.qlen -= parent->un.leaf.q->q.qlen;
-			qdisc_destroy (parent->un.leaf.q);
-			if (parent->prio_activity) 
-				htb_deactivate (q,parent);
+			qdisc_destroy(parent->un.leaf.q);
+			if (parent->prio_activity)
+				htb_deactivate(q, parent);
 
 			/* remove from evt list because of level change */
 			if (parent->cmode != HTB_CAN_SEND) {
-				htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
+				htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
 				parent->cmode = HTB_CAN_SEND;
 			}
 			parent->level = (parent->parent ? parent->parent->level
-					: TC_HTB_MAXDEPTH) - 1;
-			memset (&parent->un.inner,0,sizeof(parent->un.inner));
+					 : TC_HTB_MAXDEPTH) - 1;
+			memset(&parent->un.inner, 0, sizeof(parent->un.inner));
 		}
 		/* leaf (we) needs elementary qdisc */
 		cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
 
-		cl->classid = classid; cl->parent = parent;
+		cl->classid = classid;
+		cl->parent = parent;
 
 		/* set class to be in HTB_CAN_SEND state */
 		cl->tokens = hopt->buffer;
 		cl->ctokens = hopt->cbuffer;
-		cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */
+		cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60);	/* 1min */
 		PSCHED_GET_TIME(cl->t_c);
 		cl->cmode = HTB_CAN_SEND;
 
 		/* attach to the hash list and parent's family */
-		list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
-		list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
-#ifdef HTB_DEBUG
-		{ 
-			int i;
-			for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
-			cl->pq_node.rb_color = -1;
-		}
-#endif
-	} else sch_tree_lock(sch);
+		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
+		list_add_tail(&cl->sibling,
+			      parent ? &parent->children : &q->root);
+	} else
+		sch_tree_lock(sch);
 
 	/* it used to be a nasty bug here, we have to check that node
-           is really leaf before changing cl->un.leaf ! */
+	   is really leaf before changing cl->un.leaf ! */
 	if (!cl->level) {
 		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
 		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
-			printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
+			printk(KERN_WARNING
+			       "HTB: quantum of class %X is small. Consider r2q change.\n",
+			       cl->classid);
 			cl->un.leaf.quantum = 1000;
 		}
 		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
-			printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
+			printk(KERN_WARNING
+			       "HTB: quantum of class %X is big. Consider r2q change.\n",
+			       cl->classid);
 			cl->un.leaf.quantum = 200000;
 		}
 		if (hopt->quantum)
@@ -1636,16 +1475,22 @@
 
 	cl->buffer = hopt->buffer;
 	cl->cbuffer = hopt->cbuffer;
-	if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab;
-	if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab;
+	if (cl->rate)
+		qdisc_put_rtab(cl->rate);
+	cl->rate = rtab;
+	if (cl->ceil)
+		qdisc_put_rtab(cl->ceil);
+	cl->ceil = ctab;
 	sch_tree_unlock(sch);
 
 	*arg = (unsigned long)cl;
 	return 0;
 
 failure:
-	if (rtab) qdisc_put_rtab(rtab);
-	if (ctab) qdisc_put_rtab(ctab);
+	if (rtab)
+		qdisc_put_rtab(rtab);
+	if (ctab)
+		qdisc_put_rtab(ctab);
 	return err;
 }
 
@@ -1654,28 +1499,28 @@
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
 	struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
-	HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
+
 	return fl;
 }
 
 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
-	u32 classid)
+				     u32 classid)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = htb_find (classid,sch);
-	HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
+	struct htb_class *cl = htb_find(classid, sch);
+
 	/*if (cl && !cl->level) return 0;
-	  The line above used to be there to prevent attaching filters to 
-	  leaves. But at least tc_index filter uses this just to get class 
-	  for other reasons so that we have to allow for it.
-	  ----
-	  19.6.2002 As Werner explained it is ok - bind filter is just
-	  another way to "lock" the class - unlike "get" this lock can
-	  be broken by class during destroy IIUC.
+	   The line above used to be there to prevent attaching filters to
+	   leaves. But at least tc_index filter uses this just to get class
+	   for other reasons so that we have to allow for it.
+	   ----
+	   19.6.2002 As Werner explained it is ok - bind filter is just
+	   another way to "lock" the class - unlike "get" this lock can
+	   be broken by class during destroy IIUC.
 	 */
-	if (cl) 
-		cl->filter_cnt++; 
-	else 
+	if (cl)
+		cl->filter_cnt++;
+	else
 		q->filter_cnt++;
 	return (unsigned long)cl;
 }
@@ -1684,10 +1529,10 @@
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
-	HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
-	if (cl) 
-		cl->filter_cnt--; 
-	else 
+
+	if (cl)
+		cl->filter_cnt--;
+	else
 		q->filter_cnt--;
 }
 
@@ -1700,9 +1545,10 @@
 		return;
 
 	for (i = 0; i < HTB_HSIZE; i++) {
-		struct list_head *p;
-		list_for_each (p,q->hash+i) {
-			struct htb_class *cl = list_entry(p,struct htb_class,hlist);
+		struct hlist_node *p;
+		struct htb_class *cl;
+
+		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
@@ -1750,12 +1596,13 @@
 
 static int __init htb_module_init(void)
 {
-    return register_qdisc(&htb_qdisc_ops);
+	return register_qdisc(&htb_qdisc_ops);
 }
-static void __exit htb_module_exit(void) 
+static void __exit htb_module_exit(void)
 {
-    unregister_qdisc(&htb_qdisc_ops);
+	unregister_qdisc(&htb_qdisc_ops);
 }
+
 module_init(htb_module_init)
 module_exit(htb_module_exit)
 MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a08ec4c7c..45939ba 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -192,8 +192,8 @@
 	 */
 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
 		if (!(skb = skb_unshare(skb, GFP_ATOMIC))
-		    || (skb->ip_summed == CHECKSUM_HW
-			&& skb_checksum_help(skb, 0))) {
+		    || (skb->ip_summed == CHECKSUM_PARTIAL
+			&& skb_checksum_help(skb))) {
 			sch->qstats.drops++;
 			return NET_XMIT_DROP;
 		}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index ffda1d6..35c49ff 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -173,7 +173,7 @@
 	SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return);
 
 	/* Free up the HMAC transform. */
-	sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac);
+	crypto_free_hash(sctp_sk(ep->base.sk)->hmac);
 
 	/* Cleanup. */
 	sctp_inq_free(&ep->base.inqueue);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 42b66e7..03f65de 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -228,7 +228,7 @@
 		goto discard_release;
 	nf_reset(skb);
 
-	if (sk_filter(sk, skb, 1))
+	if (sk_filter(sk, skb))
                 goto discard_release;
 
 	/* Create an SCTP packet structure. */
@@ -255,10 +255,13 @@
 	 */
 	sctp_bh_lock_sock(sk);
 
-	if (sock_owned_by_user(sk))
+	if (sock_owned_by_user(sk)) {
+		SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
 		sctp_add_backlog(sk, skb);
-	else
+	} else {
+		SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
 		sctp_inq_push(&chunk->rcvr->inqueue, chunk);
+	}
 
 	sctp_bh_unlock_sock(sk);
 
@@ -271,6 +274,7 @@
 	return 0;
 
 discard_it:
+	SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS);
 	kfree_skb(skb);
 	return 0;
 
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cf0c767..cf6deed 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -87,7 +87,7 @@
 /* Put a new packet in an SCTP inqueue.
  * We assume that packet->sctp_hdr is set and in host byte order.
  */
-void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet)
+void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
 {
 	/* Directly call the packet handling routine. */
 
@@ -96,7 +96,7 @@
 	 * Eventually, we should clean up inqueue to not rely
 	 * on the BH related data structures.
 	 */
-	list_add_tail(&packet->list, &q->in_chunk_list);
+	list_add_tail(&chunk->list, &q->in_chunk_list);
 	q->immediate.func(q->immediate.data);
 }
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 99c0cef..249e503 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -78,7 +78,6 @@
 
 #include <asm/uaccess.h>
 
-extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *);
 static struct notifier_block sctp_inet6addr_notifier = {
 	.notifier_call = sctp_inetaddr_event,
 };
@@ -322,9 +321,9 @@
 	struct inet6_ifaddr *ifp;
 	struct sctp_sockaddr_entry *addr;
 
-	read_lock(&addrconf_lock);
+	rcu_read_lock();
 	if ((in6_dev = __in6_dev_get(dev)) == NULL) {
-		read_unlock(&addrconf_lock);
+		rcu_read_unlock();
 		return;
 	}
 
@@ -343,7 +342,7 @@
 	}
 
 	read_unlock(&in6_dev->lock);
-	read_unlock(&addrconf_lock);
+	rcu_read_unlock();
 }
 
 /* Initialize a sockaddr_storage from in incoming skb. */
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 30b710c..37074a3 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -467,6 +467,7 @@
 
 	switch(reason) {
 	case SCTP_RTXR_T3_RTX:
+		SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS);
 		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
 		/* Update the retran path if the T3-rtx timer has expired for
 		 * the current retran path.
@@ -475,12 +476,15 @@
 			sctp_assoc_update_retran_path(transport->asoc);
 		break;
 	case SCTP_RTXR_FAST_RTX:
+		SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
 		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
 		fast_retransmit = 1;
 		break;
 	case SCTP_RTXR_PMTUD:
-	default:
+		SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
 		break;
+	default:
+		BUG();
 	}
 
 	sctp_retransmit_mark(q, transport, fast_retransmit);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 5b3b0e0..a356d8d 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -57,6 +57,21 @@
 	SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS),
 	SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS),
 	SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS),
+	SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS),
+	SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS),
+	SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS),
+	SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS),
+	SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS),
+	SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS),
+	SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ),
+	SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG),
+	SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS),
+	SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -328,8 +343,8 @@
 			   "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ",
 			   assoc, sk, sctp_sk(sk)->type, sk->sk_state,
 			   assoc->state, hash, assoc->assoc_id,
-			   (sk->sk_rcvbuf - assoc->rwnd),
 			   assoc->sndbuf_used,
+			   (sk->sk_rcvbuf - assoc->rwnd),
 			   sock_i_uid(sk), sock_i_ino(sk),
 			   epb->bind_addr.port,
 			   assoc->peer.port);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1ab03a2..fac7674 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -61,7 +61,7 @@
 #include <net/inet_ecn.h>
 
 /* Global data structures. */
-struct sctp_globals sctp_globals;
+struct sctp_globals sctp_globals __read_mostly;
 struct proc_dir_entry	*proc_net_sctp;
 DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
 
@@ -82,13 +82,6 @@
 kmem_cache_t *sctp_chunk_cachep __read_mostly;
 kmem_cache_t *sctp_bucket_cachep __read_mostly;
 
-extern int sctp_snmp_proc_init(void);
-extern int sctp_snmp_proc_exit(void);
-extern int sctp_eps_proc_init(void);
-extern int sctp_eps_proc_exit(void);
-extern int sctp_assocs_proc_init(void);
-extern int sctp_assocs_proc_exit(void);
-
 /* Return the address of the control sock. */
 struct sock *sctp_get_ctl_sock(void)
 {
@@ -1049,7 +1042,7 @@
 	sctp_rto_beta			= SCTP_RTO_BETA;
 
 	/* Valid.Cookie.Life        - 60  seconds */
-	sctp_valid_cookie_life		= 60 * HZ;
+	sctp_valid_cookie_life		= SCTP_DEFAULT_COOKIE_LIFE;
 
 	/* Whether Cookie Preservative is enabled(1) or not(0) */
 	sctp_cookie_preserve_enable 	= 1;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 17b5092..7745bde 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1282,10 +1282,8 @@
 
 	retval = kmalloc(*cookie_len, GFP_ATOMIC);
 
-	if (!retval) {
-		*cookie_len = 0;
+	if (!retval)
 		goto nodata;
-	}
 
 	/* Clear this memory since we are sending this data structure
 	 * out on the network.
@@ -1321,19 +1319,29 @@
 	       ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
 
   	if (sctp_sk(ep->base.sk)->hmac) {
+		struct hash_desc desc;
+
 		/* Sign the message.  */
 		sg.page = virt_to_page(&cookie->c);
 		sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE;
 		sg.length = bodysize;
 		keylen = SCTP_SECRET_SIZE;
 		key = (char *)ep->secret_key[ep->current_key];
+  		desc.tfm = sctp_sk(ep->base.sk)->hmac;
+  		desc.flags = 0;
 
-		sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen,
-				 &sg, 1, cookie->signature);
+		if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+		    crypto_hash_digest(&desc, &sg, bodysize, cookie->signature))
+			goto free_cookie;
 	}
 
-nodata:
 	return retval;
+
+free_cookie:
+	kfree(retval);
+nodata:
+	*cookie_len = 0;
+	return NULL;
 }
 
 /* Unpack the cookie from COOKIE ECHO chunk, recreating the association.  */
@@ -1354,6 +1362,7 @@
 	sctp_scope_t scope;
 	struct sk_buff *skb = chunk->skb;
 	struct timeval tv;
+	struct hash_desc desc;
 
 	/* Header size is static data prior to the actual cookie, including
 	 * any padding.
@@ -1389,17 +1398,25 @@
 	sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE;
 	sg.length = bodysize;
 	key = (char *)ep->secret_key[ep->current_key];
+	desc.tfm = sctp_sk(ep->base.sk)->hmac;
+	desc.flags = 0;
 
 	memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
-	sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg,
-			 1, digest);
+	if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+	    crypto_hash_digest(&desc, &sg, bodysize, digest)) {
+		*error = -SCTP_IERROR_NOMEM;
+		goto fail;
+	}
 
 	if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
 		/* Try the previous key. */
 		key = (char *)ep->secret_key[ep->last_key];
 		memset(digest, 0x00, SCTP_SIGNATURE_SIZE);
-		sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen,
-				 &sg, 1, digest);
+		if (crypto_hash_setkey(desc.tfm, key, keylen) ||
+		    crypto_hash_digest(&desc, &sg, bodysize, digest)) {
+			*error = -SCTP_IERROR_NOMEM;
+			goto fail;
+		}
 
 		if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
 			/* Yikes!  Still bad signature! */
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 5b5ae79..1c42fe9 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -187,10 +187,9 @@
 	 */
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
 					     0, 0, 0, GFP_ATOMIC);
-	if (!ev)
-		goto nomem;
-
-	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+	if (ev)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+			        SCTP_ULPEVENT(ev));
 
 	/* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint
 	 * will verify that it is in SHUTDOWN-ACK-SENT state, if it is
@@ -215,9 +214,6 @@
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 
 	return SCTP_DISPOSITION_DELETE_TCB;
-
-nomem:
-	return SCTP_DISPOSITION_NOMEM;
 }
 
 /*
@@ -347,8 +343,6 @@
 			       GFP_ATOMIC))
 		goto nomem_init;
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
-
 	/* B) "Z" shall respond immediately with an INIT ACK chunk.  */
 
 	/* If there are errors need to be reported for unknown parameters,
@@ -360,11 +354,11 @@
 			sizeof(sctp_chunkhdr_t);
 
 	if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0)
-		goto nomem_ack;
+		goto nomem_init;
 
 	repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
 	if (!repl)
-		goto nomem_ack;
+		goto nomem_init;
 
 	/* If there are errors need to be reported for unknown parameters,
 	 * include them in the outgoing INIT ACK as "Unrecognized parameter"
@@ -388,6 +382,8 @@
 		sctp_chunk_free(err_chunk);
 	}
 
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 
 	/*
@@ -400,12 +396,11 @@
 
 	return SCTP_DISPOSITION_DELETE_TCB;
 
-nomem_ack:
-	if (err_chunk)
-		sctp_chunk_free(err_chunk);
 nomem_init:
 	sctp_association_free(new_asoc);
 nomem:
+	if (err_chunk)
+		sctp_chunk_free(err_chunk);
 	return SCTP_DISPOSITION_NOMEM;
 }
 
@@ -600,7 +595,7 @@
 	struct sctp_association *new_asoc;
 	sctp_init_chunk_t *peer_init;
 	struct sctp_chunk *repl;
-	struct sctp_ulpevent *ev;
+	struct sctp_ulpevent *ev, *ai_ev = NULL;
 	int error = 0;
 	struct sctp_chunk *err_chk_p;
 
@@ -659,20 +654,10 @@
 		};
 	}
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
-	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
-			SCTP_STATE(SCTP_STATE_ESTABLISHED));
-	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
-	SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
-	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
 
-	if (new_asoc->autoclose)
-		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
-				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
-
-	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
-
-	/* Re-build the bind address for the association is done in
+	/* Delay state machine commands until later.
+	 *
+	 * Re-build the bind address for the association is done in
 	 * the sctp_unpack_cookie() already.
 	 */
 	/* This is a brand-new association, so these are not yet side
@@ -687,9 +672,7 @@
 
 	repl = sctp_make_cookie_ack(new_asoc, chunk);
 	if (!repl)
-		goto nomem_repl;
-
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+		goto nomem_init;
 
 	/* RFC 2960 5.1 Normal Establishment of an Association
 	 *
@@ -704,28 +687,53 @@
 	if (!ev)
 		goto nomem_ev;
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
-
 	/* Sockets API Draft Section 5.3.1.6 	
 	 * When a peer sends a Adaption Layer Indication parameter , SCTP
 	 * delivers this notification to inform the application that of the
 	 * peers requested adaption layer.
 	 */
 	if (new_asoc->peer.adaption_ind) {
-		ev = sctp_ulpevent_make_adaption_indication(new_asoc,
+		ai_ev = sctp_ulpevent_make_adaption_indication(new_asoc,
 							    GFP_ATOMIC);
-		if (!ev)
-			goto nomem_ev;
-
-		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
-				SCTP_ULPEVENT(ev));
+		if (!ai_ev)
+			goto nomem_aiev;
 	}
 
+	/* Add all the state machine commands now since we've created
+	 * everything.  This way we don't introduce memory corruptions
+	 * during side-effect processing and correctly count established
+	 * associations.
+	 */
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+			SCTP_STATE(SCTP_STATE_ESTABLISHED));
+	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
+	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
+
+	if (new_asoc->autoclose)
+		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
+				SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE));
+
+	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
+
+	/* This will send the COOKIE ACK */
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+
+	/* Queue the ASSOC_CHANGE event */
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
+	/* Send up the Adaptation Layer Indication event */
+	if (ai_ev)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(ai_ev));
+
 	return SCTP_DISPOSITION_CONSUME;
 
+nomem_aiev:
+	sctp_ulpevent_free(ev);
 nomem_ev:
 	sctp_chunk_free(repl);
-nomem_repl:
 nomem_init:
 	sctp_association_free(new_asoc);
 nomem:
@@ -1360,10 +1368,8 @@
 	if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type,
 			       sctp_source(chunk),
 			       (sctp_init_chunk_t *)chunk->chunk_hdr,
-			       GFP_ATOMIC)) {
-		retval = SCTP_DISPOSITION_NOMEM;
-		goto nomem_init;
-	}
+			       GFP_ATOMIC))
+		goto nomem;
 
 	/* Make sure no new addresses are being added during the
 	 * restart.   Do not do this check for COOKIE-WAIT state,
@@ -1374,7 +1380,7 @@
 		if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk,
 						 commands)) {
 			retval = SCTP_DISPOSITION_CONSUME;
-			goto cleanup_asoc;
+			goto nomem_retval;
 		}
 	}
 
@@ -1430,17 +1436,17 @@
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 	retval = SCTP_DISPOSITION_CONSUME;
 
+	return retval;
+
+nomem:
+	retval = SCTP_DISPOSITION_NOMEM;
+nomem_retval:
+	if (new_asoc)
+		sctp_association_free(new_asoc);
 cleanup:
 	if (err_chunk)
 		sctp_chunk_free(err_chunk);
 	return retval;
-nomem:
-	retval = SCTP_DISPOSITION_NOMEM;
-	goto cleanup;
-nomem_init:
-cleanup_asoc:
-	sctp_association_free(new_asoc);
-	goto cleanup;
 }
 
 /*
@@ -1611,15 +1617,10 @@
 	 */
 	sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL());
 
-	/* Update the content of current association. */
-	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
-
 	repl = sctp_make_cookie_ack(new_asoc, chunk);
 	if (!repl)
 		goto nomem;
 
-	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
-
 	/* Report association restart to upper layer. */
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
 					     new_asoc->c.sinit_num_ostreams,
@@ -1628,6 +1629,9 @@
 	if (!ev)
 		goto nomem_ev;
 
+	/* Update the content of current association. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
 	return SCTP_DISPOSITION_CONSUME;
 
@@ -1751,7 +1755,7 @@
 					sctp_cmd_seq_t *commands,
 					struct sctp_association *new_asoc)
 {
-	struct sctp_ulpevent *ev = NULL;
+	struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
 	struct sctp_chunk *repl;
 
 	/* Clarification from Implementor's Guide:
@@ -1778,29 +1782,25 @@
 		 * SCTP user upon reception of a valid COOKIE
 		 * ECHO chunk.
 		 */
-		ev = sctp_ulpevent_make_assoc_change(new_asoc, 0,
+		ev = sctp_ulpevent_make_assoc_change(asoc, 0,
 					     SCTP_COMM_UP, 0,
-					     new_asoc->c.sinit_num_ostreams,
-					     new_asoc->c.sinit_max_instreams,
+					     asoc->c.sinit_num_ostreams,
+					     asoc->c.sinit_max_instreams,
                                              GFP_ATOMIC);
 		if (!ev)
 			goto nomem;
-		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
-				SCTP_ULPEVENT(ev));
 
 		/* Sockets API Draft Section 5.3.1.6
 		 * When a peer sends a Adaption Layer Indication parameter,
 		 * SCTP delivers this notification to inform the application
 		 * that of the peers requested adaption layer.
 		 */
-		if (new_asoc->peer.adaption_ind) {
-			ev = sctp_ulpevent_make_adaption_indication(new_asoc,
+		if (asoc->peer.adaption_ind) {
+			ai_ev = sctp_ulpevent_make_adaption_indication(asoc,
 								 GFP_ATOMIC);
-			if (!ev)
+			if (!ai_ev)
 				goto nomem;
 
-			sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
-					SCTP_ULPEVENT(ev));
 		}
 	}
 	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
@@ -1809,12 +1809,21 @@
 	if (!repl)
 		goto nomem;
 
+	if (ev)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(ev));
+	if (ai_ev)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+					SCTP_ULPEVENT(ai_ev));
+
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
 	sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL());
 
 	return SCTP_DISPOSITION_CONSUME;
 
 nomem:
+	if (ai_ev)
+		sctp_ulpevent_free(ai_ev);
 	if (ev)
 		sctp_ulpevent_free(ev);
 	return SCTP_DISPOSITION_NOMEM;
@@ -2663,9 +2672,11 @@
 		break;
 	case SCTP_IERROR_HIGH_TSN:
 	case SCTP_IERROR_BAD_STREAM:
+		SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
 		goto discard_noforce;
 	case SCTP_IERROR_DUP_TSN:
 	case SCTP_IERROR_IGNORE_TSN:
+		SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
 		goto discard_force;
 	case SCTP_IERROR_NO_DATA:
 		goto consume;
@@ -3017,7 +3028,6 @@
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
 		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
 						  commands);
-
 	/* 10.2 H) SHUTDOWN COMPLETE notification
 	 *
 	 * When SCTP completes the shutdown procedures (section 9.2) this
@@ -3028,6 +3038,14 @@
 	if (!ev)
 		goto nomem;
 
+	/* ...send a SHUTDOWN COMPLETE chunk to its peer, */
+	reply = sctp_make_shutdown_complete(asoc, chunk);
+	if (!reply)
+		goto nomem_chunk;
+
+	/* Do all the commands now (after allocation), so that we
+	 * have consistent state if memory allocation fails
+	 */
 	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
 
 	/* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall
@@ -3039,11 +3057,6 @@
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
-	/* ...send a SHUTDOWN COMPLETE chunk to its peer, */
-	reply = sctp_make_shutdown_complete(asoc, chunk);
-	if (!reply)
-		goto nomem;
-
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
 	SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
@@ -3054,6 +3067,8 @@
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 	return SCTP_DISPOSITION_DELETE_TCB;
 
+nomem_chunk:
+	sctp_ulpevent_free(ev);
 nomem:
 	return SCTP_DISPOSITION_NOMEM;
 }
@@ -3652,6 +3667,7 @@
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
+	SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
 
 	return SCTP_DISPOSITION_CONSUME;
@@ -4548,6 +4564,8 @@
 {
 	struct sctp_transport *transport = arg;
 
+	SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
+
 	if (asoc->overall_error_count >= asoc->max_retrans) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
 				SCTP_ERROR(ETIMEDOUT));
@@ -4616,6 +4634,7 @@
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
+	SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
 	return SCTP_DISPOSITION_CONSUME;
 }
@@ -4650,6 +4669,7 @@
 	int attempts = asoc->init_err_counter + 1;
 
 	SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
+	SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS);
 
 	if (attempts <= asoc->max_init_attempts) {
 		bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
@@ -4709,6 +4729,7 @@
 	int attempts = asoc->init_err_counter + 1;
 
 	SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
+	SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS);
 
 	if (attempts <= asoc->max_init_attempts) {
 		repl = sctp_make_cookie_echo(asoc, NULL);
@@ -4753,6 +4774,8 @@
 	struct sctp_chunk *reply = NULL;
 
 	SCTP_DEBUG_PRINTK("Timer T2 expired.\n");
+	SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
+
 	if (asoc->overall_error_count >= asoc->max_retrans) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
 				SCTP_ERROR(ETIMEDOUT));
@@ -4814,6 +4837,8 @@
 	struct sctp_chunk *chunk = asoc->addip_last_asconf;
 	struct sctp_transport *transport = chunk->transport;
 
+	SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS);
+
 	/* ADDIP 4.1 B1) Increment the error counters and perform path failure
 	 * detection on the appropriate destination address as defined in
 	 * RFC2960 [5] section 8.1 and 8.2.
@@ -4880,6 +4905,7 @@
 	struct sctp_chunk *reply = NULL;
 
 	SCTP_DEBUG_PRINTK("Timer T5 expired.\n");
+	SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
 
 	reply = sctp_make_abort(asoc, NULL, 0);
 	if (!reply)
@@ -4910,6 +4936,8 @@
 {
 	int disposition;
 
+	SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS);
+
 	/* From 9.2 Shutdown of an Association
 	 * Upon receipt of the SHUTDOWN primitive from its upper
 	 * layer, the endpoint enters SHUTDOWN-PENDING state and
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index dab1594..79c3e07 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2081,13 +2081,13 @@
  *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
  *                     results.
  */
-int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
-				struct sctp_transport   *trans,
-				struct sctp_association *asoc,
-				struct sctp_sock        *sp,
-				int                      hb_change,
-				int                      pmtud_change,
-				int                      sackdelay_change)
+static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
+				       struct sctp_transport   *trans,
+				       struct sctp_association *asoc,
+				       struct sctp_sock        *sp,
+				       int                      hb_change,
+				       int                      pmtud_change,
+				       int                      sackdelay_change)
 {
 	int error;
 
@@ -2970,7 +2970,7 @@
 		goto out;
 	}
 
-	timeo = sock_rcvtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK);
+	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
 	error = sctp_wait_for_accept(sk, timeo);
 	if (error)
@@ -3045,14 +3045,14 @@
 	sp->initmsg.sinit_num_ostreams   = sctp_max_outstreams;
 	sp->initmsg.sinit_max_instreams  = sctp_max_instreams;
 	sp->initmsg.sinit_max_attempts   = sctp_max_retrans_init;
-	sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max);
+	sp->initmsg.sinit_max_init_timeo = sctp_rto_max;
 
 	/* Initialize default RTO related parameters.  These parameters can
 	 * be modified for with the SCTP_RTOINFO socket option.
 	 */
-	sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial);
-	sp->rtoinfo.srto_max     = jiffies_to_msecs(sctp_rto_max);
-	sp->rtoinfo.srto_min     = jiffies_to_msecs(sctp_rto_min);
+	sp->rtoinfo.srto_initial = sctp_rto_initial;
+	sp->rtoinfo.srto_max     = sctp_rto_max;
+	sp->rtoinfo.srto_min     = sctp_rto_min;
 
 	/* Initialize default association related parameters. These parameters
 	 * can be modified with the SCTP_ASSOCINFO socket option.
@@ -3061,8 +3061,7 @@
 	sp->assocparams.sasoc_number_peer_destinations = 0;
 	sp->assocparams.sasoc_peer_rwnd = 0;
 	sp->assocparams.sasoc_local_rwnd = 0;
-	sp->assocparams.sasoc_cookie_life = 
-		jiffies_to_msecs(sctp_valid_cookie_life);
+	sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life;
 
 	/* Initialize default event subscriptions. By default, all the
 	 * options are off. 
@@ -3072,10 +3071,10 @@
 	/* Default Peer Address Parameters.  These defaults can
 	 * be modified via SCTP_PEER_ADDR_PARAMS
 	 */
-	sp->hbinterval  = jiffies_to_msecs(sctp_hb_interval);
+	sp->hbinterval  = sctp_hb_interval;
 	sp->pathmaxrxt  = sctp_max_retrans_path;
 	sp->pathmtu     = 0; // allow default discovery
-	sp->sackdelay   = jiffies_to_msecs(sctp_sack_timeout);
+	sp->sackdelay   = sctp_sack_timeout;
 	sp->param_flags = SPP_HB_ENABLE |
 	                  SPP_PMTUD_ENABLE |
 	                  SPP_SACKDELAY_ENABLE;
@@ -4898,7 +4897,7 @@
 int sctp_inet_listen(struct socket *sock, int backlog)
 {
 	struct sock *sk = sock->sk;
-	struct crypto_tfm *tfm=NULL;
+	struct crypto_hash *tfm = NULL;
 	int err = -EINVAL;
 
 	if (unlikely(backlog < 0))
@@ -4911,7 +4910,7 @@
 
 	/* Allocate HMAC for generating cookie. */
 	if (sctp_hmac_alg) {
-		tfm = sctp_crypto_alloc_tfm(sctp_hmac_alg, 0);
+		tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
 		if (!tfm) {
 			err = -ENOSYS;
 			goto out;
@@ -4937,7 +4936,7 @@
 	sctp_release_sock(sk);
 	return err;
 cleanup:
-	sctp_crypto_free_tfm(tfm);
+	crypto_free_hash(tfm);
 	goto out;
 }
 
@@ -5619,6 +5618,8 @@
 	/* Copy the bind_addr list from the original endpoint to the new
 	 * endpoint so that we can handle restarts properly
 	 */
+	if (PF_INET6 == assoc->base.sk->sk_family)
+		flags = SCTP_ADDR6_ALLOWED;
 	if (assoc->peer.ipv4_address)
 		flags |= SCTP_ADDR4_PEERSUPP;
 	if (assoc->peer.ipv6_address)
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index dc6f3ff..633cd17 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -45,9 +45,10 @@
 #include <net/sctp/sctp.h>
 #include <linux/sysctl.h>
 
-static ctl_handler sctp_sysctl_jiffies_ms;
-static long rto_timer_min = 1;
-static long rto_timer_max = 86400000; /* One day */
+static int zero = 0;
+static int one = 1;
+static int timer_max = 86400000; /* ms in one day */
+static int int_max = INT_MAX;
 static long sack_timer_min = 1;
 static long sack_timer_max = 500;
 
@@ -56,45 +57,45 @@
 		.ctl_name	= NET_SCTP_RTO_INITIAL,
 		.procname	= "rto_initial",
 		.data		= &sctp_rto_initial,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_RTO_MIN,
 		.procname	= "rto_min",
 		.data		= &sctp_rto_min,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_RTO_MAX,
 		.procname	= "rto_max",
 		.data		= &sctp_rto_max,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_VALID_COOKIE_LIFE,
 		.procname	= "valid_cookie_life",
 		.data		= &sctp_valid_cookie_life,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_MAX_BURST,
@@ -102,7 +103,10 @@
 		.data		= &sctp_max_burst,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &int_max
 	},
 	{
 		.ctl_name	= NET_SCTP_ASSOCIATION_MAX_RETRANS,
@@ -110,7 +114,10 @@
 		.data		= &sctp_max_retrans_association,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+		.extra2		= &int_max
 	},
 	{
 		.ctl_name	= NET_SCTP_SNDBUF_POLICY,
@@ -118,7 +125,8 @@
 		.data		= &sctp_sndbuf_policy,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_RCVBUF_POLICY,
@@ -126,7 +134,8 @@
 		.data		= &sctp_rcvbuf_policy,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_PATH_MAX_RETRANS,
@@ -134,7 +143,10 @@
 		.data		= &sctp_max_retrans_path,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+		.extra2		= &int_max
 	},
 	{
 		.ctl_name	= NET_SCTP_MAX_INIT_RETRANSMITS,
@@ -142,18 +154,21 @@
 		.data		= &sctp_max_retrans_init,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+		.extra2		= &int_max
 	},
 	{
 		.ctl_name	= NET_SCTP_HB_INTERVAL,
 		.procname	= "hb_interval",
 		.data		= &sctp_hb_interval,
-		.maxlen		= sizeof(long),
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
-		.extra1         = &rto_timer_min,
-		.extra2         = &rto_timer_max
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
 		.ctl_name	= NET_SCTP_PRESERVE_ENABLE,
@@ -161,23 +176,26 @@
 		.data		= &sctp_cookie_preserve_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_RTO_ALPHA,
 		.procname	= "rto_alpha_exp_divisor",
 		.data		= &sctp_rto_alpha,
 		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_RTO_BETA,
 		.procname	= "rto_beta_exp_divisor",
 		.data		= &sctp_rto_beta,
 		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_ADDIP_ENABLE,
@@ -185,7 +203,8 @@
 		.data		= &sctp_addip_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_PRSCTP_ENABLE,
@@ -193,7 +212,8 @@
 		.data		= &sctp_prsctp_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec
 	},
 	{
 		.ctl_name	= NET_SCTP_SACK_TIMEOUT,
@@ -201,8 +221,8 @@
 		.data		= &sctp_sack_timeout,
 		.maxlen		= sizeof(long),
 		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_ms_jiffies_minmax,
-		.strategy	= &sctp_sysctl_jiffies_ms,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
 		.extra1         = &sack_timer_min,
 		.extra2         = &sack_timer_max,
 	},
@@ -242,37 +262,3 @@
 {
 	unregister_sysctl_table(sctp_sysctl_header);
 }
-
-/* Strategy function to convert jiffies to milliseconds.  */
-static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen,
-		void __user *oldval, size_t __user *oldlenp,
-		void __user *newval, size_t newlen, void **context) {
-
-	if (oldval) {
-		size_t olen;
-
-		if (oldlenp) {
-			if (get_user(olen, oldlenp))
-				return -EFAULT;
-
-			if (olen != sizeof (int))
-				return -EINVAL;
-		}
-		if (put_user((*(int *)(table->data) * 1000) / HZ,
-			(int __user *)oldval) ||
-		    (oldlenp && put_user(sizeof (int), oldlenp)))
-			return -EFAULT;
-	}
-	if (newval && newlen) {
-		int new;
-
-		if (newlen != sizeof (int))
-			return -EINVAL;
-
-		if (get_user(new, (int __user *)newval))
-			return -EFAULT;
-
-		*(int *)(table->data) = (new * HZ) / 1000;
-	}
-	return 1;
-}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 2763aa9..3e5936a 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -75,7 +75,7 @@
 	 * parameter 'RTO.Initial'.
 	 */
 	peer->rtt = 0;
-	peer->rto = sctp_rto_initial;
+	peer->rto = msecs_to_jiffies(sctp_rto_initial);
 	peer->rttvar = 0;
 	peer->srtt = 0;
 	peer->rto_pending = 0;
diff --git a/net/socket.c b/net/socket.c
index 6d261bf..1bc4167 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -42,7 +42,7 @@
  *		Andi Kleen	:	Some small cleanups, optimizations,
  *					and fixed a copy_from_user() bug.
  *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
- *		Tigran Aivazian	:	Made listen(2) backlog sanity checks 
+ *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
  *					protocol-independent
  *
  *
@@ -53,17 +53,17 @@
  *
  *
  *	This module is effectively the top level interface to the BSD socket
- *	paradigm. 
+ *	paradigm.
  *
  *	Based upon Swansea University Computer Society NET3.039
  */
 
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/socket.h>
 #include <linux/file.h>
 #include <linux/net.h>
 #include <linux/interrupt.h>
+#include <linux/rcupdate.h>
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -96,25 +96,24 @@
 
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
 static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
-			 size_t size, loff_t pos);
+			     size_t size, loff_t pos);
 static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
-			  size_t size, loff_t pos);
-static int sock_mmap(struct file *file, struct vm_area_struct * vma);
+			      size_t size, loff_t pos);
+static int sock_mmap(struct file *file, struct vm_area_struct *vma);
 
 static int sock_close(struct inode *inode, struct file *file);
 static unsigned int sock_poll(struct file *file,
 			      struct poll_table_struct *wait);
-static long sock_ioctl(struct file *file,
-		      unsigned int cmd, unsigned long arg);
+static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 #ifdef CONFIG_COMPAT
 static long compat_sock_ioctl(struct file *file,
-		      unsigned int cmd, unsigned long arg);
+			      unsigned int cmd, unsigned long arg);
 #endif
 static int sock_fasync(int fd, struct file *filp, int on);
 static ssize_t sock_readv(struct file *file, const struct iovec *vector,
 			  unsigned long count, loff_t *ppos);
 static ssize_t sock_writev(struct file *file, const struct iovec *vector,
-			  unsigned long count, loff_t *ppos);
+			   unsigned long count, loff_t *ppos);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more);
 
@@ -147,52 +146,8 @@
  *	The protocol list. Each protocol is registered in here.
  */
 
-static struct net_proto_family *net_families[NPROTO];
-
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
-static atomic_t net_family_lockct = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(net_family_lock);
-
-/* The strategy is: modifications net_family vector are short, do not
-   sleep and veeery rare, but read access should be free of any exclusive
-   locks.
- */
-
-static void net_family_write_lock(void)
-{
-	spin_lock(&net_family_lock);
-	while (atomic_read(&net_family_lockct) != 0) {
-		spin_unlock(&net_family_lock);
-
-		yield();
-
-		spin_lock(&net_family_lock);
-	}
-}
-
-static __inline__ void net_family_write_unlock(void)
-{
-	spin_unlock(&net_family_lock);
-}
-
-static __inline__ void net_family_read_lock(void)
-{
-	atomic_inc(&net_family_lockct);
-	spin_unlock_wait(&net_family_lock);
-}
-
-static __inline__ void net_family_read_unlock(void)
-{
-	atomic_dec(&net_family_lockct);
-}
-
-#else
-#define net_family_write_lock() do { } while(0)
-#define net_family_write_unlock() do { } while(0)
-#define net_family_read_lock() do { } while(0)
-#define net_family_read_unlock() do { } while(0)
-#endif
-
+static const struct net_proto_family *net_families[NPROTO] __read_mostly;
 
 /*
  *	Statistics counters of the socket lists
@@ -201,19 +156,20 @@
 static DEFINE_PER_CPU(int, sockets_in_use) = 0;
 
 /*
- *	Support routines. Move socket addresses back and forth across the kernel/user
- *	divide and look after the messy bits.
+ * Support routines.
+ * Move socket addresses back and forth across the kernel/user
+ * divide and look after the messy bits.
  */
 
-#define MAX_SOCK_ADDR	128		/* 108 for Unix domain - 
+#define MAX_SOCK_ADDR	128		/* 108 for Unix domain -
 					   16 for IP, 16 for IPX,
 					   24 for IPv6,
-					   about 80 for AX.25 
+					   about 80 for AX.25
 					   must be at least one bigger than
 					   the AF_UNIX size (see net/unix/af_unix.c
-					   :unix_mkname()).  
+					   :unix_mkname()).
 					 */
-					 
+
 /**
  *	move_addr_to_kernel	-	copy a socket address into kernel space
  *	@uaddr: Address in user space
@@ -227,11 +183,11 @@
 
 int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
 {
-	if(ulen<0||ulen>MAX_SOCK_ADDR)
+	if (ulen < 0 || ulen > MAX_SOCK_ADDR)
 		return -EINVAL;
-	if(ulen==0)
+	if (ulen == 0)
 		return 0;
-	if(copy_from_user(kaddr,uaddr,ulen))
+	if (copy_from_user(kaddr, uaddr, ulen))
 		return -EFAULT;
 	return audit_sockaddr(ulen, kaddr);
 }
@@ -252,51 +208,52 @@
  *	length of the data is written over the length limit the user
  *	specified. Zero is returned for a success.
  */
- 
-int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen)
+
+int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
+		      int __user *ulen)
 {
 	int err;
 	int len;
 
-	if((err=get_user(len, ulen)))
+	err = get_user(len, ulen);
+	if (err)
 		return err;
-	if(len>klen)
-		len=klen;
-	if(len<0 || len> MAX_SOCK_ADDR)
+	if (len > klen)
+		len = klen;
+	if (len < 0 || len > MAX_SOCK_ADDR)
 		return -EINVAL;
-	if(len)
-	{
+	if (len) {
 		if (audit_sockaddr(klen, kaddr))
 			return -ENOMEM;
-		if(copy_to_user(uaddr,kaddr,len))
+		if (copy_to_user(uaddr, kaddr, len))
 			return -EFAULT;
 	}
 	/*
-	 *	"fromlen shall refer to the value before truncation.."
-	 *			1003.1g
+	 *      "fromlen shall refer to the value before truncation.."
+	 *                      1003.1g
 	 */
 	return __put_user(klen, ulen);
 }
 
 #define SOCKFS_MAGIC 0x534F434B
 
-static kmem_cache_t * sock_inode_cachep __read_mostly;
+static kmem_cache_t *sock_inode_cachep __read_mostly;
 
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {
 	struct socket_alloc *ei;
-	ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
+
+	ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
 	if (!ei)
 		return NULL;
 	init_waitqueue_head(&ei->socket.wait);
-	
+
 	ei->socket.fasync_list = NULL;
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
 	ei->socket.ops = NULL;
 	ei->socket.sk = NULL;
 	ei->socket.file = NULL;
-	ei->socket.flags = 0;
 
 	return &ei->vfs_inode;
 }
@@ -307,22 +264,25 @@
 			container_of(inode, struct socket_alloc, vfs_inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
-	struct socket_alloc *ei = (struct socket_alloc *) foo;
+	struct socket_alloc *ei = (struct socket_alloc *)foo;
 
-	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
+	    == SLAB_CTOR_CONSTRUCTOR)
 		inode_init_once(&ei->vfs_inode);
 }
- 
+
 static int init_inodecache(void)
 {
 	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
-				sizeof(struct socket_alloc),
-				0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
-					SLAB_MEM_SPREAD),
-				init_once, NULL);
+					      sizeof(struct socket_alloc),
+					      0,
+					      (SLAB_HWCACHE_ALIGN |
+					       SLAB_RECLAIM_ACCOUNT |
+					       SLAB_MEM_SPREAD),
+					      init_once,
+					      NULL);
 	if (sock_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
@@ -335,7 +295,8 @@
 };
 
 static int sockfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+			 int flags, const char *dev_name, void *data,
+			 struct vfsmount *mnt)
 {
 	return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
 			     mnt);
@@ -348,12 +309,13 @@
 	.get_sb =	sockfs_get_sb,
 	.kill_sb =	kill_anon_super,
 };
+
 static int sockfs_delete_dentry(struct dentry *dentry)
 {
 	return 1;
 }
 static struct dentry_operations sockfs_dentry_operations = {
-	.d_delete =	sockfs_delete_dentry,
+	.d_delete = sockfs_delete_dentry,
 };
 
 /*
@@ -477,10 +439,12 @@
 	struct file *file;
 	struct socket *sock;
 
-	if (!(file = fget(fd))) {
+	file = fget(fd);
+	if (!file) {
 		*err = -EBADF;
 		return NULL;
 	}
+
 	sock = sock_from_file(file, err);
 	if (!sock)
 		fput(file);
@@ -505,7 +469,7 @@
 
 /**
  *	sock_alloc	-	allocate a socket
- *	
+ *
  *	Allocate a new inode and socket object. The two are bound together
  *	and initialised. The socket is then returned. If we are out of inodes
  *	NULL is returned.
@@ -513,8 +477,8 @@
 
 static struct socket *sock_alloc(void)
 {
-	struct inode * inode;
-	struct socket * sock;
+	struct inode *inode;
+	struct socket *sock;
 
 	inode = new_inode(sock_mnt->mnt_sb);
 	if (!inode)
@@ -522,7 +486,7 @@
 
 	sock = SOCKET_I(inode);
 
-	inode->i_mode = S_IFSOCK|S_IRWXUGO;
+	inode->i_mode = S_IFSOCK | S_IRWXUGO;
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
 
@@ -536,7 +500,7 @@
  *	a back door. Remember to keep it shut otherwise you'll let the
  *	creepy crawlies in.
  */
-  
+
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 {
 	return -ENXIO;
@@ -553,9 +517,9 @@
  *
  *	The socket is released from the protocol stack if it has a release
  *	callback, and the inode is then released if the socket is bound to
- *	an inode not a file. 
+ *	an inode not a file.
  */
- 
+
 void sock_release(struct socket *sock)
 {
 	if (sock->ops) {
@@ -575,10 +539,10 @@
 		iput(SOCK_INODE(sock));
 		return;
 	}
-	sock->file=NULL;
+	sock->file = NULL;
 }
 
-static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 
+static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size)
 {
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
@@ -621,14 +585,14 @@
 	 * the following is safe, since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	msg->msg_iov = (struct iovec *)vec,
+	msg->msg_iov = (struct iovec *)vec;
 	msg->msg_iovlen = num;
 	result = sock_sendmsg(sock, msg, size);
 	set_fs(oldfs);
 	return result;
 }
 
-static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
+static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {
 	int err;
@@ -647,14 +611,14 @@
 	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
 }
 
-int sock_recvmsg(struct socket *sock, struct msghdr *msg, 
+int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 		 size_t size, int flags)
 {
 	struct kiocb iocb;
 	struct sock_iocb siocb;
 	int ret;
 
-        init_sync_kiocb(&iocb, NULL);
+	init_sync_kiocb(&iocb, NULL);
 	iocb.private = &siocb;
 	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
 	if (-EIOCBQUEUED == ret)
@@ -662,9 +626,8 @@
 	return ret;
 }
 
-int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 
-		   struct kvec *vec, size_t num,
-		   size_t size, int flags)
+int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
+		   struct kvec *vec, size_t num, size_t size, int flags)
 {
 	mm_segment_t oldfs = get_fs();
 	int result;
@@ -674,8 +637,7 @@
 	 * the following is safe, since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	msg->msg_iov = (struct iovec *)vec,
-	msg->msg_iovlen = num;
+	msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
 	result = sock_recvmsg(sock, msg, size, flags);
 	set_fs(oldfs);
 	return result;
@@ -702,7 +664,8 @@
 }
 
 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
-		char __user *ubuf, size_t size, struct sock_iocb *siocb)
+					 char __user *ubuf, size_t size,
+					 struct sock_iocb *siocb)
 {
 	if (!is_sync_kiocb(iocb)) {
 		siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
@@ -720,20 +683,21 @@
 }
 
 static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
-		struct file *file, struct iovec *iov, unsigned long nr_segs)
+			    struct file *file, struct iovec *iov,
+			    unsigned long nr_segs)
 {
 	struct socket *sock = file->private_data;
 	size_t size = 0;
 	int i;
 
-        for (i = 0 ; i < nr_segs ; i++)
-                size += iov[i].iov_len;
+	for (i = 0; i < nr_segs; i++)
+		size += iov[i].iov_len;
 
 	msg->msg_name = NULL;
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	msg->msg_iov = (struct iovec *) iov;
+	msg->msg_iov = (struct iovec *)iov;
 	msg->msg_iovlen = nr_segs;
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 
@@ -748,7 +712,7 @@
 	struct msghdr msg;
 	int ret;
 
-        init_sync_kiocb(&iocb, NULL);
+	init_sync_kiocb(&iocb, NULL);
 	iocb.private = &siocb;
 
 	ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
@@ -758,7 +722,7 @@
 }
 
 static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
-			 size_t count, loff_t pos)
+			     size_t count, loff_t pos)
 {
 	struct sock_iocb siocb, *x;
 
@@ -771,24 +735,25 @@
 	if (!x)
 		return -ENOMEM;
 	return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
-			&x->async_iov, 1);
+			    &x->async_iov, 1);
 }
 
 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
-		struct file *file, struct iovec *iov, unsigned long nr_segs)
+			     struct file *file, struct iovec *iov,
+			     unsigned long nr_segs)
 {
 	struct socket *sock = file->private_data;
 	size_t size = 0;
 	int i;
 
-        for (i = 0 ; i < nr_segs ; i++)
-                size += iov[i].iov_len;
+	for (i = 0; i < nr_segs; i++)
+		size += iov[i].iov_len;
 
 	msg->msg_name = NULL;
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	msg->msg_iov = (struct iovec *) iov;
+	msg->msg_iov = (struct iovec *)iov;
 	msg->msg_iovlen = nr_segs;
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 	if (sock->type == SOCK_SEQPACKET)
@@ -815,7 +780,7 @@
 }
 
 static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
-			  size_t count, loff_t pos)
+			      size_t count, loff_t pos)
 {
 	struct sock_iocb siocb, *x;
 
@@ -829,46 +794,48 @@
 		return -ENOMEM;
 
 	return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
-			&x->async_iov, 1);
+			     &x->async_iov, 1);
 }
 
-
 /*
  * Atomic setting of ioctl hooks to avoid race
  * with module unload.
  */
 
 static DEFINE_MUTEX(br_ioctl_mutex);
-static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL;
+static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
 
-void brioctl_set(int (*hook)(unsigned int, void __user *))
+void brioctl_set(int (*hook) (unsigned int, void __user *))
 {
 	mutex_lock(&br_ioctl_mutex);
 	br_ioctl_hook = hook;
 	mutex_unlock(&br_ioctl_mutex);
 }
+
 EXPORT_SYMBOL(brioctl_set);
 
 static DEFINE_MUTEX(vlan_ioctl_mutex);
-static int (*vlan_ioctl_hook)(void __user *arg);
+static int (*vlan_ioctl_hook) (void __user *arg);
 
-void vlan_ioctl_set(int (*hook)(void __user *))
+void vlan_ioctl_set(int (*hook) (void __user *))
 {
 	mutex_lock(&vlan_ioctl_mutex);
 	vlan_ioctl_hook = hook;
 	mutex_unlock(&vlan_ioctl_mutex);
 }
+
 EXPORT_SYMBOL(vlan_ioctl_set);
 
 static DEFINE_MUTEX(dlci_ioctl_mutex);
-static int (*dlci_ioctl_hook)(unsigned int, void __user *);
+static int (*dlci_ioctl_hook) (unsigned int, void __user *);
 
-void dlci_ioctl_set(int (*hook)(unsigned int, void __user *))
+void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 {
 	mutex_lock(&dlci_ioctl_mutex);
 	dlci_ioctl_hook = hook;
 	mutex_unlock(&dlci_ioctl_mutex);
 }
+
 EXPORT_SYMBOL(dlci_ioctl_set);
 
 /*
@@ -890,8 +857,8 @@
 	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
 		err = dev_ioctl(cmd, argp);
 	} else
-#endif	/* CONFIG_WIRELESS_EXT */
-	switch (cmd) {
+#endif				/* CONFIG_WIRELESS_EXT */
+		switch (cmd) {
 		case FIOSETOWN:
 		case SIOCSPGRP:
 			err = -EFAULT;
@@ -901,7 +868,8 @@
 			break;
 		case FIOGETOWN:
 		case SIOCGPGRP:
-			err = put_user(sock->file->f_owner.pid, (int __user *)argp);
+			err = put_user(sock->file->f_owner.pid,
+				       (int __user *)argp);
 			break;
 		case SIOCGIFBR:
 		case SIOCSIFBR:
@@ -912,7 +880,7 @@
 				request_module("bridge");
 
 			mutex_lock(&br_ioctl_mutex);
-			if (br_ioctl_hook) 
+			if (br_ioctl_hook)
 				err = br_ioctl_hook(cmd, argp);
 			mutex_unlock(&br_ioctl_mutex);
 			break;
@@ -929,7 +897,7 @@
 			break;
 		case SIOCGIFDIVERT:
 		case SIOCSIFDIVERT:
-		/* Convert this to call through a hook */
+			/* Convert this to call through a hook */
 			err = divert_ioctl(cmd, argp);
 			break;
 		case SIOCADDDLCI:
@@ -954,7 +922,7 @@
 			if (err == -ENOIOCTLCMD)
 				err = dev_ioctl(cmd, argp);
 			break;
-	}
+		}
 	return err;
 }
 
@@ -962,7 +930,7 @@
 {
 	int err;
 	struct socket *sock = NULL;
-	
+
 	err = security_socket_create(family, type, protocol, 1);
 	if (err)
 		goto out;
@@ -973,26 +941,33 @@
 		goto out;
 	}
 
-	security_socket_post_create(sock, family, type, protocol, 1);
 	sock->type = type;
+	err = security_socket_post_create(sock, family, type, protocol, 1);
+	if (err)
+		goto out_release;
+
 out:
 	*res = sock;
 	return err;
+out_release:
+	sock_release(sock);
+	sock = NULL;
+	goto out;
 }
 
 /* No kernel lock held - perfect */
-static unsigned int sock_poll(struct file *file, poll_table * wait)
+static unsigned int sock_poll(struct file *file, poll_table *wait)
 {
 	struct socket *sock;
 
 	/*
-	 *	We can't return errors to poll, so it's either yes or no. 
+	 *      We can't return errors to poll, so it's either yes or no.
 	 */
 	sock = file->private_data;
 	return sock->ops->poll(file, sock, wait);
 }
 
-static int sock_mmap(struct file * file, struct vm_area_struct * vma)
+static int sock_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct socket *sock = file->private_data;
 
@@ -1002,12 +977,11 @@
 static int sock_close(struct inode *inode, struct file *filp)
 {
 	/*
-	 *	It was possible the inode is NULL we were 
-	 *	closing an unfinished socket. 
+	 *      It was possible the inode is NULL we were
+	 *      closing an unfinished socket.
 	 */
 
-	if (!inode)
-	{
+	if (!inode) {
 		printk(KERN_DEBUG "sock_close: NULL inode\n");
 		return 0;
 	}
@@ -1033,57 +1007,52 @@
 
 static int sock_fasync(int fd, struct file *filp, int on)
 {
-	struct fasync_struct *fa, *fna=NULL, **prev;
+	struct fasync_struct *fa, *fna = NULL, **prev;
 	struct socket *sock;
 	struct sock *sk;
 
-	if (on)
-	{
+	if (on) {
 		fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
-		if(fna==NULL)
+		if (fna == NULL)
 			return -ENOMEM;
 	}
 
 	sock = filp->private_data;
 
-	if ((sk=sock->sk) == NULL) {
+	sk = sock->sk;
+	if (sk == NULL) {
 		kfree(fna);
 		return -EINVAL;
 	}
 
 	lock_sock(sk);
 
-	prev=&(sock->fasync_list);
+	prev = &(sock->fasync_list);
 
-	for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
-		if (fa->fa_file==filp)
+	for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
+		if (fa->fa_file == filp)
 			break;
 
-	if(on)
-	{
-		if(fa!=NULL)
-		{
+	if (on) {
+		if (fa != NULL) {
 			write_lock_bh(&sk->sk_callback_lock);
-			fa->fa_fd=fd;
+			fa->fa_fd = fd;
 			write_unlock_bh(&sk->sk_callback_lock);
 
 			kfree(fna);
 			goto out;
 		}
-		fna->fa_file=filp;
-		fna->fa_fd=fd;
-		fna->magic=FASYNC_MAGIC;
-		fna->fa_next=sock->fasync_list;
+		fna->fa_file = filp;
+		fna->fa_fd = fd;
+		fna->magic = FASYNC_MAGIC;
+		fna->fa_next = sock->fasync_list;
 		write_lock_bh(&sk->sk_callback_lock);
-		sock->fasync_list=fna;
+		sock->fasync_list = fna;
 		write_unlock_bh(&sk->sk_callback_lock);
-	}
-	else
-	{
-		if (fa!=NULL)
-		{
+	} else {
+		if (fa != NULL) {
 			write_lock_bh(&sk->sk_callback_lock);
-			*prev=fa->fa_next;
+			*prev = fa->fa_next;
 			write_unlock_bh(&sk->sk_callback_lock);
 			kfree(fa);
 		}
@@ -1100,10 +1069,9 @@
 {
 	if (!sock || !sock->fasync_list)
 		return -1;
-	switch (how)
-	{
+	switch (how) {
 	case 1:
-		
+
 		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
 			break;
 		goto call_kill;
@@ -1112,7 +1080,7 @@
 			break;
 		/* fall through */
 	case 0:
-	call_kill:
+call_kill:
 		__kill_fasync(sock->fasync_list, SIGIO, band);
 		break;
 	case 3:
@@ -1121,13 +1089,15 @@
 	return 0;
 }
 
-static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
+static int __sock_create(int family, int type, int protocol,
+			 struct socket **res, int kern)
 {
 	int err;
 	struct socket *sock;
+	const struct net_proto_family *pf;
 
 	/*
-	 *	Check protocol is in range
+	 *      Check protocol is in range
 	 */
 	if (family < 0 || family >= NPROTO)
 		return -EAFNOSUPPORT;
@@ -1140,10 +1110,11 @@
 	   deadlock in module load.
 	 */
 	if (family == PF_INET && type == SOCK_PACKET) {
-		static int warned; 
+		static int warned;
 		if (!warned) {
 			warned = 1;
-			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
+			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
+			       current->comm);
 		}
 		family = PF_PACKET;
 	}
@@ -1151,79 +1122,84 @@
 	err = security_socket_create(family, type, protocol, kern);
 	if (err)
 		return err;
-		
+
+	/*
+	 *	Allocate the socket and allow the family to set things up. If
+	 *	the protocol is 0, the family is instructed to select an appropriate
+	 *	default.
+	 */
+	sock = sock_alloc();
+	if (!sock) {
+		if (net_ratelimit())
+			printk(KERN_WARNING "socket: no more sockets\n");
+		return -ENFILE;	/* Not exactly a match, but its the
+				   closest posix thing */
+	}
+
+	sock->type = type;
+
 #if defined(CONFIG_KMOD)
-	/* Attempt to load a protocol module if the find failed. 
-	 * 
-	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
+	/* Attempt to load a protocol module if the find failed.
+	 *
+	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
 	 * requested real, full-featured networking support upon configuration.
 	 * Otherwise module support will break!
 	 */
-	if (net_families[family]==NULL)
-	{
-		request_module("net-pf-%d",family);
-	}
+	if (net_families[family] == NULL)
+		request_module("net-pf-%d", family);
 #endif
 
-	net_family_read_lock();
-	if (net_families[family] == NULL) {
-		err = -EAFNOSUPPORT;
-		goto out;
-	}
-
-/*
- *	Allocate the socket and allow the family to set things up. if
- *	the protocol is 0, the family is instructed to select an appropriate
- *	default.
- */
-
-	if (!(sock = sock_alloc())) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "socket: no more sockets\n");
-		err = -ENFILE;		/* Not exactly a match, but its the
-					   closest posix thing */
-		goto out;
-	}
-
-	sock->type  = type;
+	rcu_read_lock();
+	pf = rcu_dereference(net_families[family]);
+	err = -EAFNOSUPPORT;
+	if (!pf)
+		goto out_release;
 
 	/*
 	 * We will call the ->create function, that possibly is in a loadable
 	 * module, so we have to bump that loadable module refcnt first.
 	 */
-	err = -EAFNOSUPPORT;
-	if (!try_module_get(net_families[family]->owner))
+	if (!try_module_get(pf->owner))
 		goto out_release;
 
-	if ((err = net_families[family]->create(sock, protocol)) < 0) {
-		sock->ops = NULL;
+	/* Now protected by module ref count */
+	rcu_read_unlock();
+
+	err = pf->create(sock, protocol);
+	if (err < 0)
 		goto out_module_put;
-	}
 
 	/*
 	 * Now to bump the refcnt of the [loadable] module that owns this
 	 * socket at sock_release time we decrement its refcnt.
 	 */
-	if (!try_module_get(sock->ops->owner)) {
-		sock->ops = NULL;
-		goto out_module_put;
-	}
+	if (!try_module_get(sock->ops->owner))
+		goto out_module_busy;
+
 	/*
 	 * Now that we're done with the ->create function, the [loadable]
 	 * module can have its refcnt decremented
 	 */
-	module_put(net_families[family]->owner);
+	module_put(pf->owner);
+	err = security_socket_post_create(sock, family, type, protocol, kern);
+	if (err)
+		goto out_sock_release;	/* RCU read lock was already dropped */
 	*res = sock;
-	security_socket_post_create(sock, family, type, protocol, kern);
 
-out:
-	net_family_read_unlock();
-	return err;
+	return 0;
+
+out_module_busy:
+	err = -EAFNOSUPPORT;
 out_module_put:
-	module_put(net_families[family]->owner);
-out_release:
+	sock->ops = NULL;
+	module_put(pf->owner);
+out_sock_release:
 	sock_release(sock);
-	goto out;
+	return err;
+
+out_release:
+	rcu_read_unlock();
+	goto out_sock_release;
 }
 
 int sock_create(int family, int type, int protocol, struct socket **res)
@@ -1262,7 +1238,8 @@
  *	Create a pair of connected sockets.
  */
 
-asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec)
+asmlinkage long sys_socketpair(int family, int type, int protocol,
+			       int __user *usockvec)
 {
 	struct socket *sock1, *sock2;
 	int fd1, fd2, err;
@@ -1281,7 +1258,7 @@
 		goto out_release_1;
 
 	err = sock1->ops->socketpair(sock1, sock2);
-	if (err < 0) 
+	if (err < 0)
 		goto out_release_both;
 
 	fd1 = fd2 = -1;
@@ -1300,7 +1277,7 @@
 	 * Not kernel problem.
 	 */
 
-	err = put_user(fd1, &usockvec[0]); 
+	err = put_user(fd1, &usockvec[0]);
 	if (!err)
 		err = put_user(fd2, &usockvec[1]);
 	if (!err)
@@ -1311,19 +1288,18 @@
 	return err;
 
 out_close_1:
-        sock_release(sock2);
+	sock_release(sock2);
 	sys_close(fd1);
 	return err;
 
 out_release_both:
-        sock_release(sock2);
+	sock_release(sock2);
 out_release_1:
-        sock_release(sock1);
+	sock_release(sock1);
 out:
 	return err;
 }
 
-
 /*
  *	Bind a name to a socket. Nothing much to do here since it's
  *	the protocol's responsibility to handle the local address.
@@ -1338,35 +1314,39 @@
 	char address[MAX_SOCK_ADDR];
 	int err, fput_needed;
 
-	if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL)
-	{
-		if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
-			err = security_socket_bind(sock, (struct sockaddr *)address, addrlen);
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if(sock) {
+		err = move_addr_to_kernel(umyaddr, addrlen, address);
+		if (err >= 0) {
+			err = security_socket_bind(sock,
+						   (struct sockaddr *)address,
+						   addrlen);
 			if (!err)
 				err = sock->ops->bind(sock,
-					(struct sockaddr *)address, addrlen);
+						      (struct sockaddr *)
+						      address, addrlen);
 		}
 		fput_light(sock->file, fput_needed);
-	}			
+	}
 	return err;
 }
 
-
 /*
  *	Perform a listen. Basically, we allow the protocol to do anything
  *	necessary for a listen, and if that works, we mark the socket as
  *	ready for listening.
  */
 
-int sysctl_somaxconn = SOMAXCONN;
+int sysctl_somaxconn __read_mostly = SOMAXCONN;
 
 asmlinkage long sys_listen(int fd, int backlog)
 {
 	struct socket *sock;
 	int err, fput_needed;
-	
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
-		if ((unsigned) backlog > sysctl_somaxconn)
+
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock) {
+		if ((unsigned)backlog > sysctl_somaxconn)
 			backlog = sysctl_somaxconn;
 
 		err = security_socket_listen(sock, backlog);
@@ -1378,7 +1358,6 @@
 	return err;
 }
 
-
 /*
  *	For accept, we attempt to create a new socket, set up the link
  *	with the client, wake up the client, then return the new
@@ -1391,7 +1370,8 @@
  *	clean when we restucture accept also.
  */
 
-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen)
+asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
+			   int __user *upeer_addrlen)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
@@ -1403,7 +1383,7 @@
 		goto out;
 
 	err = -ENFILE;
-	if (!(newsock = sock_alloc())) 
+	if (!(newsock = sock_alloc()))
 		goto out_put;
 
 	newsock->type = sock->type;
@@ -1435,11 +1415,13 @@
 		goto out_fd;
 
 	if (upeer_sockaddr) {
-		if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
+		if (newsock->ops->getname(newsock, (struct sockaddr *)address,
+					  &len, 2) < 0) {
 			err = -ECONNABORTED;
 			goto out_fd;
 		}
-		err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
+		err = move_addr_to_user(address, len, upeer_sockaddr,
+					upeer_addrlen);
 		if (err < 0)
 			goto out_fd;
 	}
@@ -1461,7 +1443,6 @@
 	goto out_put;
 }
 
-
 /*
  *	Attempt to connect to a socket with the server address.  The address
  *	is in user space so we verify it is OK and move it to kernel space.
@@ -1474,7 +1455,8 @@
  *	include the -EINPROGRESS status for such sockets.
  */
 
-asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
+asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
+			    int addrlen)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
@@ -1487,11 +1469,12 @@
 	if (err < 0)
 		goto out_put;
 
-	err = security_socket_connect(sock, (struct sockaddr *)address, addrlen);
+	err =
+	    security_socket_connect(sock, (struct sockaddr *)address, addrlen);
 	if (err)
 		goto out_put;
 
-	err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
+	err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
 				 sock->file->f_flags);
 out_put:
 	fput_light(sock->file, fput_needed);
@@ -1504,12 +1487,13 @@
  *	name to user space.
  */
 
-asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+				int __user *usockaddr_len)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
 	int len, err, fput_needed;
-	
+
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
 		goto out;
@@ -1534,22 +1518,27 @@
  *	name to user space.
  */
 
-asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
+asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
+				int __user *usockaddr_len)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
 	int len, err, fput_needed;
 
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
 		err = security_socket_getpeername(sock);
 		if (err) {
 			fput_light(sock->file, fput_needed);
 			return err;
 		}
 
-		err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
+		err =
+		    sock->ops->getname(sock, (struct sockaddr *)address, &len,
+				       1);
 		if (!err)
-			err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
+			err = move_addr_to_user(address, len, usockaddr,
+						usockaddr_len);
 		fput_light(sock->file, fput_needed);
 	}
 	return err;
@@ -1561,8 +1550,9 @@
  *	the protocol.
  */
 
-asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags,
-			   struct sockaddr __user *addr, int addr_len)
+asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
+			   unsigned flags, struct sockaddr __user *addr,
+			   int addr_len)
 {
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
@@ -1579,54 +1569,55 @@
 	sock = sock_from_file(sock_file, &err);
 	if (!sock)
 		goto out_put;
-	iov.iov_base=buff;
-	iov.iov_len=len;
-	msg.msg_name=NULL;
-	msg.msg_iov=&iov;
-	msg.msg_iovlen=1;
-	msg.msg_control=NULL;
-	msg.msg_controllen=0;
-	msg.msg_namelen=0;
+	iov.iov_base = buff;
+	iov.iov_len = len;
+	msg.msg_name = NULL;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_namelen = 0;
 	if (addr) {
 		err = move_addr_to_kernel(addr, addr_len, address);
 		if (err < 0)
 			goto out_put;
-		msg.msg_name=address;
-		msg.msg_namelen=addr_len;
+		msg.msg_name = address;
+		msg.msg_namelen = addr_len;
 	}
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	msg.msg_flags = flags;
 	err = sock_sendmsg(sock, &msg, len);
 
-out_put:		
+out_put:
 	fput_light(sock_file, fput_needed);
 	return err;
 }
 
 /*
- *	Send a datagram down a socket. 
+ *	Send a datagram down a socket.
  */
 
-asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags)
+asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
 {
 	return sys_sendto(fd, buff, len, flags, NULL, 0);
 }
 
 /*
- *	Receive a frame from the socket and optionally record the address of the 
+ *	Receive a frame from the socket and optionally record the address of the
  *	sender. We verify the buffers are writable and if needed move the
  *	sender address from kernel to user space.
  */
 
-asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags,
-			     struct sockaddr __user *addr, int __user *addr_len)
+asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
+			     unsigned flags, struct sockaddr __user *addr,
+			     int __user *addr_len)
 {
 	struct socket *sock;
 	struct iovec iov;
 	struct msghdr msg;
 	char address[MAX_SOCK_ADDR];
-	int err,err2;
+	int err, err2;
 	struct file *sock_file;
 	int fput_needed;
 
@@ -1638,23 +1629,22 @@
 	if (!sock)
 		goto out;
 
-	msg.msg_control=NULL;
-	msg.msg_controllen=0;
-	msg.msg_iovlen=1;
-	msg.msg_iov=&iov;
-	iov.iov_len=size;
-	iov.iov_base=ubuf;
-	msg.msg_name=address;
-	msg.msg_namelen=MAX_SOCK_ADDR;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_iov = &iov;
+	iov.iov_len = size;
+	iov.iov_base = ubuf;
+	msg.msg_name = address;
+	msg.msg_namelen = MAX_SOCK_ADDR;
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
-	err=sock_recvmsg(sock, &msg, size, flags);
+	err = sock_recvmsg(sock, &msg, size, flags);
 
-	if(err >= 0 && addr != NULL)
-	{
-		err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
-		if(err2<0)
-			err=err2;
+	if (err >= 0 && addr != NULL) {
+		err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
+		if (err2 < 0)
+			err = err2;
 	}
 out:
 	fput_light(sock_file, fput_needed);
@@ -1662,10 +1652,11 @@
 }
 
 /*
- *	Receive a datagram from a socket. 
+ *	Receive a datagram from a socket.
  */
 
-asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags)
+asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
+			 unsigned flags)
 {
 	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
 }
@@ -1675,24 +1666,29 @@
  *	to pass the user mode parameter for the protocols to sort out.
  */
 
-asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen)
+asmlinkage long sys_setsockopt(int fd, int level, int optname,
+			       char __user *optval, int optlen)
 {
 	int err, fput_needed;
 	struct socket *sock;
 
 	if (optlen < 0)
 		return -EINVAL;
-			
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL)
-	{
-		err = security_socket_setsockopt(sock,level,optname);
+
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
+		err = security_socket_setsockopt(sock, level, optname);
 		if (err)
 			goto out_put;
 
 		if (level == SOL_SOCKET)
-			err=sock_setsockopt(sock,level,optname,optval,optlen);
+			err =
+			    sock_setsockopt(sock, level, optname, optval,
+					    optlen);
 		else
-			err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
+			err =
+			    sock->ops->setsockopt(sock, level, optname, optval,
+						  optlen);
 out_put:
 		fput_light(sock->file, fput_needed);
 	}
@@ -1704,27 +1700,32 @@
  *	to pass a user mode parameter for the protocols to sort out.
  */
 
-asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen)
+asmlinkage long sys_getsockopt(int fd, int level, int optname,
+			       char __user *optval, int __user *optlen)
 {
 	int err, fput_needed;
 	struct socket *sock;
 
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) {
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
 		err = security_socket_getsockopt(sock, level, optname);
 		if (err)
 			goto out_put;
 
 		if (level == SOL_SOCKET)
-			err=sock_getsockopt(sock,level,optname,optval,optlen);
+			err =
+			    sock_getsockopt(sock, level, optname, optval,
+					    optlen);
 		else
-			err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
+			err =
+			    sock->ops->getsockopt(sock, level, optname, optval,
+						  optlen);
 out_put:
 		fput_light(sock->file, fput_needed);
 	}
 	return err;
 }
 
-
 /*
  *	Shutdown a socket.
  */
@@ -1734,8 +1735,8 @@
 	int err, fput_needed;
 	struct socket *sock;
 
-	if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL)
-	{
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (sock != NULL) {
 		err = security_socket_shutdown(sock, how);
 		if (!err)
 			err = sock->ops->shutdown(sock, how);
@@ -1744,41 +1745,42 @@
 	return err;
 }
 
-/* A couple of helpful macros for getting the address of the 32/64 bit 
+/* A couple of helpful macros for getting the address of the 32/64 bit
  * fields which are the same type (int / unsigned) on our platforms.
  */
 #define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
 #define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
 #define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
 
-
 /*
  *	BSD sendmsg interface
  */
 
 asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
 {
-	struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+	struct compat_msghdr __user *msg_compat =
+	    (struct compat_msghdr __user *)msg;
 	struct socket *sock;
 	char address[MAX_SOCK_ADDR];
 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
-			__attribute__ ((aligned (sizeof(__kernel_size_t))));
-			/* 20 is size of ipv6_pktinfo */
+	    __attribute__ ((aligned(sizeof(__kernel_size_t))));
+	/* 20 is size of ipv6_pktinfo */
 	unsigned char *ctl_buf = ctl;
 	struct msghdr msg_sys;
 	int err, ctl_len, iov_size, total_len;
 	int fput_needed;
-	
+
 	err = -EFAULT;
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(&msg_sys, msg_compat))
 			return -EFAULT;
-	} else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+	}
+	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
 		return -EFAULT;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
-	if (!sock) 
+	if (!sock)
 		goto out;
 
 	/* do not move before msg_sys is valid */
@@ -1786,7 +1788,7 @@
 	if (msg_sys.msg_iovlen > UIO_MAXIOV)
 		goto out_put;
 
-	/* Check whether to allocate the iovec area*/
+	/* Check whether to allocate the iovec area */
 	err = -ENOMEM;
 	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
 	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1800,7 +1802,7 @@
 		err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
 	} else
 		err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
-	if (err < 0) 
+	if (err < 0)
 		goto out_freeiov;
 	total_len = err;
 
@@ -1808,18 +1810,19 @@
 
 	if (msg_sys.msg_controllen > INT_MAX)
 		goto out_freeiov;
-	ctl_len = msg_sys.msg_controllen; 
+	ctl_len = msg_sys.msg_controllen;
 	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
-		err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl));
+		err =
+		    cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
+						     sizeof(ctl));
 		if (err)
 			goto out_freeiov;
 		ctl_buf = msg_sys.msg_control;
 		ctl_len = msg_sys.msg_controllen;
 	} else if (ctl_len) {
-		if (ctl_len > sizeof(ctl))
-		{
+		if (ctl_len > sizeof(ctl)) {
 			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
-			if (ctl_buf == NULL) 
+			if (ctl_buf == NULL)
 				goto out_freeiov;
 		}
 		err = -EFAULT;
@@ -1828,7 +1831,8 @@
 		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
 		 * checking falls down on this.
 		 */
-		if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len))
+		if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
+				   ctl_len))
 			goto out_freectl;
 		msg_sys.msg_control = ctl_buf;
 	}
@@ -1839,14 +1843,14 @@
 	err = sock_sendmsg(sock, &msg_sys, total_len);
 
 out_freectl:
-	if (ctl_buf != ctl)    
+	if (ctl_buf != ctl)
 		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
 out_freeiov:
 	if (iov != iovstack)
 		sock_kfree_s(sock->sk, iov, iov_size);
 out_put:
 	fput_light(sock->file, fput_needed);
-out:       
+out:
 	return err;
 }
 
@@ -1854,12 +1858,14 @@
  *	BSD recvmsg interface
  */
 
-asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags)
+asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
+			    unsigned int flags)
 {
-	struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
+	struct compat_msghdr __user *msg_compat =
+	    (struct compat_msghdr __user *)msg;
 	struct socket *sock;
 	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov=iovstack;
+	struct iovec *iov = iovstack;
 	struct msghdr msg_sys;
 	unsigned long cmsg_ptr;
 	int err, iov_size, total_len, len;
@@ -1871,13 +1877,13 @@
 	/* user mode address pointers */
 	struct sockaddr __user *uaddr;
 	int __user *uaddr_len;
-	
+
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(&msg_sys, msg_compat))
 			return -EFAULT;
-	} else
-		if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
-			return -EFAULT;
+	}
+	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+		return -EFAULT;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
@@ -1886,8 +1892,8 @@
 	err = -EMSGSIZE;
 	if (msg_sys.msg_iovlen > UIO_MAXIOV)
 		goto out_put;
-	
-	/* Check whether to allocate the iovec area*/
+
+	/* Check whether to allocate the iovec area */
 	err = -ENOMEM;
 	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
 	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
@@ -1897,11 +1903,11 @@
 	}
 
 	/*
-	 *	Save the user-mode address (verify_iovec will change the
-	 *	kernel msghdr to use the kernel address space)
+	 *      Save the user-mode address (verify_iovec will change the
+	 *      kernel msghdr to use the kernel address space)
 	 */
-	 
-	uaddr = (void __user *) msg_sys.msg_name;
+
+	uaddr = (void __user *)msg_sys.msg_name;
 	uaddr_len = COMPAT_NAMELEN(msg);
 	if (MSG_CMSG_COMPAT & flags) {
 		err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
@@ -1909,13 +1915,13 @@
 		err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
 	if (err < 0)
 		goto out_freeiov;
-	total_len=err;
+	total_len = err;
 
 	cmsg_ptr = (unsigned long)msg_sys.msg_control;
 	msg_sys.msg_flags = 0;
 	if (MSG_CMSG_COMPAT & flags)
 		msg_sys.msg_flags = MSG_CMSG_COMPAT;
-	
+
 	if (sock->file->f_flags & O_NONBLOCK)
 		flags |= MSG_DONTWAIT;
 	err = sock_recvmsg(sock, &msg_sys, total_len, flags);
@@ -1924,7 +1930,8 @@
 	len = err;
 
 	if (uaddr != NULL) {
-		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
+		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
+					uaddr_len);
 		if (err < 0)
 			goto out_freeiov;
 	}
@@ -1933,10 +1940,10 @@
 	if (err)
 		goto out_freeiov;
 	if (MSG_CMSG_COMPAT & flags)
-		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
 				 &msg_compat->msg_controllen);
 	else
-		err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
+		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
 				 &msg->msg_controllen);
 	if (err)
 		goto out_freeiov;
@@ -1955,163 +1962,187 @@
 
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
-static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
-				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+static const unsigned char nargs[18]={
+	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
+};
+
 #undef AL
 
 /*
- *	System call vectors. 
+ *	System call vectors.
  *
  *	Argument checking cleaned up. Saved 20% in size.
  *  This function doesn't need to set the kernel lock because
- *  it is set by the callees. 
+ *  it is set by the callees.
  */
 
 asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 {
 	unsigned long a[6];
-	unsigned long a0,a1;
+	unsigned long a0, a1;
 	int err;
 
-	if(call<1||call>SYS_RECVMSG)
+	if (call < 1 || call > SYS_RECVMSG)
 		return -EINVAL;
 
 	/* copy_from_user should be SMP safe. */
 	if (copy_from_user(a, args, nargs[call]))
 		return -EFAULT;
 
-	err = audit_socketcall(nargs[call]/sizeof(unsigned long), a);
+	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
 	if (err)
 		return err;
 
-	a0=a[0];
-	a1=a[1];
-	
-	switch(call) 
-	{
-		case SYS_SOCKET:
-			err = sys_socket(a0,a1,a[2]);
-			break;
-		case SYS_BIND:
-			err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]);
-			break;
-		case SYS_CONNECT:
-			err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
-			break;
-		case SYS_LISTEN:
-			err = sys_listen(a0,a1);
-			break;
-		case SYS_ACCEPT:
-			err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
-			break;
-		case SYS_GETSOCKNAME:
-			err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
-			break;
-		case SYS_GETPEERNAME:
-			err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]);
-			break;
-		case SYS_SOCKETPAIR:
-			err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]);
-			break;
-		case SYS_SEND:
-			err = sys_send(a0, (void __user *)a1, a[2], a[3]);
-			break;
-		case SYS_SENDTO:
-			err = sys_sendto(a0,(void __user *)a1, a[2], a[3],
-					 (struct sockaddr __user *)a[4], a[5]);
-			break;
-		case SYS_RECV:
-			err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
-			break;
-		case SYS_RECVFROM:
-			err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
-					   (struct sockaddr __user *)a[4], (int __user *)a[5]);
-			break;
-		case SYS_SHUTDOWN:
-			err = sys_shutdown(a0,a1);
-			break;
-		case SYS_SETSOCKOPT:
-			err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
-			break;
-		case SYS_GETSOCKOPT:
-			err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]);
-			break;
-		case SYS_SENDMSG:
-			err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]);
-			break;
-		case SYS_RECVMSG:
-			err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]);
-			break;
-		default:
-			err = -EINVAL;
-			break;
+	a0 = a[0];
+	a1 = a[1];
+
+	switch (call) {
+	case SYS_SOCKET:
+		err = sys_socket(a0, a1, a[2]);
+		break;
+	case SYS_BIND:
+		err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
+		break;
+	case SYS_CONNECT:
+		err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
+		break;
+	case SYS_LISTEN:
+		err = sys_listen(a0, a1);
+		break;
+	case SYS_ACCEPT:
+		err =
+		    sys_accept(a0, (struct sockaddr __user *)a1,
+			       (int __user *)a[2]);
+		break;
+	case SYS_GETSOCKNAME:
+		err =
+		    sys_getsockname(a0, (struct sockaddr __user *)a1,
+				    (int __user *)a[2]);
+		break;
+	case SYS_GETPEERNAME:
+		err =
+		    sys_getpeername(a0, (struct sockaddr __user *)a1,
+				    (int __user *)a[2]);
+		break;
+	case SYS_SOCKETPAIR:
+		err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
+		break;
+	case SYS_SEND:
+		err = sys_send(a0, (void __user *)a1, a[2], a[3]);
+		break;
+	case SYS_SENDTO:
+		err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
+				 (struct sockaddr __user *)a[4], a[5]);
+		break;
+	case SYS_RECV:
+		err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
+		break;
+	case SYS_RECVFROM:
+		err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
+				   (struct sockaddr __user *)a[4],
+				   (int __user *)a[5]);
+		break;
+	case SYS_SHUTDOWN:
+		err = sys_shutdown(a0, a1);
+		break;
+	case SYS_SETSOCKOPT:
+		err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
+		break;
+	case SYS_GETSOCKOPT:
+		err =
+		    sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
+				   (int __user *)a[4]);
+		break;
+	case SYS_SENDMSG:
+		err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
+		break;
+	case SYS_RECVMSG:
+		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
+		break;
+	default:
+		err = -EINVAL;
+		break;
 	}
 	return err;
 }
 
-#endif /* __ARCH_WANT_SYS_SOCKETCALL */
+#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
 
-/*
+/**
+ *	sock_register - add a socket protocol handler
+ *	@ops: description of protocol
+ *
  *	This function is called by a protocol handler that wants to
  *	advertise its address family, and have it linked into the
- *	SOCKET module.
+ *	socket interface. The value ops->family corresponds to the
+ *	socket system call protocol family.
  */
-
-int sock_register(struct net_proto_family *ops)
+int sock_register(const struct net_proto_family *ops)
 {
 	int err;
 
 	if (ops->family >= NPROTO) {
-		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
+		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
+		       NPROTO);
 		return -ENOBUFS;
 	}
-	net_family_write_lock();
-	err = -EEXIST;
-	if (net_families[ops->family] == NULL) {
-		net_families[ops->family]=ops;
+
+	spin_lock(&net_family_lock);
+	if (net_families[ops->family])
+		err = -EEXIST;
+	else {
+		net_families[ops->family] = ops;
 		err = 0;
 	}
-	net_family_write_unlock();
-	printk(KERN_INFO "NET: Registered protocol family %d\n",
-	       ops->family);
+	spin_unlock(&net_family_lock);
+
+	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
 	return err;
 }
 
-/*
+/**
+ *	sock_unregister - remove a protocol handler
+ *	@family: protocol family to remove
+ *
  *	This function is called by a protocol handler that wants to
  *	remove its address family, and have it unlinked from the
- *	SOCKET module.
+ *	new socket creation.
+ *
+ *	If protocol handler is a module, then it can use module reference
+ *	counts to protect against new references. If protocol handler is not
+ *	a module then it needs to provide its own protection in
+ *	the ops->create routine.
  */
-
-int sock_unregister(int family)
+void sock_unregister(int family)
 {
-	if (family < 0 || family >= NPROTO)
-		return -1;
+	BUG_ON(family < 0 || family >= NPROTO);
 
-	net_family_write_lock();
-	net_families[family]=NULL;
-	net_family_write_unlock();
-	printk(KERN_INFO "NET: Unregistered protocol family %d\n",
-	       family);
-	return 0;
+	spin_lock(&net_family_lock);
+	net_families[family] = NULL;
+	spin_unlock(&net_family_lock);
+
+	synchronize_rcu();
+
+	printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
 }
 
 static int __init sock_init(void)
 {
 	/*
-	 *	Initialize sock SLAB cache.
+	 *      Initialize sock SLAB cache.
 	 */
-	 
+
 	sk_init();
 
 	/*
-	 *	Initialize skbuff SLAB cache 
+	 *      Initialize skbuff SLAB cache
 	 */
 	skb_init();
 
 	/*
-	 *	Initialize the protocols module. 
+	 *      Initialize the protocols module.
 	 */
 
 	init_inodecache();
@@ -2137,7 +2168,7 @@
 	int counter = 0;
 
 	for_each_possible_cpu(cpu)
-		counter += per_cpu(sockets_in_use, cpu);
+	    counter += per_cpu(sockets_in_use, cpu);
 
 	/* It can be negative, by the way. 8) */
 	if (counter < 0)
@@ -2145,11 +2176,11 @@
 
 	seq_printf(seq, "sockets: used %d\n", counter);
 }
-#endif /* CONFIG_PROC_FS */
+#endif				/* CONFIG_PROC_FS */
 
 #ifdef CONFIG_COMPAT
 static long compat_sock_ioctl(struct file *file, unsigned cmd,
-				unsigned long arg)
+			      unsigned long arg)
 {
 	struct socket *sock = file->private_data;
 	int ret = -ENOIOCTLCMD;
@@ -2161,6 +2192,109 @@
 }
 #endif
 
+int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+	return sock->ops->bind(sock, addr, addrlen);
+}
+
+int kernel_listen(struct socket *sock, int backlog)
+{
+	return sock->ops->listen(sock, backlog);
+}
+
+int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
+{
+	struct sock *sk = sock->sk;
+	int err;
+
+	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
+			       newsock);
+	if (err < 0)
+		goto done;
+
+	err = sock->ops->accept(sock, *newsock, flags);
+	if (err < 0) {
+		sock_release(*newsock);
+		goto done;
+	}
+
+	(*newsock)->ops = sock->ops;
+
+done:
+	return err;
+}
+
+int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+                   int flags)
+{
+	return sock->ops->connect(sock, addr, addrlen, flags);
+}
+
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+			 int *addrlen)
+{
+	return sock->ops->getname(sock, addr, addrlen, 0);
+}
+
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+			 int *addrlen)
+{
+	return sock->ops->getname(sock, addr, addrlen, 1);
+}
+
+int kernel_getsockopt(struct socket *sock, int level, int optname,
+			char *optval, int *optlen)
+{
+	mm_segment_t oldfs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	if (level == SOL_SOCKET)
+		err = sock_getsockopt(sock, level, optname, optval, optlen);
+	else
+		err = sock->ops->getsockopt(sock, level, optname, optval,
+					    optlen);
+	set_fs(oldfs);
+	return err;
+}
+
+int kernel_setsockopt(struct socket *sock, int level, int optname,
+			char *optval, int optlen)
+{
+	mm_segment_t oldfs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	if (level == SOL_SOCKET)
+		err = sock_setsockopt(sock, level, optname, optval, optlen);
+	else
+		err = sock->ops->setsockopt(sock, level, optname, optval,
+					    optlen);
+	set_fs(oldfs);
+	return err;
+}
+
+int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+		    size_t size, int flags)
+{
+	if (sock->ops->sendpage)
+		return sock->ops->sendpage(sock, page, offset, size, flags);
+
+	return sock_no_sendpage(sock, page, offset, size, flags);
+}
+
+int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
+{
+	mm_segment_t oldfs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	err = sock->ops->ioctl(sock, cmd, arg);
+	set_fs(oldfs);
+
+	return err;
+}
+
 /* ABI emulation layers need these two */
 EXPORT_SYMBOL(move_addr_to_kernel);
 EXPORT_SYMBOL(move_addr_to_user);
@@ -2177,3 +2311,13 @@
 EXPORT_SYMBOL(sockfd_lookup);
 EXPORT_SYMBOL(kernel_sendmsg);
 EXPORT_SYMBOL(kernel_recvmsg);
+EXPORT_SYMBOL(kernel_bind);
+EXPORT_SYMBOL(kernel_listen);
+EXPORT_SYMBOL(kernel_accept);
+EXPORT_SYMBOL(kernel_connect);
+EXPORT_SYMBOL(kernel_getsockname);
+EXPORT_SYMBOL(kernel_getpeername);
+EXPORT_SYMBOL(kernel_getsockopt);
+EXPORT_SYMBOL(kernel_setsockopt);
+EXPORT_SYMBOL(kernel_sendpage);
+EXPORT_SYMBOL(kernel_sock_ioctl);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index ef1cf5b..6eed3e1 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -88,7 +88,6 @@
 	struct list_head upcalls;
 	struct rpc_clnt *client;
 	struct dentry *dentry;
-	char path[48];
 	spinlock_t lock;
 };
 
@@ -690,10 +689,8 @@
 	if (err)
 		goto err_put_mech;
 
-	snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
-			clnt->cl_pathname,
-			gss_auth->mech->gm_name);
-	gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+	gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
+			clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
 	if (IS_ERR(gss_auth->dentry)) {
 		err = PTR_ERR(gss_auth->dentry);
 		goto err_put_mech;
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 76b969e..e11a40b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,6 +34,7 @@
  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  */
 
+#include <linux/err.h>
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
@@ -49,7 +50,7 @@
 
 u32
 krb5_encrypt(
-	struct crypto_tfm *tfm,
+	struct crypto_blkcipher *tfm,
 	void * iv,
 	void * in,
 	void * out,
@@ -58,26 +59,27 @@
 	u32 ret = -EINVAL;
         struct scatterlist sg[1];
 	u8 local_iv[16] = {0};
+	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
 
 	dprintk("RPC:      krb5_encrypt: input data:\n");
 	print_hexl((u32 *)in, length, 0);
 
-	if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+	if (length % crypto_blkcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_tfm_alg_ivsize(tfm) > 16) {
+	if (crypto_blkcipher_ivsize(tfm) > 16) {
 		dprintk("RPC:      gss_k5encrypt: tfm iv size to large %d\n",
-		         crypto_tfm_alg_ivsize(tfm));
+		         crypto_blkcipher_ivsize(tfm));
 		goto out;
 	}
 
 	if (iv)
-		memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm));
+		memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_set_buf(sg, out, length);
 
-	ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv);
+	ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length);
 
 	dprintk("RPC:      krb5_encrypt: output data:\n");
 	print_hexl((u32 *)out, length, 0);
@@ -90,7 +92,7 @@
 
 u32
 krb5_decrypt(
-     struct crypto_tfm *tfm,
+     struct crypto_blkcipher *tfm,
      void * iv,
      void * in,
      void * out,
@@ -99,25 +101,26 @@
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
 	u8 local_iv[16] = {0};
+	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
 
 	dprintk("RPC:      krb5_decrypt: input data:\n");
 	print_hexl((u32 *)in, length, 0);
 
-	if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+	if (length % crypto_blkcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_tfm_alg_ivsize(tfm) > 16) {
+	if (crypto_blkcipher_ivsize(tfm) > 16) {
 		dprintk("RPC:      gss_k5decrypt: tfm iv size to large %d\n",
-			crypto_tfm_alg_ivsize(tfm));
+			crypto_blkcipher_ivsize(tfm));
 		goto out;
 	}
 	if (iv)
-		memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm));
+		memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_set_buf(sg, out, length);
 
-	ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv);
+	ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length);
 
 	dprintk("RPC:      krb5_decrypt: output_data:\n");
 	print_hexl((u32 *)out, length, 0);
@@ -197,11 +200,9 @@
 static int
 checksummer(struct scatterlist *sg, void *data)
 {
-	struct crypto_tfm *tfm = (struct crypto_tfm *)data;
+	struct hash_desc *desc = data;
 
-	crypto_digest_update(tfm, sg, 1);
-
-	return 0;
+	return crypto_hash_update(desc, sg, sg->length);
 }
 
 /* checksum the plaintext data and hdrlen bytes of the token header */
@@ -210,8 +211,9 @@
 		   int body_offset, struct xdr_netobj *cksum)
 {
 	char                            *cksumname;
-	struct crypto_tfm               *tfm = NULL; /* XXX add to ctx? */
+	struct hash_desc                desc; /* XXX add to ctx? */
 	struct scatterlist              sg[1];
+	int err;
 
 	switch (cksumtype) {
 		case CKSUMTYPE_RSA_MD5:
@@ -222,25 +224,35 @@
 				" unsupported checksum %d", cksumtype);
 			return GSS_S_FAILURE;
 	}
-	if (!(tfm = crypto_alloc_tfm(cksumname, CRYPTO_TFM_REQ_MAY_SLEEP)))
+	desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(desc.tfm))
 		return GSS_S_FAILURE;
-	cksum->len = crypto_tfm_alg_digestsize(tfm);
+	cksum->len = crypto_hash_digestsize(desc.tfm);
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	crypto_digest_init(tfm);
+	err = crypto_hash_init(&desc);
+	if (err)
+		goto out;
 	sg_set_buf(sg, header, hdrlen);
-	crypto_digest_update(tfm, sg, 1);
-	process_xdr_buf(body, body_offset, body->len - body_offset,
-			checksummer, tfm);
-	crypto_digest_final(tfm, cksum->data);
-	crypto_free_tfm(tfm);
-	return 0;
+	err = crypto_hash_update(&desc, sg, hdrlen);
+	if (err)
+		goto out;
+	err = process_xdr_buf(body, body_offset, body->len - body_offset,
+			      checksummer, &desc);
+	if (err)
+		goto out;
+	err = crypto_hash_final(&desc, cksum->data);
+
+out:
+	crypto_free_hash(desc.tfm);
+	return err ? GSS_S_FAILURE : 0;
 }
 
 EXPORT_SYMBOL(make_checksum);
 
 struct encryptor_desc {
 	u8 iv[8]; /* XXX hard-coded blocksize */
-	struct crypto_tfm *tfm;
+	struct blkcipher_desc desc;
 	int pos;
 	struct xdr_buf *outbuf;
 	struct page **pages;
@@ -285,8 +297,8 @@
 	if (thislen == 0)
 		return 0;
 
-	ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags,
-					thislen, desc->iv);
+	ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags,
+					  desc->infrags, thislen);
 	if (ret)
 		return ret;
 	if (fraglen) {
@@ -305,16 +317,18 @@
 }
 
 int
-gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset,
-		struct page **pages)
+gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
+		    int offset, struct page **pages)
 {
 	int ret;
 	struct encryptor_desc desc;
 
-	BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
-	desc.tfm = tfm;
+	desc.desc.tfm = tfm;
+	desc.desc.info = desc.iv;
+	desc.desc.flags = 0;
 	desc.pos = offset;
 	desc.outbuf = buf;
 	desc.pages = pages;
@@ -329,7 +343,7 @@
 
 struct decryptor_desc {
 	u8 iv[8]; /* XXX hard-coded blocksize */
-	struct crypto_tfm *tfm;
+	struct blkcipher_desc desc;
 	struct scatterlist frags[4];
 	int fragno;
 	int fraglen;
@@ -355,8 +369,8 @@
 	if (thislen == 0)
 		return 0;
 
-	ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags,
-					thislen, desc->iv);
+	ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags,
+					  desc->frags, thislen);
 	if (ret)
 		return ret;
 	if (fraglen) {
@@ -373,15 +387,18 @@
 }
 
 int
-gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset)
+gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
+		    int offset)
 {
 	struct decryptor_desc desc;
 
 	/* XXXJBF: */
-	BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
-	desc.tfm = tfm;
+	desc.desc.tfm = tfm;
+	desc.desc.info = desc.iv;
+	desc.desc.flags = 0;
 	desc.fragno = 0;
 	desc.fraglen = 0;
 	return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 70e1e53..325e72e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -34,6 +34,7 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -78,10 +79,10 @@
 }
 
 static inline const void *
-get_key(const void *p, const void *end, struct crypto_tfm **res)
+get_key(const void *p, const void *end, struct crypto_blkcipher **res)
 {
 	struct xdr_netobj	key;
-	int			alg, alg_mode;
+	int			alg;
 	char			*alg_name;
 
 	p = simple_get_bytes(p, end, &alg, sizeof(alg));
@@ -93,18 +94,19 @@
 
 	switch (alg) {
 		case ENCTYPE_DES_CBC_RAW:
-			alg_name = "des";
-			alg_mode = CRYPTO_TFM_MODE_CBC;
+			alg_name = "cbc(des)";
 			break;
 		default:
 			printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
 			goto out_err_free_key;
 	}
-	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
+	*res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(*res)) {
 		printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name);
+		*res = NULL;
 		goto out_err_free_key;
 	}
-	if (crypto_cipher_setkey(*res, key.data, key.len)) {
+	if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
 		printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name);
 		goto out_err_free_tfm;
 	}
@@ -113,7 +115,7 @@
 	return p;
 
 out_err_free_tfm:
-	crypto_free_tfm(*res);
+	crypto_free_blkcipher(*res);
 out_err_free_key:
 	kfree(key.data);
 	p = ERR_PTR(-EINVAL);
@@ -172,9 +174,9 @@
 	return 0;
 
 out_err_free_key2:
-	crypto_free_tfm(ctx->seq);
+	crypto_free_blkcipher(ctx->seq);
 out_err_free_key1:
-	crypto_free_tfm(ctx->enc);
+	crypto_free_blkcipher(ctx->enc);
 out_err_free_mech:
 	kfree(ctx->mech_used.data);
 out_err_free_ctx:
@@ -187,8 +189,8 @@
 gss_delete_sec_context_kerberos(void *internal_ctx) {
 	struct krb5_ctx *kctx = internal_ctx;
 
-	crypto_free_tfm(kctx->seq);
-	crypto_free_tfm(kctx->enc);
+	crypto_free_blkcipher(kctx->seq);
+	crypto_free_blkcipher(kctx->enc);
 	kfree(kctx->mech_used.data);
 	kfree(kctx);
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index c53ead3..c604baf 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -41,7 +41,7 @@
 #endif
 
 s32
-krb5_make_seq_num(struct crypto_tfm *key,
+krb5_make_seq_num(struct crypto_blkcipher *key,
 		int direction,
 		s32 seqnum,
 		unsigned char *cksum, unsigned char *buf)
@@ -62,7 +62,7 @@
 }
 
 s32
-krb5_get_seq_num(struct crypto_tfm *key,
+krb5_get_seq_num(struct crypto_blkcipher *key,
 	       unsigned char *cksum,
 	       unsigned char *buf,
 	       int *direction, s32 * seqnum)
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 89d1f3e..f179415 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -149,7 +149,7 @@
 		goto out_err;
 	}
 
-	blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+	blocksize = crypto_blkcipher_blocksize(kctx->enc);
 	gss_krb5_add_padding(buf, offset, blocksize);
 	BUG_ON((buf->len - offset) % blocksize);
 	plainlen = blocksize + buf->len - offset;
@@ -346,7 +346,7 @@
 	/* Copy the data back to the right position.  XXX: Would probably be
 	 * better to copy and encrypt at the same time. */
 
-	blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+	blocksize = crypto_blkcipher_blocksize(kctx->enc);
 	data_start = ptr + 22 + blocksize;
 	orig_start = buf->head[0].iov_base + offset;
 	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 88dcb52..bdedf45 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -34,6 +34,7 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -83,10 +84,11 @@
 }
 
 static inline const void *
-get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
+get_key(const void *p, const void *end, struct crypto_blkcipher **res,
+	int *resalg)
 {
 	struct xdr_netobj	key = { 0 };
-	int			alg_mode,setkey = 0;
+	int			setkey = 0;
 	char			*alg_name;
 
 	p = simple_get_bytes(p, end, resalg, sizeof(*resalg));
@@ -98,14 +100,12 @@
 
 	switch (*resalg) {
 		case NID_des_cbc:
-			alg_name = "des";
-			alg_mode = CRYPTO_TFM_MODE_CBC;
+			alg_name = "cbc(des)";
 			setkey = 1;
 			break;
 		case NID_cast5_cbc:
 			/* XXXX here in name only, not used */
-			alg_name = "cast5";
-			alg_mode = CRYPTO_TFM_MODE_CBC;
+			alg_name = "cbc(cast5)";
 			setkey = 0; /* XXX will need to set to 1 */
 			break;
 		case NID_md5:
@@ -113,19 +113,20 @@
 				dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
 			}
 			alg_name = "md5";
-			alg_mode = 0;
 			setkey = 0;
 			break;
 		default:
 			dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg);
 			goto out_err_free_key;
 	}
-	if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) {
+	*res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(*res)) {
 		printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name);
+		*res = NULL;
 		goto out_err_free_key;
 	}
 	if (setkey) {
-		if (crypto_cipher_setkey(*res, key.data, key.len)) {
+		if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
 			printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name);
 			goto out_err_free_tfm;
 		}
@@ -136,7 +137,7 @@
 	return p;
 
 out_err_free_tfm:
-	crypto_free_tfm(*res);
+	crypto_free_blkcipher(*res);
 out_err_free_key:
 	if(key.len > 0)
 		kfree(key.data);
@@ -204,9 +205,9 @@
 	return 0;
 
 out_err_free_key2:
-	crypto_free_tfm(ctx->derived_integ_key);
+	crypto_free_blkcipher(ctx->derived_integ_key);
 out_err_free_key1:
-	crypto_free_tfm(ctx->derived_conf_key);
+	crypto_free_blkcipher(ctx->derived_conf_key);
 out_err_free_s_key:
 	kfree(ctx->share_key.data);
 out_err_free_mech:
@@ -223,8 +224,8 @@
 gss_delete_sec_context_spkm3(void *internal_ctx) {
 	struct spkm3_ctx *sctx = internal_ctx;
 
-	crypto_free_tfm(sctx->derived_integ_key);
-	crypto_free_tfm(sctx->derived_conf_key);
+	crypto_free_blkcipher(sctx->derived_integ_key);
+	crypto_free_blkcipher(sctx->derived_conf_key);
 	kfree(sctx->share_key.data);
 	kfree(sctx->mech_used.data);
 	kfree(sctx);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 3e19d32..084a0ad 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -97,17 +97,7 @@
 	}
 }
 
-/*
- * Create an RPC client
- * FIXME: This should also take a flags argument (as in task->tk_flags).
- * It's called (among others) from pmap_create_client, which may in
- * turn be called by an async task. In this case, rpciod should not be
- * made to sleep too long.
- */
-struct rpc_clnt *
-rpc_new_client(struct rpc_xprt *xprt, char *servname,
-		  struct rpc_program *program, u32 vers,
-		  rpc_authflavor_t flavor)
+static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor)
 {
 	struct rpc_version	*version;
 	struct rpc_clnt		*clnt = NULL;
@@ -147,16 +137,12 @@
 	clnt->cl_procinfo = version->procs;
 	clnt->cl_maxproc  = version->nrprocs;
 	clnt->cl_protname = program->name;
-	clnt->cl_pmap	  = &clnt->cl_pmap_default;
-	clnt->cl_port     = xprt->addr.sin_port;
 	clnt->cl_prog     = program->number;
 	clnt->cl_vers     = version->number;
-	clnt->cl_prot     = xprt->prot;
 	clnt->cl_stats    = program->stats;
 	clnt->cl_metrics  = rpc_alloc_iostats(clnt);
-	rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
 
-	if (!clnt->cl_port)
+	if (!xprt_bound(clnt->cl_xprt))
 		clnt->cl_autobind = 1;
 
 	clnt->cl_rtt = &clnt->cl_rtt_default;
@@ -191,40 +177,71 @@
 		kfree(clnt->cl_server);
 	kfree(clnt);
 out_err:
-	xprt_destroy(xprt);
+	xprt_put(xprt);
 out_no_xprt:
 	return ERR_PTR(err);
 }
 
-/**
- * Create an RPC client
- * @xprt - pointer to xprt struct
- * @servname - name of server
- * @info - rpc_program
- * @version - rpc_program version
- * @authflavor - rpc_auth flavour to use
+/*
+ * rpc_create - create an RPC client and transport with one call
+ * @args: rpc_clnt create argument structure
  *
- * Creates an RPC client structure, then pings the server in order to
- * determine if it is up, and if it supports this program and version.
+ * Creates and initializes an RPC transport and an RPC client.
  *
- * This function should never be called by asynchronous tasks such as
- * the portmapper.
+ * It can ping the server in order to determine if it is up, and to see if
+ * it supports this program and version.  RPC_CLNT_CREATE_NOPING disables
+ * this behavior so asynchronous tasks can also use rpc_create.
  */
-struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
-		struct rpc_program *info, u32 version, rpc_authflavor_t authflavor)
+struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 {
+	struct rpc_xprt *xprt;
 	struct rpc_clnt *clnt;
-	int err;
-	
-	clnt = rpc_new_client(xprt, servname, info, version, authflavor);
+
+	xprt = xprt_create_transport(args->protocol, args->address,
+					args->addrsize, args->timeout);
+	if (IS_ERR(xprt))
+		return (struct rpc_clnt *)xprt;
+
+	/*
+	 * By default, kernel RPC client connects from a reserved port.
+	 * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
+	 * but it is always enabled for rpciod, which handles the connect
+	 * operation.
+	 */
+	xprt->resvport = 1;
+	if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
+		xprt->resvport = 0;
+
+	dprintk("RPC:       creating %s client for %s (xprt %p)\n",
+		args->program->name, args->servername, xprt);
+
+	clnt = rpc_new_client(xprt, args->servername, args->program,
+				args->version, args->authflavor);
 	if (IS_ERR(clnt))
 		return clnt;
-	err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
-	if (err == 0)
-		return clnt;
-	rpc_shutdown_client(clnt);
-	return ERR_PTR(err);
+
+	if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
+		int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+		if (err != 0) {
+			rpc_shutdown_client(clnt);
+			return ERR_PTR(err);
+		}
+	}
+
+	clnt->cl_softrtry = 1;
+	if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
+		clnt->cl_softrtry = 0;
+
+	if (args->flags & RPC_CLNT_CREATE_INTR)
+		clnt->cl_intr = 1;
+	if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
+		clnt->cl_autobind = 1;
+	if (args->flags & RPC_CLNT_CREATE_ONESHOT)
+		clnt->cl_oneshot = 1;
+
+	return clnt;
 }
+EXPORT_SYMBOL_GPL(rpc_create);
 
 /*
  * This function clones the RPC client structure. It allows us to share the
@@ -244,8 +261,7 @@
 	atomic_set(&new->cl_users, 0);
 	new->cl_parent = clnt;
 	atomic_inc(&clnt->cl_count);
-	/* Duplicate portmapper */
-	rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
+	new->cl_xprt = xprt_get(clnt->cl_xprt);
 	/* Turn off autobind on clones */
 	new->cl_autobind = 0;
 	new->cl_oneshot = 0;
@@ -255,8 +271,7 @@
 	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
-	new->cl_pmap		= &new->cl_pmap_default;
-	new->cl_metrics         = rpc_alloc_iostats(clnt);
+	new->cl_metrics = rpc_alloc_iostats(clnt);
 	return new;
 out_no_clnt:
 	printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
@@ -323,15 +338,12 @@
 		rpc_rmdir(clnt->cl_dentry);
 		rpc_put_mount();
 	}
-	if (clnt->cl_xprt) {
-		xprt_destroy(clnt->cl_xprt);
-		clnt->cl_xprt = NULL;
-	}
 	if (clnt->cl_server != clnt->cl_inline_name)
 		kfree(clnt->cl_server);
 out_free:
 	rpc_free_iostats(clnt->cl_metrics);
 	clnt->cl_metrics = NULL;
+	xprt_put(clnt->cl_xprt);
 	kfree(clnt);
 	return 0;
 }
@@ -540,6 +552,40 @@
 		task->tk_action = rpc_exit_task;
 }
 
+/**
+ * rpc_peeraddr - extract remote peer address from clnt's xprt
+ * @clnt: RPC client structure
+ * @buf: target buffer
+ * @bufsize: length of target buffer
+ *
+ * Copies at most @bufsize bytes.  Returns the length of the stored address.
+ */
+size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
+{
+	size_t bytes;
+	struct rpc_xprt *xprt = clnt->cl_xprt;
+
+	bytes = sizeof(xprt->addr);	/* copy size is capped by the buffer */
+	if (bytes > bufsize)
+		bytes = bufsize;
+	memcpy(buf, &clnt->cl_xprt->addr, bytes);
+	return xprt->addrlen;		/* stored length, not bytes copied */
+}
+EXPORT_SYMBOL_GPL(rpc_peeraddr);
+
+/**
+ * rpc_peeraddr2str - return remote peer address in printable format
+ * @clnt: RPC client structure
+ * @format: address format, handed to the transport's print_addr method
+ *
+ */
+char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
+{
+	struct rpc_xprt *xprt = clnt->cl_xprt;
+	return xprt->ops->print_addr(xprt, format);
+}
+EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
+
 void
 rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
 {
@@ -560,7 +606,7 @@
 {
 	return clnt->cl_xprt->max_payload;
 }
-EXPORT_SYMBOL(rpc_max_payload);
+EXPORT_SYMBOL_GPL(rpc_max_payload);
 
 /**
  * rpc_force_rebind - force transport to check that remote port is unchanged
@@ -570,9 +616,9 @@
 void rpc_force_rebind(struct rpc_clnt *clnt)
 {
 	if (clnt->cl_autobind)
-		clnt->cl_port = 0;
+		xprt_clear_bound(clnt->cl_xprt);
 }
-EXPORT_SYMBOL(rpc_force_rebind);
+EXPORT_SYMBOL_GPL(rpc_force_rebind);
 
 /*
  * Restart an (async) RPC call. Usually called from within the
@@ -781,16 +827,16 @@
 static void
 call_bind(struct rpc_task *task)
 {
-	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_xprt *xprt = task->tk_xprt;
 
 	dprintk("RPC: %4d call_bind (status %d)\n",
 				task->tk_pid, task->tk_status);
 
 	task->tk_action = call_connect;
-	if (!clnt->cl_port) {
+	if (!xprt_bound(xprt)) {
 		task->tk_action = call_bind_status;
-		task->tk_timeout = task->tk_xprt->bind_timeout;
-		rpc_getport(task, clnt);
+		task->tk_timeout = xprt->bind_timeout;
+		xprt->ops->rpcbind(task);
 	}
 }
 
@@ -815,15 +861,11 @@
 		dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n",
 				task->tk_pid);
 		rpc_delay(task, 3*HZ);
-		goto retry_bind;
+		goto retry_timeout;
 	case -ETIMEDOUT:
 		dprintk("RPC: %4d rpcbind request timed out\n",
 				task->tk_pid);
-		if (RPC_IS_SOFT(task)) {
-			status = -EIO;
-			break;
-		}
-		goto retry_bind;
+		goto retry_timeout;
 	case -EPFNOSUPPORT:
 		dprintk("RPC: %4d remote rpcbind service unavailable\n",
 				task->tk_pid);
@@ -836,16 +878,13 @@
 		dprintk("RPC: %4d unrecognized rpcbind error (%d)\n",
 				task->tk_pid, -task->tk_status);
 		status = -EIO;
-		break;
 	}
 
 	rpc_exit(task, status);
 	return;
 
-retry_bind:
-	task->tk_status = 0;
-	task->tk_action = call_bind;
-	return;
+retry_timeout:
+	task->tk_action = call_timeout;
 }
 
 /*
@@ -893,14 +932,16 @@
 
 	switch (status) {
 	case -ENOTCONN:
-	case -ETIMEDOUT:
 	case -EAGAIN:
 		task->tk_action = call_bind;
-		break;
-	default:
-		rpc_exit(task, -EIO);
-		break;
+		if (!RPC_IS_SOFT(task))
+			return;
+		/* if soft mounted, test if we've timed out */
+	case -ETIMEDOUT:
+		task->tk_action = call_timeout;
+		return;
 	}
+	rpc_exit(task, -EIO);
 }
 
 /*
@@ -982,6 +1023,14 @@
 
 	task->tk_status = 0;
 	switch(status) {
+	case -EHOSTDOWN:
+	case -EHOSTUNREACH:
+	case -ENETUNREACH:
+		/*
+		 * Delay any retries for 3 seconds, then handle as if it
+		 * were a timeout.
+		 */
+		rpc_delay(task, 3*HZ);
 	case -ETIMEDOUT:
 		task->tk_action = call_timeout;
 		break;
@@ -1001,7 +1050,6 @@
 		printk("%s: RPC call returned error %d\n",
 			       clnt->cl_protname, -status);
 		rpc_exit(task, status);
-		break;
 	}
 }
 
@@ -1069,10 +1117,10 @@
 			clnt->cl_stats->rpcretrans++;
 			goto out_retry;
 		}
-		printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
+		dprintk("%s: too small RPC reply size (%d bytes)\n",
 			clnt->cl_protname, task->tk_status);
-		rpc_exit(task, -EIO);
-		return;
+		task->tk_action = call_timeout;
+		goto out_retry;
 	}
 
 	/*
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 623180f..c04609d 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -1,7 +1,9 @@
 /*
- * linux/net/sunrpc/pmap.c
+ * linux/net/sunrpc/pmap_clnt.c
  *
- * Portmapper client.
+ * In-kernel RPC portmapper client.
+ *
+ * Portmapper supports version 2 of the rpcbind protocol (RFC 1833).
  *
  * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
  */
@@ -13,7 +15,6 @@
 #include <linux/uio.h>
 #include <linux/in.h>
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
 
 #ifdef RPC_DEBUG
@@ -24,80 +25,141 @@
 #define PMAP_UNSET		2
 #define PMAP_GETPORT		3
 
+struct portmap_args {
+	u32			pm_prog;	/* RPC program number */
+	u32			pm_vers;	/* RPC version number */
+	u32			pm_prot;	/* transport protocol */
+	unsigned short		pm_port;	/* GETPORT reply is decoded here */
+	struct rpc_xprt *	pm_xprt;	/* xprt_get() ref (rpc_getport) */
+};
+
 static struct rpc_procinfo	pmap_procedures[];
 static struct rpc_clnt *	pmap_create(char *, struct sockaddr_in *, int, int);
-static void			pmap_getport_done(struct rpc_task *);
+static void			pmap_getport_done(struct rpc_task *, void *);
 static struct rpc_program	pmap_program;
-static DEFINE_SPINLOCK(pmap_lock);
 
-/*
- * Obtain the port for a given RPC service on a given host. This one can
- * be called for an ongoing RPC request.
- */
-void
-rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
+static void pmap_getport_prepare(struct rpc_task *task, void *calldata)
 {
-	struct rpc_portmap *map = clnt->cl_pmap;
-	struct sockaddr_in *sap = &clnt->cl_xprt->addr;
+	struct portmap_args *map = calldata;
 	struct rpc_message msg = {
 		.rpc_proc	= &pmap_procedures[PMAP_GETPORT],
 		.rpc_argp	= map,
-		.rpc_resp	= &clnt->cl_port,
-		.rpc_cred	= NULL
+		.rpc_resp	= &map->pm_port,
 	};
-	struct rpc_clnt	*pmap_clnt;
-	struct rpc_task	*child;
 
-	dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n",
+	rpc_call_setup(task, &msg, 0);
+}
+
+static inline struct portmap_args *pmap_map_alloc(void)
+{
+	return kmalloc(sizeof(struct portmap_args), GFP_NOFS);
+}
+
+static inline void pmap_map_free(struct portmap_args *map)
+{
+	kfree(map);
+}
+
+static void pmap_map_release(void *data)
+{
+	pmap_map_free(data);
+}
+
+static const struct rpc_call_ops pmap_getport_ops = {
+	.rpc_call_prepare	= pmap_getport_prepare,
+	.rpc_call_done		= pmap_getport_done,
+	.rpc_release		= pmap_map_release,
+};
+
+static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status)
+{
+	xprt_clear_binding(xprt);
+	rpc_wake_up_status(&xprt->binding, status);
+}
+
+/**
+ * rpc_getport - obtain the port for a given RPC service on a given host
+ * @task: task that is waiting for portmapper request
+ *
+ * This one can be called for an ongoing RPC request, and can be used in
+ * an async (rpciod) context.
+ */
+void rpc_getport(struct rpc_task *task)
+{
+	struct rpc_clnt *clnt = task->tk_client;
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct sockaddr_in addr;
+	struct portmap_args *map;
+	struct rpc_clnt	*pmap_clnt;
+	struct rpc_task *child;
+	int status;
+
+	dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n",
 			task->tk_pid, clnt->cl_server,
-			map->pm_prog, map->pm_vers, map->pm_prot);
+			clnt->cl_prog, clnt->cl_vers, xprt->prot);
 
 	/* Autobind on cloned rpc clients is discouraged */
 	BUG_ON(clnt->cl_parent != clnt);
 
-	spin_lock(&pmap_lock);
-	if (map->pm_binding) {
-		rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL);
-		spin_unlock(&pmap_lock);
+	if (xprt_test_and_set_binding(xprt)) {
+		task->tk_status = -EACCES;	/* tell caller to check again */
+		rpc_sleep_on(&xprt->binding, task, NULL, NULL);
 		return;
 	}
-	map->pm_binding = 1;
-	spin_unlock(&pmap_lock);
 
-	pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0);
-	if (IS_ERR(pmap_clnt)) {
-		task->tk_status = PTR_ERR(pmap_clnt);
-		goto bailout;
-	}
-	task->tk_status = 0;
+	/* Someone else may have bound if we slept */
+	status = 0;
+	if (xprt_bound(xprt))
+		goto bailout_nofree;
 
-	/*
-	 * Note: rpc_new_child will release client after a failure.
-	 */
-	if (!(child = rpc_new_child(pmap_clnt, task)))
+	status = -ENOMEM;
+	map = pmap_map_alloc();
+	if (!map)
+		goto bailout_nofree;
+	map->pm_prog = clnt->cl_prog;
+	map->pm_vers = clnt->cl_vers;
+	map->pm_prot = xprt->prot;
+	map->pm_port = 0;
+	map->pm_xprt = xprt_get(xprt);
+
+	rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr));
+	pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0);
+	status = PTR_ERR(pmap_clnt);
+	if (IS_ERR(pmap_clnt))
 		goto bailout;
 
-	/* Setup the call info struct */
-	rpc_call_setup(child, &msg, 0);
+	status = -EIO;
+	child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
+	if (IS_ERR(child))
+		goto bailout;
+	rpc_release_task(child);
 
-	/* ... and run the child task */
+	rpc_sleep_on(&xprt->binding, task, NULL, NULL);
+
 	task->tk_xprt->stat.bind_count++;
-	rpc_run_child(task, child, pmap_getport_done);
 	return;
 
 bailout:
-	spin_lock(&pmap_lock);
-	map->pm_binding = 0;
-	rpc_wake_up(&map->pm_bindwait);
-	spin_unlock(&pmap_lock);
-	rpc_exit(task, -EIO);
+	pmap_map_free(map);
+	xprt_put(xprt);
+bailout_nofree:
+	task->tk_status = status;
+	pmap_wake_portmap_waiters(xprt, status);
 }
 
 #ifdef CONFIG_ROOT_NFS
-int
-rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
+/**
+ * rpc_getport_external - obtain the port for a given RPC service on a given host
+ * @sin: address of remote peer
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ *
+ * This one is called from outside the RPC client in a synchronous task context.
+ */
+int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
 {
-	struct rpc_portmap map = {
+	struct portmap_args map = {
 		.pm_prog	= prog,
 		.pm_vers	= vers,
 		.pm_prot	= prot,
@@ -112,7 +174,7 @@
 	char		hostname[32];
 	int		status;
 
-	dprintk("RPC:      rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n",
+	dprintk("RPC:      rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
 			NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
 
 	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
@@ -132,45 +194,53 @@
 }
 #endif
 
-static void
-pmap_getport_done(struct rpc_task *task)
+/*
+ * Portmapper child task invokes this callback via rpc_call_done.
+ */
+static void pmap_getport_done(struct rpc_task *child, void *data)
 {
-	struct rpc_clnt	*clnt = task->tk_client;
-	struct rpc_xprt *xprt = task->tk_xprt;
-	struct rpc_portmap *map = clnt->cl_pmap;
+	struct portmap_args *map = data;
+	struct rpc_xprt *xprt = map->pm_xprt;
+	int status = child->tk_status;
 
-	dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
-			task->tk_pid, task->tk_status, clnt->cl_port);
-
-	xprt->ops->set_port(xprt, 0);
-	if (task->tk_status < 0) {
-		/* Make the calling task exit with an error */
-		task->tk_action = rpc_exit_task;
-	} else if (clnt->cl_port == 0) {
-		/* Program not registered */
-		rpc_exit(task, -EACCES);
+	if (status < 0) {
+		/* Portmapper not available */
+		xprt->ops->set_port(xprt, 0);
+	} else if (map->pm_port == 0) {
+		/* Requested RPC service wasn't registered */
+		xprt->ops->set_port(xprt, 0);
+		status = -EACCES;
 	} else {
-		xprt->ops->set_port(xprt, clnt->cl_port);
-		clnt->cl_port = htons(clnt->cl_port);
+		/* Succeeded */
+		xprt->ops->set_port(xprt, map->pm_port);
+		xprt_set_bound(xprt);
+		status = 0;
 	}
-	spin_lock(&pmap_lock);
-	map->pm_binding = 0;
-	rpc_wake_up(&map->pm_bindwait);
-	spin_unlock(&pmap_lock);
+
+	dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n",
+			child->tk_pid, status, map->pm_port);
+
+	pmap_wake_portmap_waiters(xprt, status);
+	xprt_put(xprt);
 }
 
-/*
- * Set or unset a port registration with the local portmapper.
+/**
+ * rpc_register - set or unset a port registration with the local portmapper
+ * @prog: RPC program number to bind
+ * @vers: RPC version number to bind
+ * @prot: transport protocol to use to make this request
+ * @port: port value to register
+ * @okay: result code
+ *
  * port == 0 means unregister, port != 0 means register.
  */
-int
-rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
+int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 {
 	struct sockaddr_in	sin = {
 		.sin_family	= AF_INET,
 		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
 	};
-	struct rpc_portmap	map = {
+	struct portmap_args	map = {
 		.pm_prog	= prog,
 		.pm_vers	= vers,
 		.pm_prot	= prot,
@@ -184,7 +254,7 @@
 	struct rpc_clnt		*pmap_clnt;
 	int error = 0;
 
-	dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
+	dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n",
 			prog, vers, prot, port);
 
 	pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
@@ -207,38 +277,32 @@
 	return error;
 }
 
-static struct rpc_clnt *
-pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
+static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
 {
-	struct rpc_xprt	*xprt;
-	struct rpc_clnt	*clnt;
+	struct rpc_create_args args = {
+		.protocol	= proto,
+		.address	= (struct sockaddr *)srvaddr,
+		.addrsize	= sizeof(*srvaddr),
+		.servername	= hostname,
+		.program	= &pmap_program,
+		.version	= RPC_PMAP_VERSION,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT |
+				   RPC_CLNT_CREATE_NOPING),
+	};
 
-	/* printk("pmap: create xprt\n"); */
-	xprt = xprt_create_proto(proto, srvaddr, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-	xprt->ops->set_port(xprt, RPC_PMAP_PORT);
+	srvaddr->sin_port = htons(RPC_PMAP_PORT);
 	if (!privileged)
-		xprt->resvport = 0;
-
-	/* printk("pmap: create clnt\n"); */
-	clnt = rpc_new_client(xprt, hostname,
-				&pmap_program, RPC_PMAP_VERSION,
-				RPC_AUTH_UNIX);
-	if (!IS_ERR(clnt)) {
-		clnt->cl_softrtry = 1;
-		clnt->cl_oneshot  = 1;
-	}
-	return clnt;
+		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+	return rpc_create(&args);
 }
 
 /*
  * XDR encode/decode functions for PMAP
  */
-static int
-xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
+static int xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map)
 {
-	dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n",
+	dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n",
 		map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
 	*p++ = htonl(map->pm_prog);
 	*p++ = htonl(map->pm_vers);
@@ -249,15 +313,13 @@
 	return 0;
 }
 
-static int
-xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
+static int xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
 {
 	*portp = (unsigned short) ntohl(*p++);
 	return 0;
 }
 
-static int
-xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
+static int xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
 {
 	*boolp = (unsigned int) ntohl(*p++);
 	return 0;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0b1a1ac..dfa504f 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -327,10 +327,8 @@
 	seq_printf(m, "RPC server: %s\n", clnt->cl_server);
 	seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
 			clnt->cl_prog, clnt->cl_vers);
-	seq_printf(m, "address: %u.%u.%u.%u\n",
-			NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr));
-	seq_printf(m, "protocol: %s\n",
-			clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+	seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
+	seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
 	return 0;
 }
 
@@ -623,17 +621,13 @@
 }
 
 static struct dentry *
-rpc_lookup_negative(char *path, struct nameidata *nd)
+rpc_lookup_create(struct dentry *parent, const char *name, int len)
 {
+	struct inode *dir = parent->d_inode;
 	struct dentry *dentry;
-	struct inode *dir;
-	int error;
 
-	if ((error = rpc_lookup_parent(path, nd)) != 0)
-		return ERR_PTR(error);
-	dir = nd->dentry->d_inode;
 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len);
+	dentry = lookup_one_len(name, parent, len);
 	if (IS_ERR(dentry))
 		goto out_err;
 	if (dentry->d_inode) {
@@ -644,7 +638,20 @@
 	return dentry;
 out_err:
 	mutex_unlock(&dir->i_mutex);
-	rpc_release_path(nd);
+	return dentry;
+}
+
+static struct dentry *
+rpc_lookup_negative(char *path, struct nameidata *nd)
+{
+	struct dentry *dentry;
+	int error;
+
+	if ((error = rpc_lookup_parent(path, nd)) != 0)
+		return ERR_PTR(error);
+	dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len);
+	if (IS_ERR(dentry))
+		rpc_release_path(nd);
 	return dentry;
 }
 
@@ -703,18 +710,17 @@
 }
 
 struct dentry *
-rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
+rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags)
 {
-	struct nameidata nd;
 	struct dentry *dentry;
 	struct inode *dir, *inode;
 	struct rpc_inode *rpci;
 
-	dentry = rpc_lookup_negative(path, &nd);
+	dentry = rpc_lookup_create(parent, name, strlen(name));
 	if (IS_ERR(dentry))
 		return dentry;
-	dir = nd.dentry->d_inode;
-	inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR);
+	dir = parent->d_inode;
+	inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR);
 	if (!inode)
 		goto err_dput;
 	inode->i_ino = iunique(dir->i_sb, 100);
@@ -728,13 +734,13 @@
 	dget(dentry);
 out:
 	mutex_unlock(&dir->i_mutex);
-	rpc_release_path(&nd);
 	return dentry;
 err_dput:
 	dput(dentry);
 	dentry = ERR_PTR(-ENOMEM);
-	printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n",
-			__FILE__, __FUNCTION__, path, -ENOMEM);
+	printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n",
+			__FILE__, __FUNCTION__, parent->d_name.name, name,
+			-ENOMEM);
 	goto out;
 }
 
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5c3eee76..6390461 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -21,7 +21,6 @@
 #include <linux/mutex.h>
 
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
 
 #ifdef RPC_DEBUG
 #define RPCDBG_FACILITY		RPCDBG_SCHED
@@ -45,12 +44,6 @@
 static void			rpc_async_schedule(void *);
 
 /*
- * RPC tasks that create another task (e.g. for contacting the portmapper)
- * will wait on this queue for their child's completion
- */
-static RPC_WAITQ(childq, "childq");
-
-/*
  * RPC tasks sit here while waiting for conditions to improve.
  */
 static RPC_WAITQ(delay_queue, "delayq");
@@ -324,16 +317,6 @@
 }
 
 /*
- * Place a newly initialized task on the workqueue.
- */
-static inline void
-rpc_schedule_run(struct rpc_task *task)
-{
-	rpc_set_active(task);
-	rpc_make_runnable(task);
-}
-
-/*
  * Prepare for sleeping on a wait queue.
  * By always appending tasks to the list we ensure FIFO behavior.
  * NB: An RPC task will only receive interrupt-driven events as long
@@ -559,24 +542,20 @@
 	spin_unlock_bh(&queue->lock);
 }
 
+static void __rpc_atrun(struct rpc_task *task)
+{
+	rpc_wake_up_task(task);	/* wake only; tk_status is not reset here */
+}
+
 /*
  * Run a task at a later time
  */
-static void	__rpc_atrun(struct rpc_task *);
-void
-rpc_delay(struct rpc_task *task, unsigned long delay)
+void rpc_delay(struct rpc_task *task, unsigned long delay)
 {
 	task->tk_timeout = delay;
 	rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
 }
 
-static void
-__rpc_atrun(struct rpc_task *task)
-{
-	task->tk_status = 0;
-	rpc_wake_up_task(task);
-}
-
 /*
  * Helper to call task->tk_ops->rpc_call_prepare
  */
@@ -933,72 +912,6 @@
 }
 EXPORT_SYMBOL(rpc_run_task);
 
-/**
- * rpc_find_parent - find the parent of a child task.
- * @child: child task
- * @parent: parent task
- *
- * Checks that the parent task is still sleeping on the
- * queue 'childq'. If so returns a pointer to the parent.
- * Upon failure returns NULL.
- *
- * Caller must hold childq.lock
- */
-static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent)
-{
-	struct rpc_task	*task;
-	struct list_head *le;
-
-	task_for_each(task, le, &childq.tasks[0])
-		if (task == parent)
-			return parent;
-
-	return NULL;
-}
-
-static void rpc_child_exit(struct rpc_task *child, void *calldata)
-{
-	struct rpc_task	*parent;
-
-	spin_lock_bh(&childq.lock);
-	if ((parent = rpc_find_parent(child, calldata)) != NULL) {
-		parent->tk_status = child->tk_status;
-		__rpc_wake_up_task(parent);
-	}
-	spin_unlock_bh(&childq.lock);
-}
-
-static const struct rpc_call_ops rpc_child_ops = {
-	.rpc_call_done = rpc_child_exit,
-};
-
-/*
- * Note: rpc_new_task releases the client after a failure.
- */
-struct rpc_task *
-rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
-{
-	struct rpc_task	*task;
-
-	task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent);
-	if (!task)
-		goto fail;
-	return task;
-
-fail:
-	parent->tk_status = -ENOMEM;
-	return NULL;
-}
-
-void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
-{
-	spin_lock_bh(&childq.lock);
-	/* N.B. Is it possible for the child to have already finished? */
-	__rpc_sleep_on(&childq, task, func, NULL);
-	rpc_schedule_run(child);
-	spin_unlock_bh(&childq.lock);
-}
-
 /*
  * Kill all tasks for the given client.
  * XXX: kill their descendants as well?
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index eb330d4..6f17527 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -168,7 +168,7 @@
 		return -1;
 	if ((unsigned short)csum_fold(desc.csum))
 		return -1;
-	if (unlikely(skb->ip_summed == CHECKSUM_HW))
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 		netdev_rx_csum_fault(skb->dev);
 	return 0;
 no_checksum:
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index f38f939c..26c0531 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -36,8 +36,6 @@
 EXPORT_SYMBOL(rpc_release_task);
 
 /* RPC client functions */
-EXPORT_SYMBOL(rpc_create_client);
-EXPORT_SYMBOL(rpc_new_client);
 EXPORT_SYMBOL(rpc_clone_client);
 EXPORT_SYMBOL(rpc_bind_new_program);
 EXPORT_SYMBOL(rpc_destroy_client);
@@ -57,7 +55,6 @@
 EXPORT_SYMBOL(rpc_mkpipe);
 
 /* Client transport */
-EXPORT_SYMBOL(xprt_create_proto);
 EXPORT_SYMBOL(xprt_set_timeout);
 
 /* Client credential cache */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d9a9573..953aff8 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -388,7 +388,7 @@
 	/* send head */
 	if (slen == xdr->head[0].iov_len)
 		flags = 0;
-	len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
+	len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
 	if (len != xdr->head[0].iov_len)
 		goto out;
 	slen -= xdr->head[0].iov_len;
@@ -400,7 +400,7 @@
 	while (pglen > 0) {
 		if (slen == size)
 			flags = 0;
-		result = sock->ops->sendpage(sock, *ppage, base, size, flags);
+		result = kernel_sendpage(sock, *ppage, base, size, flags);
 		if (result > 0)
 			len += result;
 		if (result != size)
@@ -413,7 +413,7 @@
 	}
 	/* send tail */
 	if (xdr->tail[0].iov_len) {
-		result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], 
+		result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage],
 					     ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1),
 					     xdr->tail[0].iov_len, 0);
 
@@ -434,13 +434,10 @@
 static int
 svc_recv_available(struct svc_sock *svsk)
 {
-	mm_segment_t	oldfs;
 	struct socket	*sock = svsk->sk_sock;
 	int		avail, err;
 
-	oldfs = get_fs(); set_fs(KERNEL_DS);
-	err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
-	set_fs(oldfs);
+	err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
 
 	return (err >= 0)? avail : err;
 }
@@ -472,7 +469,7 @@
 	 * at accept time. FIXME
 	 */
 	alen = sizeof(rqstp->rq_addr);
-	sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);
+	kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen);
 
 	dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
 		rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
@@ -758,7 +755,6 @@
 	struct svc_serv	*serv = svsk->sk_server;
 	struct socket	*sock = svsk->sk_sock;
 	struct socket	*newsock;
-	const struct proto_ops *ops;
 	struct svc_sock	*newsvsk;
 	int		err, slen;
 
@@ -766,29 +762,23 @@
 	if (!sock)
 		return;
 
-	err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
-	if (err) {
+	clear_bit(SK_CONN, &svsk->sk_flags);
+	err = kernel_accept(sock, &newsock, O_NONBLOCK);
+	if (err < 0) {
 		if (err == -ENOMEM)
 			printk(KERN_WARNING "%s: no more sockets!\n",
 			       serv->sv_name);
+		else if (err != -EAGAIN && net_ratelimit())
+			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
+				   serv->sv_name, -err);
 		return;
 	}
 
-	dprintk("svc: tcp_accept %p allocated\n", newsock);
-	newsock->ops = ops = sock->ops;
-
-	clear_bit(SK_CONN, &svsk->sk_flags);
-	if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
-		if (err != -EAGAIN && net_ratelimit())
-			printk(KERN_WARNING "%s: accept failed (err %d)!\n",
-				   serv->sv_name, -err);
-		goto failed;		/* aborted connection or whatever */
-	}
 	set_bit(SK_CONN, &svsk->sk_flags);
 	svc_sock_enqueue(svsk);
 
 	slen = sizeof(sin);
-	err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);
+	err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen);
 	if (err < 0) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "%s: peername failed (err %d)!\n",
@@ -1406,14 +1396,14 @@
 	if (sin != NULL) {
 		if (type == SOCK_STREAM)
 			sock->sk->sk_reuse = 1; /* allow address reuse */
-		error = sock->ops->bind(sock, (struct sockaddr *) sin,
+		error = kernel_bind(sock, (struct sockaddr *) sin,
 						sizeof(*sin));
 		if (error < 0)
 			goto bummer;
 	}
 
 	if (protocol == IPPROTO_TCP) {
-		if ((error = sock->ops->listen(sock, 64)) < 0)
+		if ((error = kernel_listen(sock, 64)) < 0)
 			goto bummer;
 	}
 
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
index bcbdf64..8142fdb 100644
--- a/net/sunrpc/timer.c
+++ b/net/sunrpc/timer.c
@@ -19,8 +19,6 @@
 #include <linux/unistd.h>
 
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
-#include <linux/sunrpc/timer.h>
 
 #define RPC_RTO_MAX (60*HZ)
 #define RPC_RTO_INIT (HZ/5)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e8c2bc4..1f786f6 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -534,7 +534,7 @@
 	dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
 			xprt, (xprt_connected(xprt) ? "is" : "is not"));
 
-	if (!xprt->addr.sin_port) {
+	if (!xprt_bound(xprt)) {
 		task->tk_status = -EIO;
 		return;
 	}
@@ -585,13 +585,6 @@
 				task->tk_pid, -task->tk_status, task->tk_client->cl_server);
 		xprt_release_write(xprt, task);
 		task->tk_status = -EIO;
-		return;
-	}
-
-	/* if soft mounted, just cause this RPC to fail */
-	if (RPC_IS_SOFT(task)) {
-		xprt_release_write(xprt, task);
-		task->tk_status = -EIO;
 	}
 }
 
@@ -829,6 +822,7 @@
 	req->rq_bufsize = 0;
 	req->rq_xid     = xprt_alloc_xid(xprt);
 	req->rq_release_snd_buf = NULL;
+	xprt_reset_majortimeo(req);
 	dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
 			req, ntohl(req->rq_xid));
 }
@@ -887,16 +881,32 @@
 	to->to_exponential = 0;
 }
 
-static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
+/**
+ * xprt_create_transport - create an RPC transport
+ * @proto: requested transport protocol
+ * @ap: remote peer address
+ * @size: length of address
+ * @to: timeout parameters
+ *
+ */
+struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to)
 {
 	int result;
 	struct rpc_xprt	*xprt;
 	struct rpc_rqst	*req;
 
-	if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
+	if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) {
+		dprintk("RPC:      xprt_create_transport: no memory\n");
 		return ERR_PTR(-ENOMEM);
-
-	xprt->addr = *ap;
+	}
+	if (size <= sizeof(xprt->addr)) {
+		memcpy(&xprt->addr, ap, size);
+		xprt->addrlen = size;
+	} else {
+		kfree(xprt);
+		dprintk("RPC:      xprt_create_transport: address too large\n");
+		return ERR_PTR(-EBADF);
+	}
 
 	switch (proto) {
 	case IPPROTO_UDP:
@@ -908,14 +918,15 @@
 	default:
 		printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
 				proto);
-		result = -EIO;
-		break;
+		result = -EIO;	/* leave the switch so xprt is kfree'd below */
 	}
 	if (result) {
 		kfree(xprt);
+		dprintk("RPC:      xprt_create_transport: failed, %d\n", result);
 		return ERR_PTR(result);
 	}
 
+	kref_init(&xprt->kref);
 	spin_lock_init(&xprt->transport_lock);
 	spin_lock_init(&xprt->reserve_lock);
 
@@ -928,6 +939,7 @@
 	xprt->last_used = jiffies;
 	xprt->cwnd = RPC_INITCWND;
 
+	rpc_init_wait_queue(&xprt->binding, "xprt_binding");
 	rpc_init_wait_queue(&xprt->pending, "xprt_pending");
 	rpc_init_wait_queue(&xprt->sending, "xprt_sending");
 	rpc_init_wait_queue(&xprt->resend, "xprt_resend");
@@ -941,41 +953,43 @@
 
 	dprintk("RPC:      created transport %p with %u slots\n", xprt,
 			xprt->max_reqs);
-	
-	return xprt;
-}
 
-/**
- * xprt_create_proto - create an RPC client transport
- * @proto: requested transport protocol
- * @sap: remote peer's address
- * @to: timeout parameters for new transport
- *
- */
-struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
-{
-	struct rpc_xprt	*xprt;
-
-	xprt = xprt_setup(proto, sap, to);
-	if (IS_ERR(xprt))
-		dprintk("RPC:      xprt_create_proto failed\n");
-	else
-		dprintk("RPC:      xprt_create_proto created xprt %p\n", xprt);
 	return xprt;
 }
 
 /**
  * xprt_destroy - destroy an RPC transport, killing off all requests.
- * @xprt: transport to destroy
+ * @kref: kref for the transport to destroy
  *
  */
-int xprt_destroy(struct rpc_xprt *xprt)
+static void xprt_destroy(struct kref *kref)
 {
+	struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref);
+
 	dprintk("RPC:      destroying transport %p\n", xprt);
 	xprt->shutdown = 1;
 	del_timer_sync(&xprt->timer);
 	xprt->ops->destroy(xprt);
 	kfree(xprt);
+}
 
-	return 0;
+/**
+ * xprt_put - release a reference to an RPC transport.
+ * @xprt: pointer to the transport
+ *
+ */
+void xprt_put(struct rpc_xprt *xprt)
+{
+	kref_put(&xprt->kref, xprt_destroy);
+}
+
+/**
+ * xprt_get - return a reference to an RPC transport.
+ * @xprt: pointer to the transport
+ *
+ */
+struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
+{
+	kref_get(&xprt->kref);
+	return xprt;
 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 441bd53..9b62923 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -125,6 +125,47 @@
 }
 #endif
 
+static void xs_format_peer_addresses(struct rpc_xprt *xprt)
+{
+	struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+	char *buf;
+
+	buf = kzalloc(20, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 20, "%u.%u.%u.%u",
+				NIPQUAD(addr->sin_addr.s_addr));
+	}
+	xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 8, "%u",
+				ntohs(addr->sin_port));
+	}
+	xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+	if (xprt->prot == IPPROTO_UDP)
+		xprt->address_strings[RPC_DISPLAY_PROTO] = "udp";
+	else
+		xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp";
+
+	buf = kzalloc(48, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s",
+			NIPQUAD(addr->sin_addr.s_addr),
+			ntohs(addr->sin_port),
+			xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+	}
+	xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+}
+
+static void xs_free_peer_addresses(struct rpc_xprt *xprt)
+{
+	kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
+	kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+	kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+}
+
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
 
 static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len)
@@ -174,7 +215,6 @@
 	struct page **ppage = xdr->pages;
 	unsigned int len, pglen = xdr->page_len;
 	int err, ret = 0;
-	ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
 
 	if (unlikely(!sock))
 		return -ENOTCONN;
@@ -207,7 +247,6 @@
 		base &= ~PAGE_CACHE_MASK;
 	}
 
-	sendpage = sock->ops->sendpage ? : sock_no_sendpage;
 	do {
 		int flags = XS_SENDMSG_FLAGS;
 
@@ -220,10 +259,7 @@
 		if (pglen != len || xdr->tail[0].iov_len != 0)
 			flags |= MSG_MORE;
 
-		/* Hmm... We might be dealing with highmem pages */
-		if (PageHighMem(*ppage))
-			sendpage = sock_no_sendpage;
-		err = sendpage(sock, *ppage, base, len, flags);
+		err = kernel_sendpage(sock, *ppage, base, len, flags);
 		if (ret == 0)
 			ret = err;
 		else if (err > 0)
@@ -300,7 +336,7 @@
 
 	req->rq_xtime = jiffies;
 	status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr,
-				sizeof(xprt->addr), xdr, req->rq_bytes_sent);
+				xprt->addrlen, xdr, req->rq_bytes_sent);
 
 	dprintk("RPC:      xs_udp_send_request(%u) = %d\n",
 			xdr->len - req->rq_bytes_sent, status);
@@ -490,6 +526,7 @@
 
 	xprt_disconnect(xprt);
 	xs_close(xprt);
+	xs_free_peer_addresses(xprt);
 	kfree(xprt->slot);
 }
 
@@ -965,6 +1002,19 @@
 }
 
 /**
+ * xs_print_peer_address - return a stored peer address string, or "unprintable"
+ * @xprt: generic transport
+ * @format: index of the wanted string in xprt->address_strings[]
+ */
+static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format)
+{
+	if (xprt->address_strings[format] != NULL)
+		return xprt->address_strings[format];
+	else
+		return "unprintable";
+}
+
+/**
  * xs_set_port - reset the port number in the remote endpoint address
  * @xprt: generic transport
  * @port: new port number
@@ -972,8 +1022,11 @@
  */
 static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
 {
+	struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr;
+
 	dprintk("RPC:      setting port for xprt %p to %u\n", xprt, port);
-	xprt->addr.sin_port = htons(port);
+
+	sap->sin_port = htons(port);
 }
 
 static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
@@ -986,7 +1039,7 @@
 
 	do {
 		myaddr.sin_port = htons(port);
-		err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
+		err = kernel_bind(sock, (struct sockaddr *) &myaddr,
 						sizeof(myaddr));
 		if (err == 0) {
 			xprt->port = port;
@@ -1016,11 +1069,9 @@
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
 
-	if (xprt->shutdown || xprt->addr.sin_port == 0)
+	if (xprt->shutdown || !xprt_bound(xprt))
 		goto out;
 
-	dprintk("RPC:      xs_udp_connect_worker for xprt %p\n", xprt);
-
 	/* Start by resetting any existing state */
 	xs_close(xprt);
 
@@ -1034,6 +1085,9 @@
 		goto out;
 	}
 
+	dprintk("RPC:      worker connecting xprt %p to address: %s\n",
+			xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
 	if (!xprt->inet) {
 		struct sock *sk = sock->sk;
 
@@ -1081,7 +1135,7 @@
 	 */
 	memset(&any, 0, sizeof(any));
 	any.sa_family = AF_UNSPEC;
-	result = sock->ops->connect(sock, &any, sizeof(any), 0);
+	result = kernel_connect(sock, &any, sizeof(any), 0);
 	if (result)
 		dprintk("RPC:      AF_UNSPEC connect return code %d\n",
 				result);
@@ -1099,11 +1153,9 @@
 	struct socket *sock = xprt->sock;
 	int err, status = -EIO;
 
-	if (xprt->shutdown || xprt->addr.sin_port == 0)
+	if (xprt->shutdown || !xprt_bound(xprt))
 		goto out;
 
-	dprintk("RPC:      xs_tcp_connect_worker for xprt %p\n", xprt);
-
 	if (!xprt->sock) {
 		/* start from scratch */
 		if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
@@ -1119,6 +1171,9 @@
 		/* "close" the socket, preserving the local port */
 		xs_tcp_reuse_connection(xprt);
 
+	dprintk("RPC:      worker connecting xprt %p to address: %s\n",
+			xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
 	if (!xprt->inet) {
 		struct sock *sk = sock->sk;
 
@@ -1151,8 +1206,8 @@
 	/* Tell the socket layer to start connecting... */
 	xprt->stat.connect_count++;
 	xprt->stat.connect_start = jiffies;
-	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
-			sizeof(xprt->addr), O_NONBLOCK);
+	status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
+			xprt->addrlen, O_NONBLOCK);
 	dprintk("RPC: %p  connect status %d connected %d sock state %d\n",
 			xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
 	if (status < 0) {
@@ -1260,8 +1315,10 @@
 
 static struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
+	.print_addr		= xs_print_peer_address,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,
+	.rpcbind		= rpc_getport,
 	.set_port		= xs_set_port,
 	.connect		= xs_connect,
 	.buf_alloc		= rpc_malloc,
@@ -1276,8 +1333,10 @@
 };
 
 static struct rpc_xprt_ops xs_tcp_ops = {
+	.print_addr		= xs_print_peer_address,
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xs_tcp_release_xprt,
+	.rpcbind		= rpc_getport,
 	.set_port		= xs_set_port,
 	.connect		= xs_connect,
 	.buf_alloc		= rpc_malloc,
@@ -1298,8 +1357,7 @@
 int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 {
 	size_t slot_table_size;
-
-	dprintk("RPC:      setting up udp-ipv4 transport...\n");
+	struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
 
 	xprt->max_reqs = xprt_udp_slot_table_entries;
 	slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
@@ -1307,10 +1365,12 @@
 	if (xprt->slot == NULL)
 		return -ENOMEM;
 
-	xprt->prot = IPPROTO_UDP;
+	if (ntohs(addr->sin_port) != 0)
+		xprt_set_bound(xprt);
 	xprt->port = xs_get_random_port();
+
+	xprt->prot = IPPROTO_UDP;
 	xprt->tsh_size = 0;
-	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
@@ -1327,6 +1387,10 @@
 	else
 		xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
 
+	xs_format_peer_addresses(xprt);
+	dprintk("RPC:      set up transport to address %s\n",
+			xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
 	return 0;
 }
 
@@ -1339,8 +1403,7 @@
 int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 {
 	size_t slot_table_size;
-
-	dprintk("RPC:      setting up tcp-ipv4 transport...\n");
+	struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
 
 	xprt->max_reqs = xprt_tcp_slot_table_entries;
 	slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
@@ -1348,10 +1411,12 @@
 	if (xprt->slot == NULL)
 		return -ENOMEM;
 
-	xprt->prot = IPPROTO_TCP;
+	if (ntohs(addr->sin_port) != 0)
+		xprt_set_bound(xprt);
 	xprt->port = xs_get_random_port();
+
+	xprt->prot = IPPROTO_TCP;
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
-	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
 	INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
@@ -1367,5 +1432,9 @@
 	else
 		xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
 
+	xs_format_peer_addresses(xprt);
+	dprintk("RPC:      set up transport to address %s\n",
+			xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+
 	return 0;
 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index de6ec51..b43a278 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,7 +117,7 @@
 #include <net/checksum.h>
 #include <linux/security.h>
 
-int sysctl_unix_max_dgram_qlen = 10;
+int sysctl_unix_max_dgram_qlen __read_mostly = 10;
 
 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 DEFINE_SPINLOCK(unix_table_lock);
@@ -2060,10 +2060,7 @@
 	int rc = -1;
 	struct sk_buff *dummy_skb;
 
-	if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
-		printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
-		goto out;
-	}
+	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
 
 	rc = proto_register(&unix_proto, 1);
         if (rc != 0) {
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 0c1c043..0faab63 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -6,14 +6,24 @@
        depends on NET
 
 config XFRM_USER
-	tristate "IPsec user configuration interface"
+	tristate "Transformation user configuration interface"
 	depends on INET && XFRM
 	---help---
-	  Support for IPsec user configuration interface used
-	  by native Linux tools.
+	  Support for Transformation(XFRM) user configuration interface
+	  like IPsec used by native Linux tools.
 
 	  If unsure, say Y.
 
+config XFRM_SUB_POLICY
+	bool "Transformation sub policy support (EXPERIMENTAL)"
+	depends on XFRM && EXPERIMENTAL
+	---help---
+	  Support sub policies, intended for developers. Using a sub policy
+	  together with a main one lets two policies apply to the same packet.
+	  The policy with the shorter lifetime in the kernel should be the sub.
+
+	  If unsure, say N.
+
 config NET_KEY
 	tristate "PF_KEY sockets"
 	select XFRM
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 693aac1..de3c1a6 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -2,6 +2,7 @@
 # Makefile for the XFRM subsystem.
 #
 
-obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o
+obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
+		      xfrm_input.o xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 04e1aea..5a0dbeb 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -30,7 +30,8 @@
  */
 static struct xfrm_algo_desc aalg_list[] = {
 {
-	.name = "digest_null",
+	.name = "hmac(digest_null)",
+	.compat = "digest_null",
 	
 	.uinfo = {
 		.auth = {
@@ -47,7 +48,8 @@
 	}
 },
 {
-	.name = "md5",
+	.name = "hmac(md5)",
+	.compat = "md5",
 
 	.uinfo = {
 		.auth = {
@@ -64,7 +66,8 @@
 	}
 },
 {
-	.name = "sha1",
+	.name = "hmac(sha1)",
+	.compat = "sha1",
 
 	.uinfo = {
 		.auth = {
@@ -81,7 +84,8 @@
 	}
 },
 {
-	.name = "sha256",
+	.name = "hmac(sha256)",
+	.compat = "sha256",
 
 	.uinfo = {
 		.auth = {
@@ -98,7 +102,8 @@
 	}
 },
 {
-	.name = "ripemd160",
+	.name = "hmac(ripemd160)",
+	.compat = "ripemd160",
 
 	.uinfo = {
 		.auth = {
@@ -118,7 +123,8 @@
 
 static struct xfrm_algo_desc ealg_list[] = {
 {
-	.name = "cipher_null",
+	.name = "ecb(cipher_null)",
+	.compat = "cipher_null",
 	
 	.uinfo = {
 		.encr = {
@@ -135,7 +141,8 @@
 	}
 },
 {
-	.name = "des",
+	.name = "cbc(des)",
+	.compat = "des",
 
 	.uinfo = {
 		.encr = {
@@ -152,7 +159,8 @@
 	}
 },
 {
-	.name = "des3_ede",
+	.name = "cbc(des3_ede)",
+	.compat = "des3_ede",
 
 	.uinfo = {
 		.encr = {
@@ -169,7 +177,8 @@
 	}
 },
 {
-	.name = "cast128",
+	.name = "cbc(cast128)",
+	.compat = "cast128",
 
 	.uinfo = {
 		.encr = {
@@ -186,7 +195,8 @@
 	}
 },
 {
-	.name = "blowfish",
+	.name = "cbc(blowfish)",
+	.compat = "blowfish",
 
 	.uinfo = {
 		.encr = {
@@ -203,7 +213,8 @@
 	}
 },
 {
-	.name = "aes",
+	.name = "cbc(aes)",
+	.compat = "aes",
 
 	.uinfo = {
 		.encr = {
@@ -220,7 +231,8 @@
 	}
 },
 {
-        .name = "serpent",
+        .name = "cbc(serpent)",
+        .compat = "serpent",
 
         .uinfo = {
                 .encr = {
@@ -237,7 +249,8 @@
         }
 },
 {
-        .name = "twofish",
+        .name = "cbc(twofish)",
+        .compat = "twofish",
                  
         .uinfo = {
                 .encr = {
@@ -350,8 +363,8 @@
 EXPORT_SYMBOL_GPL(xfrm_calg_get_byid);
 
 static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list,
-					      int entries, char *name,
-					      int probe)
+					      int entries, u32 type, u32 mask,
+					      char *name, int probe)
 {
 	int i, status;
 
@@ -359,7 +372,8 @@
 		return NULL;
 
 	for (i = 0; i < entries; i++) {
-		if (strcmp(name, list[i].name))
+		if (strcmp(name, list[i].name) &&
+		    (!list[i].compat || strcmp(name, list[i].compat)))
 			continue;
 
 		if (list[i].available)
@@ -368,7 +382,7 @@
 		if (!probe)
 			break;
 
-		status = crypto_alg_available(name, 0);
+		status = crypto_has_alg(name, type, mask | CRYPTO_ALG_ASYNC);
 		if (!status)
 			break;
 
@@ -380,19 +394,25 @@
 
 struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe)
 {
-	return xfrm_get_byname(aalg_list, aalg_entries(), name, probe);
+	return xfrm_get_byname(aalg_list, aalg_entries(),
+			       CRYPTO_ALG_TYPE_HASH, CRYPTO_ALG_TYPE_HASH_MASK,
+			       name, probe);
 }
 EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
 
 struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe)
 {
-	return xfrm_get_byname(ealg_list, ealg_entries(), name, probe);
+	return xfrm_get_byname(ealg_list, ealg_entries(),
+			       CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK,
+			       name, probe);
 }
 EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
 
 struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe)
 {
-	return xfrm_get_byname(calg_list, calg_entries(), name, probe);
+	return xfrm_get_byname(calg_list, calg_entries(),
+			       CRYPTO_ALG_TYPE_COMPRESS, CRYPTO_ALG_TYPE_MASK,
+			       name, probe);
 }
 EXPORT_SYMBOL_GPL(xfrm_calg_get_byname);
 
@@ -427,19 +447,22 @@
 	BUG_ON(in_softirq());
 
 	for (i = 0; i < aalg_entries(); i++) {
-		status = crypto_alg_available(aalg_list[i].name, 0);
+		status = crypto_has_hash(aalg_list[i].name, 0,
+					 CRYPTO_ALG_ASYNC);
 		if (aalg_list[i].available != status)
 			aalg_list[i].available = status;
 	}
 	
 	for (i = 0; i < ealg_entries(); i++) {
-		status = crypto_alg_available(ealg_list[i].name, 0);
+		status = crypto_has_blkcipher(ealg_list[i].name, 0,
+					      CRYPTO_ALG_ASYNC);
 		if (ealg_list[i].available != status)
 			ealg_list[i].available = status;
 	}
 	
 	for (i = 0; i < calg_entries(); i++) {
-		status = crypto_alg_available(calg_list[i].name, 0);
+		status = crypto_has_comp(calg_list[i].name, 0,
+					 CRYPTO_ALG_ASYNC);
 		if (calg_list[i].available != status)
 			calg_list[i].available = status;
 	}
@@ -471,11 +494,12 @@
 
 /* Move to common area: it is shared with AH. */
 
-void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
-		  int offset, int len, icv_update_fn_t icv_update)
+int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
+		 int offset, int len, icv_update_fn_t icv_update)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
+	int err;
 	struct scatterlist sg;
 
 	/* Checksum header. */
@@ -487,10 +511,12 @@
 		sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
 		sg.length = copy;
 		
-		icv_update(tfm, &sg, 1);
+		err = icv_update(desc, &sg, copy);
+		if (unlikely(err))
+			return err;
 		
 		if ((len -= copy) == 0)
-			return;
+			return 0;
 		offset += copy;
 	}
 
@@ -510,10 +536,12 @@
 			sg.offset = frag->page_offset + offset-start;
 			sg.length = copy;
 			
-			icv_update(tfm, &sg, 1);
+			err = icv_update(desc, &sg, copy);
+			if (unlikely(err))
+				return err;
 
 			if (!(len -= copy))
-				return;
+				return 0;
 			offset += copy;
 		}
 		start = end;
@@ -531,15 +559,19 @@
 			if ((copy = end - offset) > 0) {
 				if (copy > len)
 					copy = len;
-				skb_icv_walk(list, tfm, offset-start, copy, icv_update);
+				err = skb_icv_walk(list, desc, offset-start,
+						   copy, icv_update);
+				if (unlikely(err))
+					return err;
 				if ((len -= copy) == 0)
-					return;
+					return 0;
 				offset += copy;
 			}
 			start = end;
 		}
 	}
 	BUG_ON(len);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(skb_icv_walk);
 
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
new file mode 100644
index 0000000..37643bb
--- /dev/null
+++ b/net/xfrm/xfrm_hash.c
@@ -0,0 +1,41 @@
+/* xfrm_hash.c: Common hash table code.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/xfrm.h>
+
+#include "xfrm_hash.h"
+
+struct hlist_head *xfrm_hash_alloc(unsigned int sz)
+{
+	struct hlist_head *n;
+
+	if (sz <= PAGE_SIZE)
+		n = kmalloc(sz, GFP_KERNEL);
+	else if (hashdist)
+		n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
+	else
+		n = (struct hlist_head *)
+			__get_free_pages(GFP_KERNEL, get_order(sz));
+	/* Zero the sz bytes whichever allocator supplied them. */
+	if (n)
+		memset(n, 0, sz);
+	/* NULL on failure; release with xfrm_hash_free() and the same sz. */
+	return n;
+}
+
+void xfrm_hash_free(struct hlist_head *n, unsigned int sz)
+{
+	if (sz <= PAGE_SIZE)	/* same sz/hashdist tests as xfrm_hash_alloc() */
+		kfree(n);
+	else if (hashdist)
+		vfree(n);
+	else
+		free_pages((unsigned long)n, get_order(sz));
+}
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
new file mode 100644
index 0000000..d3abb0b
--- /dev/null
+++ b/net/xfrm/xfrm_hash.h
@@ -0,0 +1,128 @@
+#ifndef _XFRM_HASH_H
+#define _XFRM_HASH_H
+
+#include <linux/xfrm.h>
+#include <linux/socket.h>
+
+static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
+{
+	return ntohl(addr->a4);	/* raw value; callers fold and mask it */
+}
+
+static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
+{
+	return ntohl(addr->a6[2] ^ addr->a6[3]);	/* a6[0] and a6[1] are not used */
+}
+
+static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	return ntohl(daddr->a4 ^ saddr->a4);	/* XOR of both addresses, then ntohl() */
+}
+
+static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
+{
+	return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
+		     saddr->a6[2] ^ saddr->a6[3]);	/* a6[0] and a6[1] are not used */
+}
+
+static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr,
+					   u32 reqid, unsigned short family,
+					   unsigned int hmask)
+{
+	unsigned int h = family ^ reqid;
+	switch (family) {
+	case AF_INET:
+		h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
+		break;
+	case AF_INET6:
+		h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
+		break;
+	}	/* other families: the addresses are not mixed in */
+	return (h ^ (h >> 16)) & hmask;	/* fold the high half in, then mask */
+}
+
+static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr,
+				       unsigned short family,
+				       unsigned int hmask)
+{
+	unsigned int h = family;
+	switch (family) {
+	case AF_INET:
+		h ^= __xfrm4_addr_hash(saddr);
+		break;
+	case AF_INET6:
+		h ^= __xfrm6_addr_hash(saddr);
+		break;
+	}	/* other families hash on the family value alone */
+	return (h ^ (h >> 16)) & hmask;	/* fold the high half in, then mask */
+}
+
+static inline unsigned int
+__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family,
+		unsigned int hmask)
+{
+	unsigned int h = spi ^ proto;
+	switch (family) {
+	case AF_INET:
+		h ^= __xfrm4_addr_hash(daddr);
+		break;
+	case AF_INET6:
+		h ^= __xfrm6_addr_hash(daddr);
+		break;
+	}	/* other families: only spi ^ proto is hashed */
+	return (h ^ (h >> 10) ^ (h >> 20)) & hmask;	/* two shifted folds, then mask */
+}
+
+static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
+{
+	return (index ^ (index >> 8)) & hmask;	/* fold bits 8 and up in, then mask */
+}
+
+static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask)
+{
+	xfrm_address_t *daddr = &sel->daddr;
+	xfrm_address_t *saddr = &sel->saddr;
+	unsigned int h = 0;
+	/* Only full-length prefixes are hashed; otherwise hmask + 1 is returned. */
+	switch (family) {
+	case AF_INET:
+		if (sel->prefixlen_d != 32 ||
+		    sel->prefixlen_s != 32)
+			return hmask + 1;
+
+		h = __xfrm4_daddr_saddr_hash(daddr, saddr);
+		break;
+
+	case AF_INET6:
+		if (sel->prefixlen_d != 128 ||
+		    sel->prefixlen_s != 128)
+			return hmask + 1;
+
+		h = __xfrm6_daddr_saddr_hash(daddr, saddr);
+		break;
+	}	/* other families leave h at 0 */
+	h ^= (h >> 16);
+	return h & hmask;
+}
+
+static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask)
+{
+	unsigned int h = 0;
+	/* Hashes the address pair with no prefix-length check. */
+	switch (family) {
+	case AF_INET:
+		h = __xfrm4_daddr_saddr_hash(daddr, saddr);
+		break;
+
+	case AF_INET6:
+		h = __xfrm6_daddr_saddr_hash(daddr, saddr);
+		break;
+	}	/* other families leave h at 0 */
+	h ^= (h >> 16);
+	return h & hmask;
+}
+
+extern struct hlist_head *xfrm_hash_alloc(unsigned int sz);
+extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz);
+
+#endif /* _XFRM_HASH_H */
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 891a609..dfc90bb 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -82,8 +82,6 @@
 {
 	secpath_cachep = kmem_cache_create("secpath_cache",
 					   sizeof(struct sec_path),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL, NULL);
-	if (!secpath_cachep)
-		panic("XFRM: failed to allocate secpath_cache\n");
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 3da67ca..b6e2e79 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -22,16 +22,19 @@
 #include <linux/netdevice.h>
 #include <linux/netfilter.h>
 #include <linux/module.h>
+#include <linux/cache.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
+#include "xfrm_hash.h"
+
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
-struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
-EXPORT_SYMBOL(xfrm_policy_list);
+unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
+EXPORT_SYMBOL(xfrm_policy_count);
 
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
@@ -39,8 +42,7 @@
 static kmem_cache_t *xfrm_dst_cache __read_mostly;
 
 static struct work_struct xfrm_policy_gc_work;
-static struct list_head xfrm_policy_gc_list =
-	LIST_HEAD_INIT(xfrm_policy_gc_list);
+static HLIST_HEAD(xfrm_policy_gc_list);
 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
 
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
@@ -310,8 +312,10 @@
 	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
 
 	if (policy) {
-		atomic_set(&policy->refcnt, 1);
+		INIT_HLIST_NODE(&policy->bydst);
+		INIT_HLIST_NODE(&policy->byidx);
 		rwlock_init(&policy->lock);
+		atomic_set(&policy->refcnt, 1);
 		init_timer(&policy->timer);
 		policy->timer.data = (unsigned long)policy;
 		policy->timer.function = xfrm_policy_timer;
@@ -357,17 +361,16 @@
 static void xfrm_policy_gc_task(void *data)
 {
 	struct xfrm_policy *policy;
-	struct list_head *entry, *tmp;
-	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
+	struct hlist_node *entry, *tmp;
+	struct hlist_head gc_list;
 
 	spin_lock_bh(&xfrm_policy_gc_lock);
-	list_splice_init(&xfrm_policy_gc_list, &gc_list);
+	gc_list.first = xfrm_policy_gc_list.first;
+	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
 	spin_unlock_bh(&xfrm_policy_gc_lock);
 
-	list_for_each_safe(entry, tmp, &gc_list) {
-		policy = list_entry(entry, struct xfrm_policy, list);
+	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
 		xfrm_policy_gc_kill(policy);
-	}
 }
 
 /* Rule must be locked. Release descentant resources, announce
@@ -389,70 +392,275 @@
 	}
 
 	spin_lock(&xfrm_policy_gc_lock);
-	list_add(&policy->list, &xfrm_policy_gc_list);
+	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
 	spin_unlock(&xfrm_policy_gc_lock);
 
 	schedule_work(&xfrm_policy_gc_work);
 }
 
+struct xfrm_policy_hash {
+	struct hlist_head	*table;
+	unsigned int		hmask;
+};
+
+static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
+static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
+static struct hlist_head *xfrm_policy_byidx __read_mostly;
+static unsigned int xfrm_idx_hmask __read_mostly;
+static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
+
+static inline unsigned int idx_hash(u32 index)
+{
+	return __idx_hash(index, xfrm_idx_hmask);
+}
+
+static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
+{
+	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int hash = __sel_hash(sel, family, hmask);
+
+	return (hash == hmask + 1 ?
+		&xfrm_policy_inexact[dir] :
+		xfrm_policy_bydst[dir].table + hash);
+}
+
+static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
+{
+	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
+
+	return xfrm_policy_bydst[dir].table + hash;
+}
+
+static void xfrm_dst_hash_transfer(struct hlist_head *list,
+				   struct hlist_head *ndsttable,
+				   unsigned int nhashmask)
+{
+	struct hlist_node *entry, *tmp;
+	struct xfrm_policy *pol;
+
+	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
+		unsigned int h;
+
+		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
+				pol->family, nhashmask);
+		hlist_add_head(&pol->bydst, ndsttable+h);
+	}
+}
+
+static void xfrm_idx_hash_transfer(struct hlist_head *list,
+				   struct hlist_head *nidxtable,
+				   unsigned int nhashmask)
+{
+	struct hlist_node *entry, *tmp;
+	struct xfrm_policy *pol;
+
+	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
+		unsigned int h;
+
+		h = __idx_hash(pol->index, nhashmask);
+		hlist_add_head(&pol->byidx, nidxtable+h);
+	}
+}
+
+static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
+{
+	return ((old_hmask + 1) << 1) - 1;
+}
+
+static void xfrm_bydst_resize(int dir)
+{
+	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
+	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
+	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
+	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
+	int i;
+
+	if (!ndst)
+		return;
+
+	write_lock_bh(&xfrm_policy_lock);
+
+	for (i = hmask; i >= 0; i--)
+		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
+
+	xfrm_policy_bydst[dir].table = ndst;
+	xfrm_policy_bydst[dir].hmask = nhashmask;
+
+	write_unlock_bh(&xfrm_policy_lock);
+
+	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
+}
+
+static void xfrm_byidx_resize(int total)
+{
+	unsigned int hmask = xfrm_idx_hmask;
+	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
+	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
+	struct hlist_head *oidx = xfrm_policy_byidx;
+	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
+	int i;
+
+	if (!nidx)
+		return;
+
+	write_lock_bh(&xfrm_policy_lock);
+
+	for (i = hmask; i >= 0; i--)
+		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
+
+	xfrm_policy_byidx = nidx;
+	xfrm_idx_hmask = nhashmask;
+
+	write_unlock_bh(&xfrm_policy_lock);
+
+	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
+}
+
+static inline int xfrm_bydst_should_resize(int dir, int *total)
+{
+	unsigned int cnt = xfrm_policy_count[dir];
+	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
+
+	if (total)
+		*total += cnt;
+
+	if ((hmask + 1) < xfrm_policy_hashmax &&
+	    cnt > hmask)
+		return 1;
+
+	return 0;
+}
+
+static inline int xfrm_byidx_should_resize(int total)
+{
+	unsigned int hmask = xfrm_idx_hmask;
+
+	if ((hmask + 1) < xfrm_policy_hashmax &&
+	    total > hmask)
+		return 1;
+
+	return 0;
+}
+
+static DEFINE_MUTEX(hash_resize_mutex);
+
+static void xfrm_hash_resize(void *__unused)
+{
+	int dir, total;
+
+	mutex_lock(&hash_resize_mutex);
+
+	total = 0;
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		if (xfrm_bydst_should_resize(dir, &total))
+			xfrm_bydst_resize(dir);
+	}
+	if (xfrm_byidx_should_resize(total))
+		xfrm_byidx_resize(total);
+
+	mutex_unlock(&hash_resize_mutex);
+}
+
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
+
 /* Generate new index... KAME seems to generate them ordered by cost
  * of an absolute inpredictability of ordering of rules. This will not pass. */
-static u32 xfrm_gen_index(int dir)
+static u32 xfrm_gen_index(u8 type, int dir)
 {
-	u32 idx;
-	struct xfrm_policy *p;
 	static u32 idx_generator;
 
 	for (;;) {
+		struct hlist_node *entry;
+		struct hlist_head *list;
+		struct xfrm_policy *p;
+		u32 idx;
+		int found;
+
 		idx = (idx_generator | dir);
 		idx_generator += 8;
 		if (idx == 0)
 			idx = 8;
-		for (p = xfrm_policy_list[dir]; p; p = p->next) {
-			if (p->index == idx)
+		list = xfrm_policy_byidx + idx_hash(idx);
+		found = 0;
+		hlist_for_each_entry(p, entry, list, byidx) {
+			if (p->index == idx) {
+				found = 1;
 				break;
+			}
 		}
-		if (!p)
+		if (!found)
 			return idx;
 	}
 }
 
+static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
+{
+	u32 *p1 = (u32 *) s1;
+	u32 *p2 = (u32 *) s2;
+	int len = sizeof(struct xfrm_selector) / sizeof(u32);
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (p1[i] != p2[i])
+			return 1;
+	}
+
+	return 0;
+}
+
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 {
-	struct xfrm_policy *pol, **p;
-	struct xfrm_policy *delpol = NULL;
-	struct xfrm_policy **newpos = NULL;
+	struct xfrm_policy *pol;
+	struct xfrm_policy *delpol;
+	struct hlist_head *chain;
+	struct hlist_node *entry, *newpos, *last;
 	struct dst_entry *gc_list;
 
 	write_lock_bh(&xfrm_policy_lock);
-	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
-		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
+	chain = policy_hash_bysel(&policy->selector, policy->family, dir);
+	delpol = NULL;
+	newpos = NULL;
+	last = NULL;
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (!delpol &&
+		    pol->type == policy->type &&
+		    !selector_cmp(&pol->selector, &policy->selector) &&
 		    xfrm_sec_ctx_match(pol->security, policy->security)) {
 			if (excl) {
 				write_unlock_bh(&xfrm_policy_lock);
 				return -EEXIST;
 			}
-			*p = pol->next;
 			delpol = pol;
 			if (policy->priority > pol->priority)
 				continue;
 		} else if (policy->priority >= pol->priority) {
-			p = &pol->next;
+			last = &pol->bydst;
 			continue;
 		}
 		if (!newpos)
-			newpos = p;
+			newpos = &pol->bydst;
 		if (delpol)
 			break;
-		p = &pol->next;
+		last = &pol->bydst;
 	}
+	if (!newpos)
+		newpos = last;
 	if (newpos)
-		p = newpos;
+		hlist_add_after(newpos, &policy->bydst);
+	else
+		hlist_add_head(&policy->bydst, chain);
 	xfrm_pol_hold(policy);
-	policy->next = *p;
-	*p = policy;
+	xfrm_policy_count[dir]++;
 	atomic_inc(&flow_cache_genid);
-	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
+	if (delpol) {
+		hlist_del(&delpol->bydst);
+		hlist_del(&delpol->byidx);
+		xfrm_policy_count[dir]--;
+	}
+	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
+	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
 	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
@@ -461,10 +669,13 @@
 
 	if (delpol)
 		xfrm_policy_kill(delpol);
+	else if (xfrm_bydst_should_resize(dir, NULL))
+		schedule_work(&xfrm_hash_work);
 
 	read_lock_bh(&xfrm_policy_lock);
 	gc_list = NULL;
-	for (policy = policy->next; policy; policy = policy->next) {
+	entry = &policy->bydst;
+	hlist_for_each_entry_continue(policy, entry, bydst) {
 		struct dst_entry *dst;
 
 		write_lock(&policy->lock);
@@ -493,87 +704,146 @@
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
-struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
+struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
+					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete)
 {
-	struct xfrm_policy *pol, **p;
+	struct xfrm_policy *pol, *ret;
+	struct hlist_head *chain;
+	struct hlist_node *entry;
 
 	write_lock_bh(&xfrm_policy_lock);
-	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
-		if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
-		    (xfrm_sec_ctx_match(ctx, pol->security))) {
+	chain = policy_hash_bysel(sel, sel->family, dir);
+	ret = NULL;
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (pol->type == type &&
+		    !selector_cmp(sel, &pol->selector) &&
+		    xfrm_sec_ctx_match(ctx, pol->security)) {
 			xfrm_pol_hold(pol);
-			if (delete)
-				*p = pol->next;
+			if (delete) {
+				hlist_del(&pol->bydst);
+				hlist_del(&pol->byidx);
+				xfrm_policy_count[dir]--;
+			}
+			ret = pol;
 			break;
 		}
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (pol && delete) {
+	if (ret && delete) {
 		atomic_inc(&flow_cache_genid);
-		xfrm_policy_kill(pol);
+		xfrm_policy_kill(ret);
 	}
-	return pol;
+	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
-struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
+struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete)
 {
-	struct xfrm_policy *pol, **p;
+	struct xfrm_policy *pol, *ret;
+	struct hlist_head *chain;
+	struct hlist_node *entry;
 
 	write_lock_bh(&xfrm_policy_lock);
-	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
-		if (pol->index == id) {
+	chain = xfrm_policy_byidx + idx_hash(id);
+	ret = NULL;
+	hlist_for_each_entry(pol, entry, chain, byidx) {
+		if (pol->type == type && pol->index == id) {
 			xfrm_pol_hold(pol);
-			if (delete)
-				*p = pol->next;
+			if (delete) {
+				hlist_del(&pol->bydst);
+				hlist_del(&pol->byidx);
+				xfrm_policy_count[dir]--;
+			}
+			ret = pol;
 			break;
 		}
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (pol && delete) {
+	if (ret && delete) {
 		atomic_inc(&flow_cache_genid);
-		xfrm_policy_kill(pol);
+		xfrm_policy_kill(ret);
 	}
-	return pol;
+	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_byid);
 
-void xfrm_policy_flush(void)
+void xfrm_policy_flush(u8 type)
 {
-	struct xfrm_policy *xp;
 	int dir;
 
 	write_lock_bh(&xfrm_policy_lock);
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
-		while ((xp = xfrm_policy_list[dir]) != NULL) {
-			xfrm_policy_list[dir] = xp->next;
+		struct xfrm_policy *pol;
+		struct hlist_node *entry;
+		int i;
+
+	again1:
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst) {
+			if (pol->type != type)
+				continue;
+			hlist_del(&pol->bydst);
+			hlist_del(&pol->byidx);
 			write_unlock_bh(&xfrm_policy_lock);
 
-			xfrm_policy_kill(xp);
+			xfrm_policy_kill(pol);
 
 			write_lock_bh(&xfrm_policy_lock);
+			goto again1;
 		}
+
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+	again2:
+			hlist_for_each_entry(pol, entry,
+					     xfrm_policy_bydst[dir].table + i,
+					     bydst) {
+				if (pol->type != type)
+					continue;
+				hlist_del(&pol->bydst);
+				hlist_del(&pol->byidx);
+				write_unlock_bh(&xfrm_policy_lock);
+
+				xfrm_policy_kill(pol);
+
+				write_lock_bh(&xfrm_policy_lock);
+				goto again2;
+			}
+		}
+
+		xfrm_policy_count[dir] = 0;
 	}
 	atomic_inc(&flow_cache_genid);
 	write_unlock_bh(&xfrm_policy_lock);
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
 
-int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
+int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
 		     void *data)
 {
-	struct xfrm_policy *xp;
-	int dir;
-	int count = 0;
-	int error = 0;
+	struct xfrm_policy *pol;
+	struct hlist_node *entry;
+	int dir, count, error;
 
 	read_lock_bh(&xfrm_policy_lock);
+	count = 0;
 	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
-		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
-			count++;
+		struct hlist_head *table = xfrm_policy_bydst[dir].table;
+		int i;
+
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst) {
+			if (pol->type == type)
+				count++;
+		}
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+			hlist_for_each_entry(pol, entry, table + i, bydst) {
+				if (pol->type == type)
+					count++;
+			}
+		}
 	}
 
 	if (count == 0) {
@@ -582,13 +852,28 @@
 	}
 
 	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
-		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
-			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
+		struct hlist_head *table = xfrm_policy_bydst[dir].table;
+		int i;
+
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst) {
+			if (pol->type != type)
+				continue;
+			error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
 			if (error)
 				goto out;
 		}
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+			hlist_for_each_entry(pol, entry, table + i, bydst) {
+				if (pol->type != type)
+					continue;
+				error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
+				if (error)
+					goto out;
+			}
+		}
 	}
-
+	error = 0;
 out:
 	read_unlock_bh(&xfrm_policy_lock);
 	return error;
@@ -597,29 +882,79 @@
 
 /* Find policy to apply to this flow. */
 
-static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
+static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
+			     u8 type, u16 family, int dir)
+{
+	struct xfrm_selector *sel = &pol->selector;
+	int match;
+
+	if (pol->family != family ||
+	    pol->type != type)
+		return 0;
+
+	match = xfrm_selector_match(sel, fl, family);
+	if (match) {
+		if (!security_xfrm_policy_lookup(pol, fl->secid, dir))
+			return 1;
+	}
+
+	return 0;
+}
+
+static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
+						     u16 family, u8 dir)
+{
+	struct xfrm_policy *pol, *ret;
+	xfrm_address_t *daddr, *saddr;
+	struct hlist_node *entry;
+	struct hlist_head *chain;
+	u32 priority = ~0U;
+
+	daddr = xfrm_flowi_daddr(fl, family);
+	saddr = xfrm_flowi_saddr(fl, family);
+	if (unlikely(!daddr || !saddr))
+		return NULL;
+
+	read_lock_bh(&xfrm_policy_lock);
+	chain = policy_hash_direct(daddr, saddr, family, dir);
+	ret = NULL;
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (xfrm_policy_match(pol, fl, type, family, dir)) {
+			ret = pol;
+			priority = ret->priority;
+			break;
+		}
+	}
+	chain = &xfrm_policy_inexact[dir];
+	hlist_for_each_entry(pol, entry, chain, bydst) {
+		if (xfrm_policy_match(pol, fl, type, family, dir) &&
+		    pol->priority < priority) {
+			ret = pol;
+			break;
+		}
+	}
+	if (ret)
+		xfrm_pol_hold(ret);
+	read_unlock_bh(&xfrm_policy_lock);
+
+	return ret;
+}
+
+static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 			       void **objp, atomic_t **obj_refp)
 {
 	struct xfrm_policy *pol;
 
-	read_lock_bh(&xfrm_policy_lock);
-	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
-		struct xfrm_selector *sel = &pol->selector;
-		int match;
+#ifdef CONFIG_XFRM_SUB_POLICY
+	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
+	if (pol)
+		goto end;
+#endif
+	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 
-		if (pol->family != family)
-			continue;
-
-		match = xfrm_selector_match(sel, fl, family);
-
-		if (match) {
- 			if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
-				xfrm_pol_hold(pol);
-				break;
-			}
-		}
-	}
-	read_unlock_bh(&xfrm_policy_lock);
+#ifdef CONFIG_XFRM_SUB_POLICY
+end:
+#endif
 	if ((*objp = (void *) pol) != NULL)
 		*obj_refp = &pol->refcnt;
 }
@@ -641,7 +976,7 @@
 	};
 }
 
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
 {
 	struct xfrm_policy *pol;
 
@@ -652,7 +987,7 @@
  		int err = 0;
 
 		if (match)
-		  err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
+		  err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir));
 
  		if (match && !err)
 			xfrm_pol_hold(pol);
@@ -665,24 +1000,29 @@
 
 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 {
-	pol->next = xfrm_policy_list[dir];
-	xfrm_policy_list[dir] = pol;
+	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
+						     pol->family, dir);
+
+	hlist_add_head(&pol->bydst, chain);
+	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
+	xfrm_policy_count[dir]++;
 	xfrm_pol_hold(pol);
+
+	if (xfrm_bydst_should_resize(dir, NULL))
+		schedule_work(&xfrm_hash_work);
 }
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir)
 {
-	struct xfrm_policy **polp;
+	if (hlist_unhashed(&pol->bydst))
+		return NULL;
 
-	for (polp = &xfrm_policy_list[dir];
-	     *polp != NULL; polp = &(*polp)->next) {
-		if (*polp == pol) {
-			*polp = pol->next;
-			return pol;
-		}
-	}
-	return NULL;
+	hlist_del(&pol->bydst);
+	hlist_del(&pol->byidx);
+	xfrm_policy_count[dir]--;
+
+	return pol;
 }
 
 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
@@ -704,12 +1044,17 @@
 {
 	struct xfrm_policy *old_pol;
 
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
+		return -EINVAL;
+#endif
+
 	write_lock_bh(&xfrm_policy_lock);
 	old_pol = sk->sk_policy[dir];
 	sk->sk_policy[dir] = pol;
 	if (pol) {
 		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
-		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
+		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
 		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 	}
 	if (old_pol)
@@ -738,6 +1083,7 @@
 		newp->flags = old->flags;
 		newp->xfrm_nr = old->xfrm_nr;
 		newp->index = old->index;
+		newp->type = old->type;
 		memcpy(newp->xfrm_vec, old->xfrm_vec,
 		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
 		write_lock_bh(&xfrm_policy_lock);
@@ -761,17 +1107,32 @@
 	return 0;
 }
 
+static int
+xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
+	       unsigned short family)
+{
+	int err;
+	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+
+	if (unlikely(afinfo == NULL))
+		return -EINVAL;
+	err = afinfo->get_saddr(local, remote);
+	xfrm_policy_put_afinfo(afinfo);
+	return err;
+}
+
 /* Resolve list of templates for the flow, given policy. */
 
 static int
-xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
-		  struct xfrm_state **xfrm,
-		  unsigned short family)
+xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
+		      struct xfrm_state **xfrm,
+		      unsigned short family)
 {
 	int nx;
 	int i, error;
 	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
 	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
+	xfrm_address_t tmp;
 
 	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
 		struct xfrm_state *x;
@@ -779,9 +1140,15 @@
 		xfrm_address_t *local  = saddr;
 		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
 
-		if (tmpl->mode) {
+		if (tmpl->mode == XFRM_MODE_TUNNEL) {
 			remote = &tmpl->id.daddr;
 			local = &tmpl->saddr;
+			if (xfrm_addr_any(local, family)) {
+				error = xfrm_get_saddr(&tmp, remote, family);
+				if (error)
+					goto fail;
+				local = &tmp;
+			}
 		}
 
 		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
@@ -809,6 +1176,45 @@
 	return error;
 }
 
+static int
+xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
+		  struct xfrm_state **xfrm,
+		  unsigned short family)
+{
+	struct xfrm_state *tp[XFRM_MAX_DEPTH];
+	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
+	int cnx = 0;
+	int error;
+	int ret;
+	int i;
+
+	for (i = 0; i < npols; i++) {
+		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
+			error = -ENOBUFS;
+			goto fail;
+		}
+
+		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
+		if (ret < 0) {
+			error = ret;
+			goto fail;
+		} else
+			cnx += ret;
+	}
+
+	/* found states are sorted for outbound processing */
+	if (npols > 1)
+		xfrm_state_sort(xfrm, tpp, cnx, family);
+
+	return cnx;
+
+ fail:
+	for (cnx--; cnx>=0; cnx--)
+		xfrm_state_put(tpp[cnx]);
+	return error;
+
+}
+
 /* Check that the bundle accepts the flow and its components are
  * still valid.
  */
@@ -855,6 +1261,11 @@
 		struct sock *sk, int flags)
 {
 	struct xfrm_policy *policy;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	int npols;
+	int pol_dead;
+	int xfrm_nr;
+	int pi;
 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
 	struct dst_entry *dst, *dst_orig = *dst_p;
 	int nx = 0;
@@ -862,19 +1273,26 @@
 	u32 genid;
 	u16 family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
-	u32 sk_sid = security_sk_sid(sk, fl, dir);
+
 restart:
 	genid = atomic_read(&flow_cache_genid);
 	policy = NULL;
+	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
+		pols[pi] = NULL;
+	npols = 0;
+	pol_dead = 0;
+	xfrm_nr = 0;
+
 	if (sk && sk->sk_policy[1])
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
 
 	if (!policy) {
 		/* To accelerate a bit...  */
-		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
+		if ((dst_orig->flags & DST_NOXFRM) ||
+		    !xfrm_policy_count[XFRM_POLICY_OUT])
 			return 0;
 
-		policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family,
+		policy = flow_cache_lookup(fl, dst_orig->ops->family,
 					   dir, xfrm_policy_lookup);
 	}
 
@@ -883,6 +1301,9 @@
 
 	family = dst_orig->ops->family;
 	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+	pols[0] = policy;
+	npols ++;
+	xfrm_nr += pols[0]->xfrm_nr;
 
 	switch (policy->action) {
 	case XFRM_POLICY_BLOCK:
@@ -891,11 +1312,13 @@
 		goto error;
 
 	case XFRM_POLICY_ALLOW:
+#ifndef CONFIG_XFRM_SUB_POLICY
 		if (policy->xfrm_nr == 0) {
 			/* Flow passes not transformed. */
 			xfrm_pol_put(policy);
 			return 0;
 		}
+#endif
 
 		/* Try to find matching bundle.
 		 *
@@ -911,7 +1334,36 @@
 		if (dst)
 			break;
 
-		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+#ifdef CONFIG_XFRM_SUB_POLICY
+		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
+							    fl, family,
+							    XFRM_POLICY_OUT);
+			if (pols[1]) {
+				if (pols[1]->action == XFRM_POLICY_BLOCK) {
+					err = -EPERM;
+					goto error;
+				}
+				npols ++;
+				xfrm_nr += pols[1]->xfrm_nr;
+			}
+		}
+
+		/*
+		 * Neither the flowi nor the bundle information knows the
+		 * transformation template size, so with more than one policy
+		 * we can only tell whether all of them are bypass after they
+		 * have all been looked up.  That is also why the bypass check
+		 * above is compiled only when sub policies are disabled.
+		 */
+		if (xfrm_nr == 0) {
+			/* Flow passes not transformed. */
+			xfrm_pols_put(pols, npols);
+			return 0;
+		}
+
+#endif
+		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
 		if (unlikely(nx<0)) {
 			err = nx;
@@ -924,7 +1376,7 @@
 				set_current_state(TASK_RUNNING);
 				remove_wait_queue(&km_waitq, &wait);
 
-				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
+				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
 
 				if (nx == -EAGAIN && signal_pending(current)) {
 					err = -ERESTART;
@@ -932,7 +1384,7 @@
 				}
 				if (nx == -EAGAIN ||
 				    genid != atomic_read(&flow_cache_genid)) {
-					xfrm_pol_put(policy);
+					xfrm_pols_put(pols, npols);
 					goto restart;
 				}
 				err = nx;
@@ -942,7 +1394,7 @@
 		}
 		if (nx == 0) {
 			/* Flow passes not transformed. */
-			xfrm_pol_put(policy);
+			xfrm_pols_put(pols, npols);
 			return 0;
 		}
 
@@ -956,8 +1408,14 @@
 			goto error;
 		}
 
+		for (pi = 0; pi < npols; pi++) {
+			read_lock_bh(&pols[pi]->lock);
+			pol_dead |= pols[pi]->dead;
+			read_unlock_bh(&pols[pi]->lock);
+		}
+
 		write_lock_bh(&policy->lock);
-		if (unlikely(policy->dead || stale_bundle(dst))) {
+		if (unlikely(pol_dead || stale_bundle(dst))) {
 			/* Wow! While we worked on resolving, this
 			 * policy has gone. Retry. It is not paranoia,
 			 * we just cannot enlist new bundle to dead object.
@@ -977,17 +1435,34 @@
 	}
 	*dst_p = dst;
 	dst_release(dst_orig);
-	xfrm_pol_put(policy);
+ 	xfrm_pols_put(pols, npols);
 	return 0;
 
 error:
 	dst_release(dst_orig);
-	xfrm_pol_put(policy);
+	xfrm_pols_put(pols, npols);
 	*dst_p = NULL;
 	return err;
 }
 EXPORT_SYMBOL(xfrm_lookup);
 
+static inline int
+xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
+{
+	struct xfrm_state *x;
+	int err;
+
+	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
+		return 0;
+	x = skb->sp->xvec[idx];
+	if (!x->type->reject)
+		return 0;
+	xfrm_state_hold(x);
+	err = x->type->reject(x, skb, fl);
+	xfrm_state_put(x);
+	return err;
+}
+
 /* When skb is transformed back to its "native" form, we have to
  * check policy restrictions. At the moment we make this in maximally
  * stupid way. Shame on me. :-) Of course, connected sockets must
@@ -1004,10 +1479,19 @@
 		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
 		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
 		x->props.mode == tmpl->mode &&
-		(tmpl->aalgos & (1<<x->props.aalgo)) &&
-		!(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
+		((tmpl->aalgos & (1<<x->props.aalgo)) ||
+		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
+		!(x->props.mode != XFRM_MODE_TRANSPORT &&
+		  xfrm_state_addr_cmp(tmpl, x, family));
 }
 
+/*
+ * A value >= 0 is returned when validation succeeds (either a bypass
+ * because of an optional transport-mode template, or the next index after
+ * the secpath state that matched the template).
+ * -1 is returned when no matching template is found.
+ * Otherwise "-2 - errored_index" is returned.
+ */
 static inline int
 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
 	       unsigned short family)
@@ -1015,15 +1499,18 @@
 	int idx = start;
 
 	if (tmpl->optional) {
-		if (!tmpl->mode)
+		if (tmpl->mode == XFRM_MODE_TRANSPORT)
 			return start;
 	} else
 		start = -1;
 	for (; idx < sp->len; idx++) {
 		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
 			return ++idx;
-		if (sp->xvec[idx]->props.mode)
+		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
+			if (start == -1)
+				start = -2-idx;
 			break;
+		}
 	}
 	return start;
 }
@@ -1032,21 +1519,25 @@
 xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	int err;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	afinfo->decode_session(skb, fl);
+	err = security_xfrm_decode_session(skb, &fl->secid);
 	xfrm_policy_put_afinfo(afinfo);
-	return 0;
+	return err;
 }
 EXPORT_SYMBOL(xfrm_decode_session);
 
-static inline int secpath_has_tunnel(struct sec_path *sp, int k)
+static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
 {
 	for (; k < sp->len; k++) {
-		if (sp->xvec[k]->props.mode)
+		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
+			*idxp = k;
 			return 1;
+		}
 	}
 
 	return 0;
@@ -1056,16 +1547,18 @@
 			unsigned short family)
 {
 	struct xfrm_policy *pol;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	int npols = 0;
+	int xfrm_nr;
+	int pi;
 	struct flowi fl;
 	u8 fl_dir = policy_to_flow_dir(dir);
-	u32 sk_sid;
+	int xerr_idx = -1;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0)
 		return 0;
 	nf_nat_decode_session(skb, &fl, family);
 
-	sk_sid = security_sk_sid(sk, &fl, fl_dir);
-
 	/* First, check used SA against their selectors. */
 	if (skb->sp) {
 		int i;
@@ -1079,46 +1572,90 @@
 
 	pol = NULL;
 	if (sk && sk->sk_policy[dir])
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
 
 	if (!pol)
-		pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
+		pol = flow_cache_lookup(&fl, family, fl_dir,
 					xfrm_policy_lookup);
 
-	if (!pol)
-		return !skb->sp || !secpath_has_tunnel(skb->sp, 0);
+	if (!pol) {
+		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
+			xfrm_secpath_reject(xerr_idx, skb, &fl);
+			return 0;
+		}
+		return 1;
+	}
 
 	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
 
+	pols[0] = pol;
+	npols ++;
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
+						    &fl, family,
+						    XFRM_POLICY_IN);
+		if (pols[1]) {
+			pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
+			npols ++;
+		}
+	}
+#endif
+
 	if (pol->action == XFRM_POLICY_ALLOW) {
 		struct sec_path *sp;
 		static struct sec_path dummy;
+		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
+		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
+		struct xfrm_tmpl **tpp = tp;
+		int ti = 0;
 		int i, k;
 
 		if ((sp = skb->sp) == NULL)
 			sp = &dummy;
 
+		for (pi = 0; pi < npols; pi++) {
+			if (pols[pi] != pol &&
+			    pols[pi]->action != XFRM_POLICY_ALLOW)
+				goto reject;
+			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
+				goto reject_error;
+			for (i = 0; i < pols[pi]->xfrm_nr; i++)
+				tpp[ti++] = &pols[pi]->xfrm_vec[i];
+		}
+		xfrm_nr = ti;
+		if (npols > 1) {
+			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
+			tpp = stp;
+		}
+
 		/* For each tunnel xfrm, find the first matching tmpl.
 		 * For each tmpl before that, find corresponding xfrm.
 		 * Order is _important_. Later we will implement
 		 * some barriers, but at the moment barriers
 		 * are implied between each two transformations.
 		 */
-		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
-			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
-			if (k < 0)
+		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
+			k = xfrm_policy_ok(tpp[i], sp, k, family);
+			if (k < 0) {
+				if (k < -1)
+					/* "-2 - errored_index" returned */
+					xerr_idx = -(2+k);
 				goto reject;
+			}
 		}
 
-		if (secpath_has_tunnel(sp, k))
+		if (secpath_has_nontransport(sp, k, &xerr_idx))
 			goto reject;
 
-		xfrm_pol_put(pol);
+		xfrm_pols_put(pols, npols);
 		return 1;
 	}
 
 reject:
-	xfrm_pol_put(pol);
+	xfrm_secpath_reject(xerr_idx, skb, &fl);
+reject_error:
+	xfrm_pols_put(pols, npols);
 	return 0;
 }
 EXPORT_SYMBOL(__xfrm_policy_check);
@@ -1166,7 +1703,7 @@
 
 static int stale_bundle(struct dst_entry *dst)
 {
-	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
+	return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
 }
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
@@ -1196,33 +1733,50 @@
 	return dst;
 }
 
+static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
+{
+	struct dst_entry *dst, **dstp;
+
+	write_lock(&pol->lock);
+	dstp = &pol->bundles;
+	while ((dst=*dstp) != NULL) {
+		if (func(dst)) {
+			*dstp = dst->next;
+			dst->next = *gc_list_p;
+			*gc_list_p = dst;
+		} else {
+			dstp = &dst->next;
+		}
+	}
+	write_unlock(&pol->lock);
+}
+
 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
 {
-	int i;
-	struct xfrm_policy *pol;
-	struct dst_entry *dst, **dstp, *gc_list = NULL;
+	struct dst_entry *gc_list = NULL;
+	int dir;
 
 	read_lock_bh(&xfrm_policy_lock);
-	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
-		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
-			write_lock(&pol->lock);
-			dstp = &pol->bundles;
-			while ((dst=*dstp) != NULL) {
-				if (func(dst)) {
-					*dstp = dst->next;
-					dst->next = gc_list;
-					gc_list = dst;
-				} else {
-					dstp = &dst->next;
-				}
-			}
-			write_unlock(&pol->lock);
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		struct xfrm_policy *pol;
+		struct hlist_node *entry;
+		struct hlist_head *table;
+		int i;
+
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst)
+			prune_one_bundle(pol, func, &gc_list);
+
+		table = xfrm_policy_bydst[dir].table;
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+			hlist_for_each_entry(pol, entry, table + i, bydst)
+				prune_one_bundle(pol, func, &gc_list);
 		}
 	}
 	read_unlock_bh(&xfrm_policy_lock);
 
 	while (gc_list) {
-		dst = gc_list;
+		struct dst_entry *dst = gc_list;
 		gc_list = dst->next;
 		dst_free(dst);
 	}
@@ -1238,22 +1792,12 @@
 	xfrm_prune_bundles(unused_bundle);
 }
 
-int xfrm_flush_bundles(void)
+static int xfrm_flush_bundles(void)
 {
 	xfrm_prune_bundles(stale_bundle);
 	return 0;
 }
 
-static int always_true(struct dst_entry *dst)
-{
-	return 1;
-}
-
-void xfrm_flush_all_bundles(void)
-{
-	xfrm_prune_bundles(always_true);
-}
-
 void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -1281,7 +1825,7 @@
  * still valid.
  */
 
-int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
+int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict)
 {
 	struct dst_entry *dst = &first->u.dst;
 	struct xfrm_dst *last;
@@ -1298,8 +1842,16 @@
 
 		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
 			return 0;
+		if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm))
+			return 0;
 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
 			return 0;
+		if (xdst->genid != dst->xfrm->genid)
+			return 0;
+
+		if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL &&
+		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
+			return 0;
 
 		mtu = dst_mtu(dst->child);
 		if (xdst->child_mtu_cached != mtu) {
@@ -1448,12 +2000,33 @@
 
 static void __init xfrm_policy_init(void)
 {
+	unsigned int hmask, sz;
+	int dir;
+
 	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
 					   sizeof(struct xfrm_dst),
-					   0, SLAB_HWCACHE_ALIGN,
+					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					   NULL, NULL);
-	if (!xfrm_dst_cache)
-		panic("XFRM: failed to allocate xfrm_dst_cache\n");
+
+	hmask = 8 - 1;
+	sz = (hmask+1) * sizeof(struct hlist_head);
+
+	xfrm_policy_byidx = xfrm_hash_alloc(sz);
+	xfrm_idx_hmask = hmask;
+	if (!xfrm_policy_byidx)
+		panic("XFRM: failed to allocate byidx hash\n");
+
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		struct xfrm_policy_hash *htab;
+
+		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
+
+		htab = &xfrm_policy_bydst[dir];
+		htab->table = xfrm_hash_alloc(sz);
+		htab->hmask = hmask;
+		if (!htab->table)
+			panic("XFRM: failed to allocate bydst hash\n");
+	}
 
 	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
 	register_netdevice_notifier(&xfrm_dev_notifier);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0021aad..9f63edd 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -18,8 +18,11 @@
 #include <linux/pfkeyv2.h>
 #include <linux/ipsec.h>
 #include <linux/module.h>
+#include <linux/cache.h>
 #include <asm/uaccess.h>
 
+#include "xfrm_hash.h"
+
 struct sock *xfrm_nl;
 EXPORT_SYMBOL(xfrm_nl);
 
@@ -32,7 +35,7 @@
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
-   2. Hash table by daddr to find what SAs exist for given
+   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
       destination/tunnel endpoint. (output)
  */
 
@@ -44,8 +47,123 @@
  * Main use is finding SA after policy selected tunnel or transport mode.
  * Also, it can be used by ah/esp icmp error handler to find offending SA.
  */
-static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
-static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
+static struct hlist_head *xfrm_state_bydst __read_mostly;
+static struct hlist_head *xfrm_state_bysrc __read_mostly;
+static struct hlist_head *xfrm_state_byspi __read_mostly;
+static unsigned int xfrm_state_hmask __read_mostly;
+static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static unsigned int xfrm_state_num;
+static unsigned int xfrm_state_genid;
+
+static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
+					 xfrm_address_t *saddr,
+					 u32 reqid,
+					 unsigned short family)
+{
+	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
+}
+
+static inline unsigned int xfrm_src_hash(xfrm_address_t *addr,
+					 unsigned short family)
+{
+	return __xfrm_src_hash(addr, family, xfrm_state_hmask);
+}
+
+static inline unsigned int
+xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
+{
+	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
+}
+
+static void xfrm_hash_transfer(struct hlist_head *list,
+			       struct hlist_head *ndsttable,
+			       struct hlist_head *nsrctable,
+			       struct hlist_head *nspitable,
+			       unsigned int nhashmask)
+{
+	struct hlist_node *entry, *tmp;
+	struct xfrm_state *x;
+
+	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
+		unsigned int h;
+
+		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
+				    x->props.reqid, x->props.family,
+				    nhashmask);
+		hlist_add_head(&x->bydst, ndsttable+h);
+
+		h = __xfrm_src_hash(&x->props.saddr, x->props.family,
+				    nhashmask);
+		hlist_add_head(&x->bysrc, nsrctable+h);
+
+		h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
+				    x->props.family, nhashmask);
+		hlist_add_head(&x->byspi, nspitable+h);
+	}
+}
+
+static unsigned long xfrm_hash_new_size(void)
+{
+	return ((xfrm_state_hmask + 1) << 1) *
+		sizeof(struct hlist_head);
+}
+
+static DEFINE_MUTEX(hash_resize_mutex);
+
+static void xfrm_hash_resize(void *__unused)
+{
+	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
+	unsigned long nsize, osize;
+	unsigned int nhashmask, ohashmask;
+	int i;
+
+	mutex_lock(&hash_resize_mutex);
+
+	nsize = xfrm_hash_new_size();
+	ndst = xfrm_hash_alloc(nsize);
+	if (!ndst)
+		goto out_unlock;
+	nsrc = xfrm_hash_alloc(nsize);
+	if (!nsrc) {
+		xfrm_hash_free(ndst, nsize);
+		goto out_unlock;
+	}
+	nspi = xfrm_hash_alloc(nsize);
+	if (!nspi) {
+		xfrm_hash_free(ndst, nsize);
+		xfrm_hash_free(nsrc, nsize);
+		goto out_unlock;
+	}
+
+	spin_lock_bh(&xfrm_state_lock);
+
+	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+	for (i = xfrm_state_hmask; i >= 0; i--)
+		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
+				   nhashmask);
+
+	odst = xfrm_state_bydst;
+	osrc = xfrm_state_bysrc;
+	ospi = xfrm_state_byspi;
+	ohashmask = xfrm_state_hmask;
+
+	xfrm_state_bydst = ndst;
+	xfrm_state_bysrc = nsrc;
+	xfrm_state_byspi = nspi;
+	xfrm_state_hmask = nhashmask;
+
+	spin_unlock_bh(&xfrm_state_lock);
+
+	osize = (ohashmask + 1) * sizeof(struct hlist_head);
+	xfrm_hash_free(odst, osize);
+	xfrm_hash_free(osrc, osize);
+	xfrm_hash_free(ospi, osize);
+
+out_unlock:
+	mutex_unlock(&hash_resize_mutex);
+}
+
+static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
 
 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
 EXPORT_SYMBOL(km_waitq);
@@ -54,11 +172,9 @@
 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
 
 static struct work_struct xfrm_state_gc_work;
-static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
+static HLIST_HEAD(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
-static int xfrm_state_gc_flush_bundles;
-
 int __xfrm_state_delete(struct xfrm_state *x);
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
@@ -69,14 +185,13 @@
 
 static void xfrm_state_gc_destroy(struct xfrm_state *x)
 {
-	if (del_timer(&x->timer))
-		BUG();
-	if (del_timer(&x->rtimer))
-		BUG();
+	del_timer_sync(&x->timer);
+	del_timer_sync(&x->rtimer);
 	kfree(x->aalg);
 	kfree(x->ealg);
 	kfree(x->calg);
 	kfree(x->encap);
+	kfree(x->coaddr);
 	if (x->mode)
 		xfrm_put_mode(x->mode);
 	if (x->type) {
@@ -90,22 +205,17 @@
 static void xfrm_state_gc_task(void *data)
 {
 	struct xfrm_state *x;
-	struct list_head *entry, *tmp;
-	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
-
-	if (xfrm_state_gc_flush_bundles) {
-		xfrm_state_gc_flush_bundles = 0;
-		xfrm_flush_bundles();
-	}
+	struct hlist_node *entry, *tmp;
+	struct hlist_head gc_list;
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_splice_init(&xfrm_state_gc_list, &gc_list);
+	gc_list.first = xfrm_state_gc_list.first;
+	INIT_HLIST_HEAD(&xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
-	list_for_each_safe(entry, tmp, &gc_list) {
-		x = list_entry(entry, struct xfrm_state, bydst);
+	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
 		xfrm_state_gc_destroy(x);
-	}
+
 	wake_up(&km_waitq);
 }
 
@@ -168,9 +278,9 @@
 	if (warn)
 		km_state_expired(x, 0, 0);
 resched:
-	if (next != LONG_MAX &&
-	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
-		xfrm_state_hold(x);
+	if (next != LONG_MAX)
+		mod_timer(&x->timer, jiffies + make_jiffies(next));
+
 	goto out;
 
 expired:
@@ -185,7 +295,6 @@
 
 out:
 	spin_unlock(&x->lock);
-	xfrm_state_put(x);
 }
 
 static void xfrm_replay_timer_handler(unsigned long data);
@@ -199,8 +308,9 @@
 	if (x) {
 		atomic_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
-		INIT_LIST_HEAD(&x->bydst);
-		INIT_LIST_HEAD(&x->byspi);
+		INIT_HLIST_NODE(&x->bydst);
+		INIT_HLIST_NODE(&x->bysrc);
+		INIT_HLIST_NODE(&x->byspi);
 		init_timer(&x->timer);
 		x->timer.function = xfrm_timer_handler;
 		x->timer.data	  = (unsigned long)x;
@@ -225,7 +335,7 @@
 	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	list_add(&x->bydst, &xfrm_state_gc_list);
+	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 	schedule_work(&xfrm_state_gc_work);
 }
@@ -238,27 +348,12 @@
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&xfrm_state_lock);
-		list_del(&x->bydst);
-		__xfrm_state_put(x);
-		if (x->id.spi) {
-			list_del(&x->byspi);
-			__xfrm_state_put(x);
-		}
+		hlist_del(&x->bydst);
+		hlist_del(&x->bysrc);
+		if (x->id.spi)
+			hlist_del(&x->byspi);
+		xfrm_state_num--;
 		spin_unlock(&xfrm_state_lock);
-		if (del_timer(&x->timer))
-			__xfrm_state_put(x);
-		if (del_timer(&x->rtimer))
-			__xfrm_state_put(x);
-
-		/* The number two in this test is the reference
-		 * mentioned in the comment below plus the reference
-		 * our caller holds.  A larger value means that
-		 * there are DSTs attached to this xfrm_state.
-		 */
-		if (atomic_read(&x->refcnt) > 2) {
-			xfrm_state_gc_flush_bundles = 1;
-			schedule_work(&xfrm_state_gc_work);
-		}
 
 		/* All xfrm_state objects are created by xfrm_state_alloc.
 		 * The xfrm_state_alloc call gives a reference, and that
@@ -287,14 +382,15 @@
 void xfrm_state_flush(u8 proto)
 {
 	int i;
-	struct xfrm_state *x;
 
 	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
+	for (i = 0; i <= xfrm_state_hmask; i++) {
+		struct hlist_node *entry;
+		struct xfrm_state *x;
 restart:
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
 			if (!xfrm_state_kern(x) &&
-			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
+			    xfrm_id_proto_match(x->id.proto, proto)) {
 				xfrm_state_hold(x);
 				spin_unlock_bh(&xfrm_state_lock);
 
@@ -325,29 +421,103 @@
 	return 0;
 }
 
+/* Look up a state in the byspi hash by daddr/spi/proto/family.
+ * A match is returned with a reference taken via xfrm_state_hold();
+ * NULL is returned when no entry in the bucket matches.
+ */
+static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
+{
+	unsigned int bucket = xfrm_spi_hash(daddr, spi, proto, family);
+	struct hlist_node *pos;
+	struct xfrm_state *x;
+
+	hlist_for_each_entry(x, pos, xfrm_state_byspi+bucket, byspi) {
+		if (x->id.spi != spi || x->id.proto != proto ||
+		    x->props.family != family)
+			continue;
+
+		/* Families other than v4/v6 match on the key fields alone. */
+		if (family == AF_INET) {
+			if (x->id.daddr.a4 != daddr->a4)
+				continue;
+		} else if (family == AF_INET6) {
+			if (!ipv6_addr_equal((struct in6_addr *)daddr,
+					     (struct in6_addr *)x->id.daddr.a6))
+				continue;
+		}
+
+		xfrm_state_hold(x);
+		return x;
+	}
+
+	return NULL;
+}
+
+/* Look up a state in the bysrc hash by daddr/saddr/proto/family.
+ * A match is returned with a reference taken via xfrm_state_hold();
+ * NULL is returned when no entry in the bucket matches.
+ */
+static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
+{
+	unsigned int bucket = xfrm_src_hash(saddr, family);
+	struct hlist_node *pos;
+	struct xfrm_state *x;
+
+	hlist_for_each_entry(x, pos, xfrm_state_bysrc+bucket, bysrc) {
+		if (x->id.proto != proto ||
+		    x->props.family != family)
+			continue;
+
+		/* Families other than v4/v6 match on proto/family alone. */
+		if (family == AF_INET) {
+			if (x->id.daddr.a4 != daddr->a4 ||
+			    x->props.saddr.a4 != saddr->a4)
+				continue;
+		} else if (family == AF_INET6) {
+			if (!ipv6_addr_equal((struct in6_addr *)daddr,
+					     (struct in6_addr *)x->id.daddr.a6) ||
+			    !ipv6_addr_equal((struct in6_addr *)saddr,
+					     (struct in6_addr *)x->props.saddr.a6))
+				continue;
+		}
+
+		xfrm_state_hold(x);
+		return x;
+	}
+
+	return NULL;
+}
+
+/* Look up a state matching @x: by SPI when use_spi, else by addresses. */
+static inline struct xfrm_state *
+__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
+{
+	if (!use_spi)
+		return __xfrm_state_lookup_byaddr(&x->id.daddr,
+						  &x->props.saddr,
+						  x->id.proto, family);
+	return __xfrm_state_lookup(&x->id.daddr, x->id.spi, x->id.proto,
+				   family);
+}
+
 struct xfrm_state *
 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
 		struct flowi *fl, struct xfrm_tmpl *tmpl,
 		struct xfrm_policy *pol, int *err,
 		unsigned short family)
 {
-	unsigned h = xfrm_dst_hash(daddr, family);
+	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
+	struct hlist_node *entry;
 	struct xfrm_state *x, *x0;
 	int acquire_in_progress = 0;
 	int error = 0;
 	struct xfrm_state *best = NULL;
-	struct xfrm_state_afinfo *afinfo;
 	
-	afinfo = xfrm_state_get_afinfo(family);
-	if (afinfo == NULL) {
-		*err = -EAFNOSUPPORT;
-		return NULL;
-	}
-
 	spin_lock_bh(&xfrm_state_lock);
-	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == tmpl->reqid &&
+		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 		    xfrm_state_addr_check(x, daddr, saddr, family) &&
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
@@ -367,7 +537,7 @@
 			 */
 			if (x->km.state == XFRM_STATE_VALID) {
 				if (!xfrm_selector_match(&x->sel, fl, family) ||
-				    !xfrm_sec_ctx_match(pol->security, x->security))
+				    !security_xfrm_state_pol_flow_match(x, pol, fl))
 					continue;
 				if (!best ||
 				    best->km.dying > x->km.dying ||
@@ -379,7 +549,7 @@
 			} else if (x->km.state == XFRM_STATE_ERROR ||
 				   x->km.state == XFRM_STATE_EXPIRED) {
  				if (xfrm_selector_match(&x->sel, fl, family) &&
-				    xfrm_sec_ctx_match(pol->security, x->security))
+				    security_xfrm_state_pol_flow_match(x, pol, fl))
 					error = -ESRCH;
 			}
 		}
@@ -388,8 +558,8 @@
 	x = best;
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
-		    (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
-		                               tmpl->id.proto)) != NULL) {
+		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
+					      tmpl->id.proto, family)) != NULL) {
 			xfrm_state_put(x0);
 			error = -EEXIST;
 			goto out;
@@ -403,17 +573,24 @@
 		 * to current session. */
 		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
 
+		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
+		if (error) {
+			x->km.state = XFRM_STATE_DEAD;
+			xfrm_state_put(x);
+			x = NULL;
+			goto out;
+		}
+
 		if (km_query(x, tmpl, pol) == 0) {
 			x->km.state = XFRM_STATE_ACQ;
-			list_add_tail(&x->bydst, xfrm_state_bydst+h);
-			xfrm_state_hold(x);
+			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+			h = xfrm_src_hash(saddr, family);
+			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
-				list_add(&x->byspi, xfrm_state_byspi+h);
-				xfrm_state_hold(x);
+				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
 			}
 			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
-			xfrm_state_hold(x);
 			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
 			add_timer(&x->timer);
 		} else {
@@ -429,59 +606,167 @@
 	else
 		*err = acquire_in_progress ? -EAGAIN : error;
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 	return x;
 }
 
 static void __xfrm_state_insert(struct xfrm_state *x)
 {
-	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
+	unsigned int h;
 
-	list_add(&x->bydst, xfrm_state_bydst+h);
-	xfrm_state_hold(x);
+	x->genid = ++xfrm_state_genid;
 
-	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
+	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
+			  x->props.reqid, x->props.family);
+	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 
-	list_add(&x->byspi, xfrm_state_byspi+h);
-	xfrm_state_hold(x);
+	h = xfrm_src_hash(&x->props.saddr, x->props.family);
+	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 
-	if (!mod_timer(&x->timer, jiffies + HZ))
-		xfrm_state_hold(x);
+	if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
+		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
+				  x->props.family);
 
-	if (x->replay_maxage &&
-	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
-		xfrm_state_hold(x);
+		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+	}
+
+	mod_timer(&x->timer, jiffies + HZ);
+	if (x->replay_maxage)
+		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
 
 	wake_up(&km_waitq);
+
+	xfrm_state_num++;
+
+	if (x->bydst.next != NULL &&
+	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
+	    xfrm_state_num > xfrm_state_hmask)
+		schedule_work(&xfrm_hash_work);
+}
+
+/* Stamp bydst entries sharing xnew's family/reqid/daddr/saddr with the
+ * current xfrm_state_genid.  xfrm_state_lock is held. */
+static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
+{
+	unsigned short family = xnew->props.family;
+	u32 reqid = xnew->props.reqid;
+	struct hlist_node *pos;
+	struct xfrm_state *x;
+	unsigned int bucket;
+
+	bucket = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
+	hlist_for_each_entry(x, pos, xfrm_state_bydst+bucket, bydst) {
+		if (x->props.family != family || x->props.reqid != reqid ||
+		    xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) ||
+		    xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
+			continue;
+		x->genid = xfrm_state_genid;
+	}
 }
 
 void xfrm_state_insert(struct xfrm_state *x)
 {
 	spin_lock_bh(&xfrm_state_lock);
+	__xfrm_state_bump_genids(x);
 	__xfrm_state_insert(x);
 	spin_unlock_bh(&xfrm_state_lock);
-
-	xfrm_flush_all_bundles();
 }
 EXPORT_SYMBOL(xfrm_state_insert);
 
+/* Find the larval (XFRM_STATE_ACQ, zero SPI) state for this key or, when
+ * @create is set, allocate and hash one.  xfrm_state_lock is held. */
+static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
+{
+	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
+	struct hlist_node *entry;
+	struct xfrm_state *x;
+
+	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+		if (x->props.reqid  != reqid ||
+		    x->props.mode   != mode ||
+		    x->props.family != family ||
+		    x->km.state     != XFRM_STATE_ACQ ||
+		    x->id.proto     != proto ||
+		    x->id.spi       != 0)
+			continue;
+		switch (family) {
+		case AF_INET:
+			if (x->id.daddr.a4    != daddr->a4 ||
+			    x->props.saddr.a4 != saddr->a4)
+				continue;
+			break;
+		case AF_INET6:
+			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
+					     (struct in6_addr *)daddr) ||
+			    !ipv6_addr_equal((struct in6_addr *)x->props.saddr.a6,
+					     (struct in6_addr *)saddr))
+				continue;
+			break;
+		};
+
+		xfrm_state_hold(x);
+		return x;
+	}
+
+	if (!create)
+		return NULL;
+
+	x = xfrm_state_alloc();
+	if (likely(x)) {
+		switch (family) {
+		case AF_INET:
+			x->sel.daddr.a4 = daddr->a4;
+			x->sel.saddr.a4 = saddr->a4;
+			x->sel.prefixlen_d = 32;
+			x->sel.prefixlen_s = 32;
+			x->props.saddr.a4 = saddr->a4;
+			x->id.daddr.a4 = daddr->a4;
+			break;
+		case AF_INET6:
+			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
+				       (struct in6_addr *)daddr);
+			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
+				       (struct in6_addr *)saddr);
+			x->sel.prefixlen_d = 128;
+			x->sel.prefixlen_s = 128;
+			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
+				       (struct in6_addr *)saddr);
+			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
+				       (struct in6_addr *)daddr);
+			break;
+		};
+
+		x->km.state = XFRM_STATE_ACQ;
+		x->id.proto = proto;
+		x->props.family = family;
+		x->props.mode = mode;
+		x->props.reqid = reqid;
+		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
+		xfrm_state_hold(x);
+		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
+		add_timer(&x->timer);
+		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+		h = xfrm_src_hash(saddr, family);
+		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
+		xfrm_state_num++;	/* __xfrm_state_delete() decrements on unhash */
+		wake_up(&km_waitq);
+	}
+
+	return x;
+}
+
 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
 
 int xfrm_state_add(struct xfrm_state *x)
 {
-	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_state *x1;
 	int family;
 	int err;
+	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
 	family = x->props.family;
-	afinfo = xfrm_state_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
 
 	spin_lock_bh(&xfrm_state_lock);
 
-	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	x1 = __xfrm_state_locate(x, use_spi, family);
 	if (x1) {
 		xfrm_state_put(x1);
 		x1 = NULL;
@@ -489,7 +774,7 @@
 		goto out;
 	}
 
-	if (x->km.seq) {
+	if (use_spi && x->km.seq) {
 		x1 = __xfrm_find_acq_byseq(x->km.seq);
 		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
 			xfrm_state_put(x1);
@@ -497,20 +782,17 @@
 		}
 	}
 
-	if (!x1)
-		x1 = afinfo->find_acq(
-			x->props.mode, x->props.reqid, x->id.proto,
-			&x->id.daddr, &x->props.saddr, 0);
+	if (use_spi && !x1)
+		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
+				     x->id.proto,
+				     &x->id.daddr, &x->props.saddr, 0);
 
+	__xfrm_state_bump_genids(x);
 	__xfrm_state_insert(x);
 	err = 0;
 
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
-
-	if (!err)
-		xfrm_flush_all_bundles();
 
 	if (x1) {
 		xfrm_state_delete(x1);
@@ -523,16 +805,12 @@
 
 int xfrm_state_update(struct xfrm_state *x)
 {
-	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_state *x1;
 	int err;
-
-	afinfo = xfrm_state_get_afinfo(x->props.family);
-	if (unlikely(afinfo == NULL))
-		return -EAFNOSUPPORT;
+	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
 
 	spin_lock_bh(&xfrm_state_lock);
-	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
+	x1 = __xfrm_state_locate(x, use_spi, x->props.family);
 
 	err = -ESRCH;
 	if (!x1)
@@ -552,7 +830,6 @@
 
 out:
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 
 	if (err)
 		return err;
@@ -568,11 +845,15 @@
 	if (likely(x1->km.state == XFRM_STATE_VALID)) {
 		if (x->encap && x1->encap)
 			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+		if (x->coaddr && x1->coaddr) {
+			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
+		}
+		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
+			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
 		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
 		x1->km.dying = 0;
 
-		if (!mod_timer(&x1->timer, jiffies + HZ))
-			xfrm_state_hold(x1);
+		mod_timer(&x1->timer, jiffies + HZ);
 		if (x1->curlft.use_time)
 			xfrm_state_check_expire(x1);
 
@@ -597,8 +878,7 @@
 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
 	    x->curlft.packets >= x->lft.hard_packet_limit) {
 		x->km.state = XFRM_STATE_EXPIRED;
-		if (!mod_timer(&x->timer, jiffies))
-			xfrm_state_hold(x);
+		mod_timer(&x->timer, jiffies);
 		return -EINVAL;
 	}
 
@@ -640,46 +920,93 @@
 		  unsigned short family)
 {
 	struct xfrm_state *x;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = afinfo->state_lookup(daddr, spi, proto);
+	x = __xfrm_state_lookup(daddr, spi, proto, family);
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
 	return x;
 }
 EXPORT_SYMBOL(xfrm_state_lookup);
 
 struct xfrm_state *
+xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
+			 u8 proto, unsigned short family)
+{
+	struct xfrm_state *x;
+	/* Locked wrapper: runs __xfrm_state_lookup_byaddr() under xfrm_state_lock. */
+	spin_lock_bh(&xfrm_state_lock);
+	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
+	spin_unlock_bh(&xfrm_state_lock);
+	return x;
+}
+EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
+
+struct xfrm_state *
 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
 	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
 	      int create, unsigned short family)
 {
 	struct xfrm_state *x;
-	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
-	if (!afinfo)
-		return NULL;
 
 	spin_lock_bh(&xfrm_state_lock);
-	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
+	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
 	spin_unlock_bh(&xfrm_state_lock);
-	xfrm_state_put_afinfo(afinfo);
+
 	return x;
 }
 EXPORT_SYMBOL(xfrm_find_acq);
 
+#ifdef CONFIG_XFRM_SUB_POLICY
+/* Run the family's tmpl_sort hook, if any, under xfrm_state_lock. */
+int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
+		   unsigned short family)
+{
+	struct xfrm_state_afinfo *ops = xfrm_state_get_afinfo(family);
+	int rc;
+
+	if (ops == NULL)
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+	rc = ops->tmpl_sort ? ops->tmpl_sort(dst, src, n) : 0;
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(ops);
+	return rc;
+}
+EXPORT_SYMBOL(xfrm_tmpl_sort);
+
+/* Run the family's state_sort hook, if any, under xfrm_state_lock. */
+int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
+		    unsigned short family)
+{
+	struct xfrm_state_afinfo *ops = xfrm_state_get_afinfo(family);
+	int rc;
+
+	if (ops == NULL)
+		return -EAFNOSUPPORT;
+
+	spin_lock_bh(&xfrm_state_lock);
+	rc = ops->state_sort ? ops->state_sort(dst, src, n) : 0;
+	spin_unlock_bh(&xfrm_state_lock);
+	xfrm_state_put_afinfo(ops);
+	return rc;
+}
+EXPORT_SYMBOL(xfrm_state_sort);
+#endif
+
 /* Silly enough, but I'm lazy to build resolution list */
 
 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
 {
 	int i;
-	struct xfrm_state *x;
 
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
+	for (i = 0; i <= xfrm_state_hmask; i++) {
+		struct hlist_node *entry;
+		struct xfrm_state *x;
+
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+			if (x->km.seq == seq &&
+			    x->km.state == XFRM_STATE_ACQ) {
 				xfrm_state_hold(x);
 				return x;
 			}
@@ -715,7 +1042,7 @@
 void
 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
 {
-	u32 h;
+	unsigned int h;
 	struct xfrm_state *x0;
 
 	if (x->id.spi)
@@ -745,8 +1072,7 @@
 	if (x->id.spi) {
 		spin_lock_bh(&xfrm_state_lock);
 		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-		list_add(&x->byspi, xfrm_state_byspi+h);
-		xfrm_state_hold(x);
+		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
 		spin_unlock_bh(&xfrm_state_lock);
 		wake_up(&km_waitq);
 	}
@@ -758,13 +1084,14 @@
 {
 	int i;
 	struct xfrm_state *x;
+	struct hlist_node *entry;
 	int count = 0;
 	int err = 0;
 
 	spin_lock_bh(&xfrm_state_lock);
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
+	for (i = 0; i <= xfrm_state_hmask; i++) {
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+			if (xfrm_id_proto_match(x->id.proto, proto))
 				count++;
 		}
 	}
@@ -773,9 +1100,9 @@
 		goto out;
 	}
 
-	for (i = 0; i < XFRM_DST_HSIZE; i++) {
-		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
-			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
+	for (i = 0; i <= xfrm_state_hmask; i++) {
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+			if (!xfrm_id_proto_match(x->id.proto, proto))
 				continue;
 			err = func(x, --count, data);
 			if (err)
@@ -832,10 +1159,8 @@
 	km_state_notify(x, &c);
 
 	if (x->replay_maxage &&
-	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
-		xfrm_state_hold(x);
+	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
 		x->xflags &= ~XFRM_TIME_DEFER;
-	}
 }
 EXPORT_SYMBOL(xfrm_replay_notify);
 
@@ -853,7 +1178,6 @@
 	}
 
 	spin_unlock(&x->lock);
-	xfrm_state_put(x);
 }
 
 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
@@ -997,6 +1321,25 @@
 }
 EXPORT_SYMBOL(km_policy_expired);
 
+/* Offer the report to every key manager; 0 if any accepted, else -EINVAL. */
+int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
+{
+	struct xfrm_mgr *mgr;
+	int status = -EINVAL;
+
+	read_lock(&xfrm_km_lock);
+	list_for_each_entry(mgr, &xfrm_km_list, list) {
+		if (!mgr->report)
+			continue;
+		/* One successful delivery is enough to report success. */
+		if (mgr->report(proto, sel, addr) == 0)
+			status = 0;
+	}
+	read_unlock(&xfrm_km_lock);
+	return status;
+}
+EXPORT_SYMBOL(km_report);
+
 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
 	int err;
@@ -1018,7 +1361,7 @@
 	err = -EINVAL;
 	read_lock(&xfrm_km_lock);
 	list_for_each_entry(km, &xfrm_km_list, list) {
-		pol = km->compile_policy(sk->sk_family, optname, data,
+		pol = km->compile_policy(sk, optname, data,
 					 optlen, &err);
 		if (err >= 0)
 			break;
@@ -1065,11 +1408,8 @@
 	write_lock_bh(&xfrm_state_afinfo_lock);
 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
 		err = -ENOBUFS;
-	else {
-		afinfo->state_bydst = xfrm_state_bydst;
-		afinfo->state_byspi = xfrm_state_byspi;
+	else
 		xfrm_state_afinfo[afinfo->family] = afinfo;
-	}
 	write_unlock_bh(&xfrm_state_afinfo_lock);
 	return err;
 }
@@ -1086,11 +1426,8 @@
 	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
 		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
 			err = -EINVAL;
-		else {
+		else
 			xfrm_state_afinfo[afinfo->family] = NULL;
-			afinfo->state_byspi = NULL;
-			afinfo->state_bydst = NULL;
-		}
 	}
 	write_unlock_bh(&xfrm_state_afinfo_lock);
 	return err;
@@ -1206,12 +1543,17 @@
  
 void __init xfrm_state_init(void)
 {
-	int i;
+	unsigned int sz;
 
-	for (i=0; i<XFRM_DST_HSIZE; i++) {
-		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
-		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
-	}
+	sz = sizeof(struct hlist_head) * 8;
+
+	xfrm_state_bydst = xfrm_hash_alloc(sz);
+	xfrm_state_bysrc = xfrm_hash_alloc(sz);
+	xfrm_state_byspi = xfrm_hash_alloc(sz);
+	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
+		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
+	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
+
 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
 }
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3e6a722..c59a78d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -10,6 +10,7 @@
  *
  */
 
+#include <linux/crypto.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
@@ -27,6 +28,9 @@
 #include <net/xfrm.h>
 #include <net/netlink.h>
 #include <asm/uaccess.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#include <linux/in6.h>
+#endif
 
 static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
 {
@@ -86,6 +90,22 @@
 	return 0;
 }
 
+/* Validate an optional address attribute; optionally return its payload. */
+static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type,
+			   xfrm_address_t **addrp)
+{
+	struct rtattr *attr = xfrma[type - 1];
+
+	/* An absent attribute is fine; *addrp is then left untouched. */
+	if (attr == NULL)
+		return 0;
+	if ((attr->rta_len - sizeof(*attr)) < sizeof(xfrm_address_t))
+		return -EINVAL;
+	if (addrp != NULL)
+		*addrp = RTA_DATA(attr);
+
+	return 0;
+}
 
 static inline int verify_sec_ctx_len(struct rtattr **xfrma)
 {
@@ -156,6 +176,19 @@
 			goto out;
 		break;
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case IPPROTO_DSTOPTS:
+	case IPPROTO_ROUTING:
+		if (xfrma[XFRMA_ALG_COMP-1]	||
+		    xfrma[XFRMA_ALG_AUTH-1]	||
+		    xfrma[XFRMA_ALG_CRYPT-1]	||
+		    xfrma[XFRMA_ENCAP-1]	||
+		    xfrma[XFRMA_SEC_CTX-1]	||
+		    !xfrma[XFRMA_COADDR-1])
+			goto out;
+		break;
+#endif
+
 	default:
 		goto out;
 	};
@@ -170,11 +203,14 @@
 		goto out;
 	if ((err = verify_sec_ctx_len(xfrma)))
 		goto out;
+	if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL)))
+		goto out;
 
 	err = -EINVAL;
 	switch (p->mode) {
-	case 0:
-	case 1:
+	case XFRM_MODE_TRANSPORT:
+	case XFRM_MODE_TUNNEL:
+	case XFRM_MODE_ROUTEOPTIMIZATION:
 		break;
 
 	default:
@@ -212,6 +248,7 @@
 		return -ENOMEM;
 
 	memcpy(p, ualg, len);
+	strcpy(p->alg_name, algo->name);
 	*algpp = p;
 	return 0;
 }
@@ -258,6 +295,24 @@
 	return security_xfrm_state_alloc(x, uctx);
 }
 
+/* Store a kmalloc'ed copy of the attribute's address in *addrpp.
+ * A missing attribute is a no-op; returns 0 or -ENOMEM. */
+static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg)
+{
+	xfrm_address_t *copy;
+
+	if (u_arg == NULL)
+		return 0;
+
+	copy = kmalloc(sizeof(*copy), GFP_KERNEL);
+	if (copy == NULL)
+		return -ENOMEM;
+
+	memcpy(copy, RTA_DATA(u_arg), sizeof(*copy));
+	*addrpp = copy;
+	return 0;
+}
+
 static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
 {
 	memcpy(&x->id, &p->id, sizeof(x->id));
@@ -347,7 +402,8 @@
 		goto error;
 	if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
 		goto error;
-
+	if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1])))
+		goto error;
 	err = xfrm_init_state(x);
 	if (err)
 		goto error;
@@ -416,16 +472,48 @@
 	return err;
 }
 
+/* Look up the SA named by @p: by SPI when p->proto matches IPSEC_PROTO_ANY,
+ * else by daddr plus the XFRMA_SRCADDR attribute.  Returns NULL and stores
+ * the error in *errp (when errp is non-NULL) if no state is found. */
+static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p,
+						 struct rtattr **xfrma,
+						 int *errp)
+{
+	struct xfrm_state *x = NULL;
+	int err = -ESRCH;
+
+	if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) {
+		x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	} else {
+		xfrm_address_t *saddr = NULL;
+
+		err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr);
+		if (!err && !saddr)
+			err = -EINVAL;
+		if (err)
+			goto out;
+
+		err = -ESRCH;	/* verify_one_addr() left err == 0 */
+		x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto,
+					     p->family);
+	}
+
+ out:
+	if (!x && errp)
+		*errp = err;
+	return x;
+}
+
 static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 {
 	struct xfrm_state *x;
-	int err;
+	int err = -ESRCH;
 	struct km_event c;
 	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
 
-	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
+	x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
 	if (x == NULL)
-		return -ESRCH;
+		return err;
 
 	if ((err = security_xfrm_state_delete(x)) != 0)
 		goto out;
@@ -519,6 +607,13 @@
 		uctx->ctx_len = x->security->ctx_len;
 		memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len);
 	}
+
+	if (x->coaddr)
+		RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
+
+	if (x->lastused)
+		RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused);
+
 	nlh->nlmsg_len = skb->tail - b;
 out:
 	sp->this_idx++;
@@ -540,7 +635,7 @@
 	info.nlmsg_flags = NLM_F_MULTI;
 	info.this_idx = 0;
 	info.start_idx = cb->args[0];
-	(void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info);
+	(void) xfrm_state_walk(0, dump_one_state, &info);
 	cb->args[0] = info.this_idx;
 
 	return skb->len;
@@ -576,10 +671,9 @@
 	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
 	struct xfrm_state *x;
 	struct sk_buff *resp_skb;
-	int err;
+	int err = -ESRCH;
 
-	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
-	err = -ESRCH;
+	x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
 	if (x == NULL)
 		goto out_noput;
 
@@ -692,6 +786,22 @@
 	return 0;
 }
 
+/* Validate a policy type value.
+ * XFRM_POLICY_TYPE_MAIN is always accepted; XFRM_POLICY_TYPE_SUB only
+ * when CONFIG_XFRM_SUB_POLICY is set.  Anything else yields -EINVAL.
+ */
+static int verify_policy_type(__u8 type)
+{
+	if (type == XFRM_POLICY_TYPE_MAIN)
+		return 0;
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (type == XFRM_POLICY_TYPE_SUB)
+		return 0;
+#endif
+
+	return -EINVAL;
+}
+
 static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 {
 	switch (p->share) {
@@ -785,6 +895,29 @@
 	return 0;
 }
 
+/* Fetch and validate XFRMA_POLICY_TYPE into *tp (MAIN when absent). */
+static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1];
+	struct xfrm_userpolicy_type *upt;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
+	int err;
+
+	if (rt) {
+		/* rta_len covers the rtattr header as well as the payload. */
+		if (rt->rta_len < sizeof(*rt) + sizeof(*upt))
+			return -EINVAL;
+		upt = RTA_DATA(rt);
+		type = upt->type;
+	}
+
+	err = verify_policy_type(type);
+	if (err)
+		return err;
+	*tp = type;
+	return 0;
+}
+
 static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
 {
 	xp->priority = p->priority;
@@ -823,16 +956,20 @@
 
 	copy_from_user_policy(xp, p);
 
+	err = copy_from_user_policy_type(&xp->type, xfrma);
+	if (err)
+		goto error;
+
 	if (!(err = copy_from_user_tmpl(xp, xfrma)))
 		err = copy_from_user_sec_ctx(xp, xfrma);
-
-	if (err) {
-		*errp = err;
-		kfree(xp);
-		xp = NULL;
-	}
+	if (err)
+		goto error;
 
 	return xp;
+ error:
+	*errp = err;
+	kfree(xp);
+	return NULL;
 }
 
 static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
@@ -909,27 +1046,63 @@
 	return -1;
 }
 
-static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb)
 {
-	if (xp->security) {
-		int ctx_size = sizeof(struct xfrm_sec_ctx) +
-				xp->security->ctx_len;
-		struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
-		struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+	int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len;
+	struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
+	struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
 
-		uctx->exttype = XFRMA_SEC_CTX;
-		uctx->len = ctx_size;
-		uctx->ctx_doi = xp->security->ctx_doi;
-		uctx->ctx_alg = xp->security->ctx_alg;
-		uctx->ctx_len = xp->security->ctx_len;
-		memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len);
-	}
-	return 0;
+	uctx->exttype = XFRMA_SEC_CTX;
+	uctx->len = ctx_size;
+	uctx->ctx_doi = s->ctx_doi;
+	uctx->ctx_alg = s->ctx_alg;
+	uctx->ctx_len = s->ctx_len;
+	memcpy(uctx + 1, s->ctx_str, s->ctx_len);
+ 	return 0;
 
  rtattr_failure:
 	return -1;
 }
 
+/* Emit the state's security context via copy_sec_ctx(), if it has one. */
+static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb)
+{
+	if (!x->security)
+		return 0;
+	return copy_sec_ctx(x->security, skb);
+}
+
+/* Emit the policy's security context via copy_sec_ctx(), if it has one. */
+static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	if (!xp->security)
+		return 0;
+	return copy_sec_ctx(xp->security, skb);
+}
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	struct xfrm_userpolicy_type upt;
+
+	memset(&upt, 0, sizeof(upt));
+	upt.type = xp->type;
+	/* Append xp->type to the skb as an XFRMA_POLICY_TYPE attribute. */
+	RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
+
+	return 0;
+	/* Presumably reached only through RTA_PUT()'s failure path. */
+rtattr_failure:
+	return -1;
+}
+
+#else
+static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	return 0;
+}
+#endif
+
 static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
 {
 	struct xfrm_dump_info *sp = ptr;
@@ -953,6 +1126,8 @@
 		goto nlmsg_failure;
 	if (copy_to_user_sec_ctx(xp, skb))
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 out:
@@ -974,7 +1149,10 @@
 	info.nlmsg_flags = NLM_F_MULTI;
 	info.this_idx = 0;
 	info.start_idx = cb->args[0];
-	(void) xfrm_policy_walk(dump_one_policy, &info);
+	(void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info);
+#ifdef CONFIG_XFRM_SUB_POLICY
+	(void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info);
+#endif
 	cb->args[0] = info.this_idx;
 
 	return skb->len;
@@ -1010,6 +1188,7 @@
 {
 	struct xfrm_policy *xp;
 	struct xfrm_userpolicy_id *p;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
 	int err;
 	struct km_event c;
 	int delete;
@@ -1017,12 +1196,16 @@
 	p = NLMSG_DATA(nlh);
 	delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
 
+	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+	if (err)
+		return err;
+
 	err = verify_policy_dir(p->dir);
 	if (err)
 		return err;
 
 	if (p->index)
-		xp = xfrm_policy_byid(p->dir, p->index, delete);
+		xp = xfrm_policy_byid(type, p->dir, p->index, delete);
 	else {
 		struct rtattr **rtattrs = (struct rtattr **)xfrma;
 		struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
@@ -1039,7 +1222,7 @@
 			if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete);
+		xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete);
 		security_xfrm_policy_free(&tmp);
 	}
 	if (xp == NULL)
@@ -1222,9 +1405,16 @@
 
 static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 {
-struct km_event c;
+	struct km_event c;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
+	int err;
 
-	xfrm_policy_flush();
+	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+	if (err)
+		return err;
+
+	xfrm_policy_flush(type);
+	c.data.type = type;
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
 	c.pid = nlh->nlmsg_pid;
@@ -1237,10 +1427,15 @@
 	struct xfrm_policy *xp;
 	struct xfrm_user_polexpire *up = NLMSG_DATA(nlh);
 	struct xfrm_userpolicy_info *p = &up->pol;
+	__u8 type = XFRM_POLICY_TYPE_MAIN;
 	int err = -ENOENT;
 
+	err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+	if (err)
+		return err;
+
 	if (p->index)
-		xp = xfrm_policy_byid(p->dir, p->index, 0);
+		xp = xfrm_policy_byid(type, p->dir, p->index, 0);
 	else {
 		struct rtattr **rtattrs = (struct rtattr **)xfrma;
 		struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
@@ -1257,7 +1452,7 @@
 			if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, 0);
+		xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0);
 		security_xfrm_policy_free(&tmp);
 	}
 
@@ -1384,6 +1579,7 @@
 	[XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0),
 	[XFRM_MSG_NEWAE       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
 	[XFRM_MSG_GETAE       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
+	[XFRM_MSG_REPORT      - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
 };
 
 #undef XMSGSIZE
@@ -1708,7 +1904,9 @@
 
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
-	if (copy_to_user_sec_ctx(xp, skb))
+	if (copy_to_user_state_sec_ctx(x, skb))
+		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
 		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -1742,7 +1940,7 @@
 /* User gives us xfrm_user_policy_info followed by an array of 0
  * or more templates.
  */
-static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
+static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 					       u8 *data, int len, int *dir)
 {
 	struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
@@ -1750,7 +1948,7 @@
 	struct xfrm_policy *xp;
 	int nr;
 
-	switch (family) {
+	switch (sk->sk_family) {
 	case AF_INET:
 		if (opt != IP_XFRM_POLICY) {
 			*dir = -EOPNOTSUPP;
@@ -1790,8 +1988,18 @@
 	}
 
 	copy_from_user_policy(xp, p);
+	xp->type = XFRM_POLICY_TYPE_MAIN;
 	copy_templates(xp, ut, nr);
 
+	if (!xp->security) {
+		int err = security_xfrm_sock_policy_alloc(xp, sk);
+		if (err) {
+			kfree(xp);
+			*dir = err;
+			return NULL;
+		}
+	}
+
 	*dir = p->dir;
 
 	return xp;
@@ -1814,6 +2022,8 @@
 		goto nlmsg_failure;
 	if (copy_to_user_sec_ctx(xp, skb))
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 	upe->hard = !!hard;
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -1885,6 +2095,8 @@
 	copy_to_user_policy(xp, p, dir);
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
+	if (copy_to_user_policy_type(xp, skb) < 0)
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 
@@ -1902,6 +2114,9 @@
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
 	unsigned char *b;
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_userpolicy_type upt;
+#endif
 	int len = NLMSG_LENGTH(0);
 
 	skb = alloc_skb(len, GFP_ATOMIC);
@@ -1911,6 +2126,13 @@
 
 
 	nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0);
+	nlh->nlmsg_flags = 0;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+	memset(&upt, 0, sizeof(upt));
+	upt.type = c->data.type;
+	RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
+#endif
 
 	nlh->nlmsg_len = skb->tail - b;
 
@@ -1918,6 +2140,9 @@
 	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC);
 
 nlmsg_failure:
+#ifdef CONFIG_XFRM_SUB_POLICY
+rtattr_failure:
+#endif
 	kfree_skb(skb);
 	return -1;
 }
@@ -1942,19 +2167,64 @@
 
 }
 
+static int build_report(struct sk_buff *skb, u8 proto,
+			struct xfrm_selector *sel, xfrm_address_t *addr)
+{
+	struct xfrm_user_report *ur;
+	struct nlmsghdr *nlh;
+	unsigned char *b = skb->tail;	/* message start: for length and rollback */
+
+	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur));
+	ur = NLMSG_DATA(nlh);
+	nlh->nlmsg_flags = 0;
+
+	ur->proto = proto;
+	memcpy(&ur->sel, sel, sizeof(ur->sel));
+
+	if (addr)	/* XFRMA_COADDR is only emitted when an address is given */
+		RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
+
+	nlh->nlmsg_len = skb->tail - b;	/* everything appended since b */
+	return skb->len;
+
+nlmsg_failure:	/* presumably the NLMSG_PUT bail-out target -- confirm in macro */
+rtattr_failure:	/* presumably the RTA_PUT bail-out target -- confirm in macro */
+	skb_trim(skb, b - skb->data);	/* drop the partially built message */
+	return -1;
+}
+
+/* Broadcast an XFRM_MSG_REPORT for @sel (plus optional @addr) to listeners. */
+static int xfrm_send_report(u8 proto, struct xfrm_selector *sel,
+			    xfrm_address_t *addr)
+{
+	struct sk_buff *skb;
+	size_t len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report)));
+
+	if (addr)	/* build_report() appends XFRMA_COADDR: reserve room */
+		len += RTA_SPACE(sizeof(*addr));
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+	if (build_report(skb, proto, sel, addr) < 0)
+		BUG();	/* cannot happen: skb was sized for the full message */
+	NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT;
+	return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC);
+}
+
 static struct xfrm_mgr netlink_mgr = {
 	.id		= "netlink",
 	.notify		= xfrm_send_state_notify,
 	.acquire	= xfrm_send_acquire,
 	.compile_policy	= xfrm_compile_policy,
 	.notify_policy	= xfrm_send_policy_notify,
+	.report		= xfrm_send_report,
 };
 
 static int __init xfrm_user_init(void)
 {
 	struct sock *nlsk;
 
-	printk(KERN_INFO "Initializing IPsec netlink socket\n");
+	printk(KERN_INFO "Initializing XFRM netlink socket\n");
 
 	nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
 	                             xfrm_netlink_rcv, THIS_MODULE);
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index b349246..f7844f6 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -62,6 +62,8 @@
 	} elsif ($arch eq 'ppc64') {
 		#XXX
 		$re = qr/.*stdu.*r1,-($x{1,8})\(r1\)/o;
+	} elsif ($arch eq 'powerpc') {
+		$re = qr/.*st[dw]u.*r1,-($x{1,8})\(r1\)/o;
 	} elsif ($arch =~ /^s390x?$/) {
 		#   11160:       a7 fb ff 60             aghi   %r15,-160
 		$re = qr/.*ag?hi.*\%r15,-(([0-9]{2}|[3-9])[0-9]{2})/o;
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index e2de650..de76da8 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -265,6 +265,14 @@
 	return 1;
 }
 
+/* looks like: "ap:tN" */
+static int do_ap_entry(const char *filename,
+		       struct ap_device_id *id, char *alias)
+{
+	sprintf(alias, "ap:t%02X", id->dev_type);
+	return 1;
+}
+
 /* Looks like: "serio:tyNprNidNexN" */
 static int do_serio_entry(const char *filename,
 			  struct serio_device_id *id, char *alias)
@@ -503,6 +511,10 @@
 		do_table(symval, sym->st_size,
 			 sizeof(struct ccw_device_id), "ccw",
 			 do_ccw_entry, mod);
+	else if (sym_is(symname, "__mod_ap_device_table"))
+		do_table(symval, sym->st_size,
+			 sizeof(struct ap_device_id), "ap",
+			 do_ap_entry, mod);
 	else if (sym_is(symname, "__mod_serio_device_table"))
 		do_table(symval, sym->st_size,
 			 sizeof(struct serio_device_id), "serio",
diff --git a/security/dummy.c b/security/dummy.c
index 58c6d39..aeee705 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -709,10 +709,10 @@
 	return 0;
 }
 
-static void dummy_socket_post_create (struct socket *sock, int family, int type,
-				      int protocol, int kern)
+static int dummy_socket_post_create (struct socket *sock, int family, int type,
+				     int protocol, int kern)
 {
-	return;
+	return 0;
 }
 
 static int dummy_socket_bind (struct socket *sock, struct sockaddr *address,
@@ -805,14 +805,38 @@
 {
 }
 
-static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir)
+static inline void dummy_sk_clone_security (const struct sock *sk, struct sock *newsk)
+{
+}
+
+static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid)
+{
+}
+
+static inline void dummy_sock_graft(struct sock* sk, struct socket *parent)
+{
+}
+
+static inline int dummy_inet_conn_request(struct sock *sk,
+			struct sk_buff *skb, struct request_sock *req)
 {
 	return 0;
 }
+
+static inline void dummy_inet_csk_clone(struct sock *newsk,
+			const struct request_sock *req)
+{
+}
+
+static inline void dummy_req_classify_flow(const struct request_sock *req,
+			struct flowi *fl)
+{
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
+static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp,
+		struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk)
 {
 	return 0;
 }
@@ -831,7 +855,8 @@
 	return 0;
 }
 
-static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+static int dummy_xfrm_state_alloc_security(struct xfrm_state *x,
+	struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid)
 {
 	return 0;
 }
@@ -849,6 +874,23 @@
 {
 	return 0;
 }
+
+static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x,
+				struct xfrm_policy *xp, struct flowi *fl)
+{
+	return 1;
+}
+
+static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
+{
+	return 1;
+}
+
+static int dummy_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall)
+{
+	return 0;
+}
+
 #endif /* CONFIG_SECURITY_NETWORK_XFRM */
 static int dummy_register_security (const char *name, struct security_operations *ops)
 {
@@ -1060,7 +1102,12 @@
 	set_to_dummy_if_null(ops, socket_getpeersec_dgram);
 	set_to_dummy_if_null(ops, sk_alloc_security);
 	set_to_dummy_if_null(ops, sk_free_security);
-	set_to_dummy_if_null(ops, sk_getsid);
+	set_to_dummy_if_null(ops, sk_clone_security);
+	set_to_dummy_if_null(ops, sk_getsecid);
+	set_to_dummy_if_null(ops, sock_graft);
+	set_to_dummy_if_null(ops, inet_conn_request);
+	set_to_dummy_if_null(ops, inet_csk_clone);
+	set_to_dummy_if_null(ops, req_classify_flow);
  #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef  CONFIG_SECURITY_NETWORK_XFRM
 	set_to_dummy_if_null(ops, xfrm_policy_alloc_security);
@@ -1071,6 +1118,9 @@
 	set_to_dummy_if_null(ops, xfrm_state_free_security);
 	set_to_dummy_if_null(ops, xfrm_state_delete_security);
 	set_to_dummy_if_null(ops, xfrm_policy_lookup);
+	set_to_dummy_if_null(ops, xfrm_state_pol_flow_match);
+	set_to_dummy_if_null(ops, xfrm_flow_state_match);
+	set_to_dummy_if_null(ops, xfrm_decode_session);
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 #ifdef CONFIG_KEYS
 	set_to_dummy_if_null(ops, key_alloc);
diff --git a/security/seclvl.c b/security/seclvl.c
index c26dd7d..8f62919 100644
--- a/security/seclvl.c
+++ b/security/seclvl.c
@@ -16,6 +16,7 @@
  *	(at your option) any later version.
  */
 
+#include <linux/err.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kernel.h>
@@ -197,26 +198,27 @@
 static int
 plaintext_to_sha1(unsigned char *hash, const char *plaintext, unsigned int len)
 {
-	struct crypto_tfm *tfm;
+	struct hash_desc desc;
 	struct scatterlist sg;
+	int err;
+
 	if (len > PAGE_SIZE) {
 		seclvl_printk(0, KERN_ERR, "Plaintext password too large (%d "
 			      "characters).  Largest possible is %lu "
 			      "bytes.\n", len, PAGE_SIZE);
 		return -EINVAL;
 	}
-	tfm = crypto_alloc_tfm("sha1", CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (tfm == NULL) {
+	desc.tfm = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(desc.tfm)) {
 		seclvl_printk(0, KERN_ERR,
 			      "Failed to load transform for SHA1\n");
 		return -EINVAL;
 	}
 	sg_init_one(&sg, (u8 *)plaintext, len);
-	crypto_digest_init(tfm);
-	crypto_digest_update(tfm, &sg, 1);
-	crypto_digest_final(tfm, hash);
-	crypto_free_tfm(tfm);
-	return 0;
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = crypto_hash_digest(&desc, &sg, len, hash);
+	crypto_free_hash(desc.tfm);
+	return err;
 }
 
 /**
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5d1b8c7..5a66c4c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -12,6 +12,8 @@
  *  Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
  *  Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
  *                          <dgoeddel@trustedcs.com>
+ *  Copyright (C) 2006 Hewlett-Packard Development Company, L.P.
+ *                     Paul Moore, <paul.moore@hp.com>
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License version 2,
@@ -74,6 +76,7 @@
 #include "objsec.h"
 #include "netif.h"
 #include "xfrm.h"
+#include "selinux_netlabel.h"
 
 #define XATTR_SELINUX_SUFFIX "selinux"
 #define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX
@@ -269,17 +272,17 @@
 {
 	struct sk_security_struct *ssec;
 
-	if (family != PF_UNIX)
-		return 0;
-
 	ssec = kzalloc(sizeof(*ssec), priority);
 	if (!ssec)
 		return -ENOMEM;
 
 	ssec->sk = sk;
 	ssec->peer_sid = SECINITSID_UNLABELED;
+	ssec->sid = SECINITSID_UNLABELED;
 	sk->sk_security = ssec;
 
+	selinux_netlbl_sk_security_init(ssec, family);
+
 	return 0;
 }
 
@@ -287,9 +290,6 @@
 {
 	struct sk_security_struct *ssec = sk->sk_security;
 
-	if (sk->sk_family != PF_UNIX)
-		return;
-
 	sk->sk_security = NULL;
 	kfree(ssec);
 }
@@ -2400,6 +2400,7 @@
 
 static int selinux_file_permission(struct file *file, int mask)
 {
+	int rc;
 	struct inode *inode = file->f_dentry->d_inode;
 
 	if (!mask) {
@@ -2411,8 +2412,12 @@
 	if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE))
 		mask |= MAY_APPEND;
 
-	return file_has_perm(current, file,
-			     file_mask_to_av(inode->i_mode, mask));
+	rc = file_has_perm(current, file,
+			   file_mask_to_av(inode->i_mode, mask));
+	if (rc)
+		return rc;
+
+	return selinux_netlbl_inode_permission(inode, mask);
 }
 
 static int selinux_file_alloc_security(struct file *file)
@@ -3063,11 +3068,13 @@
 	return err;
 }
 
-static void selinux_socket_post_create(struct socket *sock, int family,
-				       int type, int protocol, int kern)
+static int selinux_socket_post_create(struct socket *sock, int family,
+				      int type, int protocol, int kern)
 {
+	int err = 0;
 	struct inode_security_struct *isec;
 	struct task_security_struct *tsec;
+	struct sk_security_struct *sksec;
 	u32 newsid;
 
 	isec = SOCK_INODE(sock)->i_security;
@@ -3078,7 +3085,15 @@
 	isec->sid = kern ? SECINITSID_KERNEL : newsid;
 	isec->initialized = 1;
 
-	return;
+	if (sock->sk) {
+		sksec = sock->sk->sk_security;
+		sksec->sid = isec->sid;
+		err = selinux_netlbl_socket_post_create(sock,
+							family,
+							isec->sid);
+	}
+
+	return err;
 }
 
 /* Range of port numbers used to automatically bind.
@@ -3259,7 +3274,13 @@
 static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg,
  				  int size)
 {
-	return socket_has_perm(current, sock, SOCKET__WRITE);
+	int rc;
+
+	rc = socket_has_perm(current, sock, SOCKET__WRITE);
+	if (rc)
+		return rc;
+
+	return selinux_netlbl_inode_permission(SOCK_INODE(sock), MAY_WRITE);
 }
 
 static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg,
@@ -3327,8 +3348,9 @@
 	/* server child socket */
 	ssec = newsk->sk_security;
 	ssec->peer_sid = isec->sid;
-	
-	return 0;
+	err = security_sid_mls_copy(other_isec->sid, ssec->peer_sid, &ssec->sid);
+
+	return err;
 }
 
 static int selinux_socket_unix_may_send(struct socket *sock,
@@ -3354,11 +3376,29 @@
 }
 
 static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
-		struct avc_audit_data *ad, u32 sock_sid, u16 sock_class,
-		u16 family, char *addrp, int len)
+		struct avc_audit_data *ad, u16 family, char *addrp, int len)
 {
 	int err = 0;
 	u32 netif_perm, node_perm, node_sid, if_sid, recv_perm = 0;
+	struct socket *sock;
+	u16 sock_class = 0;
+	u32 sock_sid = 0;
+
+ 	read_lock_bh(&sk->sk_callback_lock);
+ 	sock = sk->sk_socket;
+ 	if (sock) {
+ 		struct inode *inode;
+ 		inode = SOCK_INODE(sock);
+ 		if (inode) {
+ 			struct inode_security_struct *isec;
+ 			isec = inode->i_security;
+ 			sock_sid = isec->sid;
+ 			sock_class = isec->sclass;
+ 		}
+ 	}
+ 	read_unlock_bh(&sk->sk_callback_lock);
+ 	if (!sock_sid)
+  		goto out;
 
 	if (!skb->dev)
 		goto out;
@@ -3418,12 +3458,10 @@
 static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	u16 family;
-	u16 sock_class = 0;
 	char *addrp;
 	int len, err = 0;
-	u32 sock_sid = 0;
-	struct socket *sock;
 	struct avc_audit_data ad;
+	struct sk_security_struct *sksec = sk->sk_security;
 
 	family = sk->sk_family;
 	if (family != PF_INET && family != PF_INET6)
@@ -3433,22 +3471,6 @@
 	if (family == PF_INET6 && skb->protocol == ntohs(ETH_P_IP))
 		family = PF_INET;
 
- 	read_lock_bh(&sk->sk_callback_lock);
- 	sock = sk->sk_socket;
- 	if (sock) {
- 		struct inode *inode;
- 		inode = SOCK_INODE(sock);
- 		if (inode) {
- 			struct inode_security_struct *isec;
- 			isec = inode->i_security;
- 			sock_sid = isec->sid;
- 			sock_class = isec->sclass;
- 		}
- 	}
- 	read_unlock_bh(&sk->sk_callback_lock);
- 	if (!sock_sid)
-  		goto out;
-
 	AVC_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = skb->dev ? skb->dev->name : "[unknown]";
 	ad.u.net.family = family;
@@ -3458,16 +3480,19 @@
 		goto out;
 
 	if (selinux_compat_net)
-		err = selinux_sock_rcv_skb_compat(sk, skb, &ad, sock_sid,
-						  sock_class, family,
+		err = selinux_sock_rcv_skb_compat(sk, skb, &ad, family,
 						  addrp, len);
 	else
-		err = avc_has_perm(sock_sid, skb->secmark, SECCLASS_PACKET,
+		err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET,
 				   PACKET__RECV, &ad);
 	if (err)
 		goto out;
 
-	err = selinux_xfrm_sock_rcv_skb(sock_sid, skb);
+	err = selinux_netlbl_sock_rcv_skb(sksec, skb, &ad);
+	if (err)
+		goto out;
+
+	err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
 out:	
 	return err;
 }
@@ -3490,8 +3515,9 @@
 		peer_sid = ssec->peer_sid;
 	}
 	else if (isec->sclass == SECCLASS_TCP_SOCKET) {
-		peer_sid = selinux_socket_getpeer_stream(sock->sk);
-
+		peer_sid = selinux_netlbl_socket_getpeersec_stream(sock);
+		if (peer_sid == SECSID_NULL)
+			peer_sid = selinux_socket_getpeer_stream(sock->sk);
 		if (peer_sid == SECSID_NULL) {
 			err = -ENOPROTOOPT;
 			goto out;
@@ -3531,8 +3557,11 @@
 
 	if (sock && (sock->sk->sk_family == PF_UNIX))
 		selinux_get_inode_sid(SOCK_INODE(sock), &peer_secid);
-	else if (skb)
-		peer_secid = selinux_socket_getpeer_dgram(skb);
+	else if (skb) {
+		peer_secid = selinux_netlbl_socket_getpeersec_dgram(skb);
+		if (peer_secid == SECSID_NULL)
+			peer_secid = selinux_socket_getpeer_dgram(skb);
+	}
 
 	if (peer_secid == SECSID_NULL)
 		err = -EINVAL;
@@ -3551,22 +3580,86 @@
 	sk_free_security(sk);
 }
 
-static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir)
+static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
 {
-	struct inode_security_struct *isec;
-	u32 sock_sid = SECINITSID_ANY_SOCKET;
+	struct sk_security_struct *ssec = sk->sk_security;
+	struct sk_security_struct *newssec = newsk->sk_security;
 
+	newssec->sid = ssec->sid;
+	newssec->peer_sid = ssec->peer_sid;
+
+	selinux_netlbl_sk_clone_security(ssec, newssec);
+}
+
+static void selinux_sk_getsecid(struct sock *sk, u32 *secid)
+{
 	if (!sk)
-		return selinux_no_sk_sid(fl);
+		*secid = SECINITSID_ANY_SOCKET;
+	else {
+		struct sk_security_struct *sksec = sk->sk_security;
 
-	read_lock_bh(&sk->sk_callback_lock);
-	isec = get_sock_isec(sk);
+		*secid = sksec->sid;
+	}
+}
 
-	if (isec)
-		sock_sid = isec->sid;
+static void selinux_sock_graft(struct sock* sk, struct socket *parent)
+{
+	struct inode_security_struct *isec = SOCK_INODE(parent)->i_security;
+	struct sk_security_struct *sksec = sk->sk_security;
 
-	read_unlock_bh(&sk->sk_callback_lock);
-	return sock_sid;
+	isec->sid = sksec->sid;
+
+	selinux_netlbl_sock_graft(sk, parent);
+}
+
+static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
+				     struct request_sock *req)
+{
+	struct sk_security_struct *sksec = sk->sk_security;
+	int err;
+	u32 newsid;
+	u32 peersid;
+
+	newsid = selinux_netlbl_inet_conn_request(skb, sksec->sid);
+	if (newsid != SECSID_NULL) {	/* NetLabel hook supplied a SID: use it */
+		req->secid = newsid;
+		return 0;
+	}
+
+	err = selinux_xfrm_decode_session(skb, &peersid, 0);
+	BUG_ON(err);	/* a decode failure is treated as fatal here */
+
+	if (peersid == SECSID_NULL) {	/* no peer SID: request takes sk's SID */
+		req->secid = sksec->sid;
+		return 0;
+	}
+
+	err = security_sid_mls_copy(sksec->sid, peersid, &newsid);
+	if (err)
+		return err;
+
+	req->secid = newsid;	/* SID derived from sksec->sid and peersid */
+	return 0;
+}
+
+static void selinux_inet_csk_clone(struct sock *newsk,
+				   const struct request_sock *req)
+{
+	struct sk_security_struct *newsksec = newsk->sk_security;
+
+	newsksec->sid = req->secid;
+	/* NOTE: Ideally, we should also get the isec->sid for the
+	   new socket in sync, but we don't have the isec available yet.
+	   So we will wait until sock_graft to do it, by which
+	   time it will have been created and available. */
+
+	selinux_netlbl_sk_security_init(newsksec, req->rsk_ops->family);
+}
+
+static void selinux_req_classify_flow(const struct request_sock *req,
+				      struct flowi *fl)
+{
+	fl->secid = req->secid;
 }
 
 static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
@@ -3608,12 +3701,24 @@
 #ifdef CONFIG_NETFILTER
 
 static int selinux_ip_postroute_last_compat(struct sock *sk, struct net_device *dev,
-					    struct inode_security_struct *isec,
 					    struct avc_audit_data *ad,
 					    u16 family, char *addrp, int len)
 {
-	int err;
+	int err = 0;
 	u32 netif_perm, node_perm, node_sid, if_sid, send_perm = 0;
+	struct socket *sock;
+	struct inode *inode;
+	struct inode_security_struct *isec;
+
+	sock = sk->sk_socket;
+	if (!sock)
+		goto out;
+
+	inode = SOCK_INODE(sock);
+	if (!inode)
+		goto out;
+
+	isec = inode->i_security;
 	
 	err = sel_netif_sids(dev, &if_sid, NULL);
 	if (err)
@@ -3678,26 +3783,16 @@
 	char *addrp;
 	int len, err = 0;
 	struct sock *sk;
-	struct socket *sock;
-	struct inode *inode;
 	struct sk_buff *skb = *pskb;
-	struct inode_security_struct *isec;
 	struct avc_audit_data ad;
 	struct net_device *dev = (struct net_device *)out;
+	struct sk_security_struct *sksec;
 
 	sk = skb->sk;
 	if (!sk)
 		goto out;
 
-	sock = sk->sk_socket;
-	if (!sock)
-		goto out;
-
-	inode = SOCK_INODE(sock);
-	if (!inode)
-		goto out;
-
-	isec = inode->i_security;
+	sksec = sk->sk_security;
 
 	AVC_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = dev->name;
@@ -3708,16 +3803,16 @@
 		goto out;
 
 	if (selinux_compat_net)
-		err = selinux_ip_postroute_last_compat(sk, dev, isec, &ad,
+		err = selinux_ip_postroute_last_compat(sk, dev, &ad,
 						       family, addrp, len);
 	else
-		err = avc_has_perm(isec->sid, skb->secmark, SECCLASS_PACKET,
+		err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET,
 				   PACKET__SEND, &ad);
 
 	if (err)
 		goto out;
 
-	err = selinux_xfrm_postroute_last(isec->sid, skb);
+	err = selinux_xfrm_postroute_last(sksec->sid, skb, &ad);
 out:
 	return err ? NF_DROP : NF_ACCEPT;
 }
@@ -4618,7 +4713,12 @@
 	.socket_getpeersec_dgram =	selinux_socket_getpeersec_dgram,
 	.sk_alloc_security =		selinux_sk_alloc_security,
 	.sk_free_security =		selinux_sk_free_security,
-	.sk_getsid = 			selinux_sk_getsid_security,
+	.sk_clone_security =		selinux_sk_clone_security,
+	.sk_getsecid = 			selinux_sk_getsecid,
+	.sock_graft =			selinux_sock_graft,
+	.inet_conn_request =		selinux_inet_conn_request,
+	.inet_csk_clone =		selinux_inet_csk_clone,
+	.req_classify_flow =		selinux_req_classify_flow,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
@@ -4629,6 +4729,9 @@
 	.xfrm_state_free_security =	selinux_xfrm_state_free,
 	.xfrm_state_delete_security =	selinux_xfrm_state_delete,
 	.xfrm_policy_lookup = 		selinux_xfrm_policy_lookup,
+	.xfrm_state_pol_flow_match =	selinux_xfrm_state_pol_flow_match,
+	.xfrm_flow_state_match =	selinux_xfrm_flow_state_match,
+	.xfrm_decode_session =		selinux_xfrm_decode_session,
 #endif
 
 #ifdef CONFIG_KEYS
diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h
index 7c9b583..09fc8a2 100644
--- a/security/selinux/include/av_perm_to_string.h
+++ b/security/selinux/include/av_perm_to_string.h
@@ -241,6 +241,7 @@
    S_(SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, "sendto")
    S_(SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, "recvfrom")
    S_(SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, "setcontext")
+   S_(SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, "polmatch")
    S_(SECCLASS_PACKET, PACKET__SEND, "send")
    S_(SECCLASS_PACKET, PACKET__RECV, "recv")
    S_(SECCLASS_PACKET, PACKET__RELABELTO, "relabelto")
diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h
index 69fd4b4..81f4f52 100644
--- a/security/selinux/include/av_permissions.h
+++ b/security/selinux/include/av_permissions.h
@@ -911,6 +911,7 @@
 #define ASSOCIATION__SENDTO                       0x00000001UL
 #define ASSOCIATION__RECVFROM                     0x00000002UL
 #define ASSOCIATION__SETCONTEXT                   0x00000004UL
+#define ASSOCIATION__POLMATCH                     0x00000008UL
 
 #define NETLINK_KOBJECT_UEVENT_SOCKET__IOCTL      0x00000001UL
 #define NETLINK_KOBJECT_UEVENT_SOCKET__READ       0x00000002UL
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 9401788..0a39bfd 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -99,7 +99,16 @@
 
 struct sk_security_struct {
 	struct sock *sk;		/* back pointer to sk object */
+	u32 sid;			/* SID of this object */
 	u32 peer_sid;			/* SID of peer */
+#ifdef CONFIG_NETLABEL
+	u16 sclass;			/* sock security class */
+	enum {				/* NetLabel state */
+		NLBL_UNSET = 0,
+		NLBL_REQUIRE,
+		NLBL_LABELED,
+	} nlbl_state;
+#endif
 };
 
 struct key_security_struct {
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 063af47..911954a 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -78,6 +78,8 @@
 int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid,
                                  u16 tclass);
 
+int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid);
+
 #define SECURITY_FS_USE_XATTR		1 /* use xattr */
 #define SECURITY_FS_USE_TRANS		2 /* use transition SIDs, e.g. devpts/tmpfs */
 #define SECURITY_FS_USE_TASK		3 /* use task SIDs, e.g. pipefs/sockfs */
diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h
new file mode 100644
index 0000000..ecab4bd
--- /dev/null
+++ b/security/selinux/include/selinux_netlabel.h
@@ -0,0 +1,119 @@
+/*
+ * SELinux interface to the NetLabel subsystem
+ *
+ * Author : Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _SELINUX_NETLABEL_H_
+#define _SELINUX_NETLABEL_H_
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+#include "avc.h"
+#include "objsec.h"
+
+#ifdef CONFIG_NETLABEL
+void selinux_netlbl_cache_invalidate(void);
+int selinux_netlbl_socket_post_create(struct socket *sock,
+				      int sock_family,
+				      u32 sid);
+void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock);
+u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid);
+int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
+				struct sk_buff *skb,
+				struct avc_audit_data *ad);
+u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock);
+u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb);
+void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec,
+				     int family);
+void selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec,
+				      struct sk_security_struct *newssec);
+int selinux_netlbl_inode_permission(struct inode *inode, int mask);
+#else
+static inline void selinux_netlbl_cache_invalidate(void)
+{
+	return;
+}
+
+static inline int selinux_netlbl_socket_post_create(struct socket *sock,
+						    int sock_family,
+						    u32 sid)
+{
+	return 0;
+}
+
+static inline void selinux_netlbl_sock_graft(struct sock *sk,
+					     struct socket *sock)
+{
+	return;
+}
+
+static inline u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb,
+						   u32 sock_sid)
+{
+	return SECSID_NULL;
+}
+
+static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
+					      struct sk_buff *skb,
+					      struct avc_audit_data *ad)
+{
+	return 0;
+}
+
+static inline u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock)
+{
+	return SECSID_NULL;
+}
+
+static inline u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb)
+{
+	return SECSID_NULL;
+}
+
+static inline void selinux_netlbl_sk_security_init(
+	                                       struct sk_security_struct *ssec,
+					       int family)
+{
+	return;
+}
+
+static inline void selinux_netlbl_sk_clone_security(
+	                                   struct sk_security_struct *ssec,
+					   struct sk_security_struct *newssec)
+{
+	return;
+}
+
+static inline int selinux_netlbl_inode_permission(struct inode *inode,
+						  int mask)
+{
+	return 0;
+}
+#endif /* CONFIG_NETLABEL */
+
+#endif
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index c96498a..81eb598 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -2,18 +2,25 @@
  * SELinux support for the XFRM LSM hooks
  *
  * Author : Trent Jaeger, <jaegert@us.ibm.com>
+ * Updated : Venkat Yekkirala, <vyekkirala@TrustedCS.com>
  */
 #ifndef _SELINUX_XFRM_H_
 #define _SELINUX_XFRM_H_
 
-int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx);
+int selinux_xfrm_policy_alloc(struct xfrm_policy *xp,
+		struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk);
 int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new);
 void selinux_xfrm_policy_free(struct xfrm_policy *xp);
 int selinux_xfrm_policy_delete(struct xfrm_policy *xp);
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx);
+int selinux_xfrm_state_alloc(struct xfrm_state *x,
+	struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid);
 void selinux_xfrm_state_free(struct xfrm_state *x);
 int selinux_xfrm_state_delete(struct xfrm_state *x);
-int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir);
+int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir);
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
+			struct xfrm_policy *xp, struct flowi *fl);
+int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm);
+
 
 /*
  * Extract the security blob from the sock (it's actually on the socket)
@@ -26,30 +33,23 @@
 	return SOCK_INODE(sk->sk_socket)->i_security;
 }
 
-
-static inline u32 selinux_no_sk_sid(struct flowi *fl)
-{
-	/* NOTE: no sock occurs on ICMP reply, forwards, ... */
-	/* icmp_reply: authorize as kernel packet */
-	if (fl && fl->proto == IPPROTO_ICMP) {
-		return SECINITSID_KERNEL;
-	}
-
-	return SECINITSID_ANY_SOCKET;
-}
-
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb);
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb);
+int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
+			struct avc_audit_data *ad);
+int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
+			struct avc_audit_data *ad);
 u32 selinux_socket_getpeer_stream(struct sock *sk);
 u32 selinux_socket_getpeer_dgram(struct sk_buff *skb);
+int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
 #else
-static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb)
+static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
+			struct avc_audit_data *ad)
 {
 	return 0;
 }
 
-static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb)
+static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
+			struct avc_audit_data *ad)
 {
 	return 0;
 }
@@ -63,6 +63,11 @@
 {
 	return SECSID_NULL;
 }
+static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
+{
+	*sid = SECSID_NULL;
+	return 0;
+}
 #endif
 
 #endif /* _SELINUX_XFRM_H_ */
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 47024a6..cfed1d3 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -3,6 +3,14 @@
  *
  * Author : Stephen Smalley, <sds@epoch.ncsc.mil>
  */
+/*
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added ebitmap_export() and ebitmap_import()
+ *
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ */
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
@@ -59,6 +67,138 @@
 	return 0;
 }
 
+/**
+ * ebitmap_export - Export an ebitmap to an unsigned char bitmap string
+ * @src: the ebitmap to export
+ * @dst: the resulting bitmap string
+ * @dst_len: length of dst in bytes
+ *
+ * Description:
+ * Allocate a buffer at least src->highbit bits long and export the extensible
+ * bitmap into the buffer.  The bitmap string will be in little endian format,
+ * i.e. LSB first.  The value returned in dst_len may not be the true size of
+ * the buffer as the allocation is rounded up to a multiple of MAPTYPE.  The
+ * caller must free the buffer when finished.  Returns zero on success,
+ * -EINVAL if src->highbit is zero and -ENOMEM if the allocation fails.
+ *
+ */
+int ebitmap_export(const struct ebitmap *src,
+		   unsigned char **dst,
+		   size_t *dst_len)
+{
+	size_t bitmap_len;
+	unsigned char *bitmap;
+	struct ebitmap_node *iter_node;
+	MAPTYPE node_val;
+	size_t bitmap_byte;
+	unsigned char bitmask;
+
+	bitmap_len = src->highbit / 8;
+	if (src->highbit % 8)	/* partial trailing byte: round up */
+		bitmap_len += 1;
+	if (bitmap_len == 0)
+		return -EINVAL;
+
+	bitmap = kzalloc((bitmap_len & ~(sizeof(MAPTYPE) - 1)) +
+			 sizeof(MAPTYPE),
+			 GFP_ATOMIC);
+	if (bitmap == NULL)
+		return -ENOMEM;
+
+	iter_node = src->node;
+	do {
+		bitmap_byte = iter_node->startbit / 8;
+		bitmask = 0x80;
+		node_val = iter_node->map;
+		do {
+			if (bitmask == 0) {	/* current byte is full */
+				bitmap_byte++;
+				bitmask = 0x80;
+			}
+			if (node_val & (MAPTYPE)0x01)
+				bitmap[bitmap_byte] |= bitmask;
+			node_val >>= 1;
+			bitmask >>= 1;
+		} while (node_val > 0);	/* stop once no set bits remain */
+		iter_node = iter_node->next;
+	} while (iter_node);
+
+	*dst = bitmap;
+	*dst_len = bitmap_len;
+	return 0;
+}
+
+/**
+ * ebitmap_import - Import an unsigned char bitmap string into an ebitmap
+ * @src: the bitmap string
+ * @src_len: the bitmap length in bytes
+ * @dst: the empty ebitmap
+ *
+ * Description:
+ * This function takes a little endian bitmap string in src and imports it into
+ * the ebitmap pointed to by dst.  Returns zero on success, negative values on
+ * failure.
+ *
+ */
+int ebitmap_import(const unsigned char *src,
+		   size_t src_len,
+		   struct ebitmap *dst)
+{
+	size_t src_off = 0;
+	size_t node_limit;
+	struct ebitmap_node *node_new;
+	struct ebitmap_node *node_last = NULL;
+	u32 i_byte;
+	u32 i_bit;
+	unsigned char src_byte;
+
+	while (src_off < src_len) {
+		if (src_len - src_off >= sizeof(MAPTYPE)) {
+			if (*(MAPTYPE *)&src[src_off] == 0) {
+				src_off += sizeof(MAPTYPE);
+				continue;
+			}
+			node_limit = sizeof(MAPTYPE);
+		} else {
+			for (src_byte = 0, i_byte = src_off;
+			     i_byte < src_len && src_byte == 0;
+			     i_byte++)
+				src_byte |= src[i_byte];
+			if (src_byte == 0)
+				break;
+			node_limit = src_len - src_off;
+		}
+
+		node_new = kzalloc(sizeof(*node_new), GFP_ATOMIC);
+		if (unlikely(node_new == NULL)) {
+			ebitmap_destroy(dst);
+			return -ENOMEM;
+		}
+		node_new->startbit = src_off * 8;
+		for (i_byte = 0; i_byte < node_limit; i_byte++) {
+			src_byte = src[src_off++];
+			for (i_bit = i_byte * 8; src_byte != 0; i_bit++) {
+				if (src_byte & 0x80)
+					node_new->map |= MAPBIT << i_bit;
+				src_byte <<= 1;
+			}
+		}
+
+		if (node_last != NULL)
+			node_last->next = node_new;
+		else
+			dst->node = node_new;
+		node_last = node_new;
+	}
+
+	if (likely(node_last != NULL))
+		dst->highbit = node_last->startbit + MAPSIZE;
+	else
+		ebitmap_init(dst);
+
+	return 0;
+}
+
 int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
 {
 	struct ebitmap_node *n1, *n2;
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h
index 8bf4105..da2d465 100644
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -69,6 +69,12 @@
 
 int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2);
 int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src);
+int ebitmap_export(const struct ebitmap *src,
+		   unsigned char **dst,
+		   size_t *dst_len);
+int ebitmap_import(const unsigned char *src,
+		   size_t src_len,
+		   struct ebitmap *dst);
 int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2);
 int ebitmap_get_bit(struct ebitmap *e, unsigned long bit);
 int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value);
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 7bc5b64..119bd60 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -10,6 +10,13 @@
  *
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
+/*
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added support to import/export the MLS label
+ *
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ */
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -212,26 +219,6 @@
 }
 
 /*
- * Copies the MLS range from `src' into `dst'.
- */
-static inline int mls_copy_context(struct context *dst,
-				   struct context *src)
-{
-	int l, rc = 0;
-
-	/* Copy the MLS range from the source context */
-	for (l = 0; l < 2; l++) {
-		dst->range.level[l].sens = src->range.level[l].sens;
-		rc = ebitmap_cpy(&dst->range.level[l].cat,
-				 &src->range.level[l].cat);
-		if (rc)
-			break;
-	}
-
-	return rc;
-}
-
-/*
  * Set the MLS fields in the security context structure
  * `context' based on the string representation in
  * the string `*scontext'.  Update `*scontext' to
@@ -585,3 +572,152 @@
 	return -EINVAL;
 }
 
+/**
+ * mls_export_lvl - Export the MLS sensitivity levels
+ * @context: the security context
+ * @low: the low sensitivity level
+ * @high: the high sensitivity level
+ *
+ * Description:
+ * Given the security context copy the low MLS sensitivity level into @low
+ * and the high sensitivity level into @high.  The MLS levels are only
+ * exported if the pointers are not NULL, if they are NULL then that level is
+ * not exported.
+ *
+ */
+void mls_export_lvl(const struct context *context, u32 *low, u32 *high)
+{
+	if (!selinux_mls_enabled)
+		return;
+
+	if (low != NULL)
+		*low = context->range.level[0].sens - 1;
+	if (high != NULL)
+		*high = context->range.level[1].sens - 1;
+}
+
+/**
+ * mls_import_lvl - Import the MLS sensitivity levels
+ * @context: the security context
+ * @low: the low sensitivity level
+ * @high: the high sensitivity level
+ *
+ * Description:
+ * Given the security context and the two sensitivity levels, set the MLS levels
+ * in the context according to the two given as parameters.  Nothing is
+ * returned; the context is left untouched when MLS is disabled.
+ *
+ */
+void mls_import_lvl(struct context *context, u32 low, u32 high)
+{
+	if (!selinux_mls_enabled)
+		return;
+
+	context->range.level[0].sens = low + 1;
+	context->range.level[1].sens = high + 1;
+}
+
+/**
+ * mls_export_cat - Export the MLS categories
+ * @context: the security context
+ * @low: the low category
+ * @low_len: length of the @low bitmap in bytes
+ * @high: the high category
+ * @high_len: length of the @high bitmap in bytes
+ *
+ * Description:
+ * Given the security context export the low MLS category bitmap into @low
+ * and the high category bitmap into @high.  A level is only exported if its
+ * pointer is not NULL.  The caller is responsible for freeing the memory when
+ * finished.  Returns zero on success (or when MLS is disabled, in which case
+ * nothing is exported) and a negative value on failure; on failure no buffer
+ * is left allocated for the caller to free.
+ */
+int mls_export_cat(const struct context *context,
+		   unsigned char **low,
+		   size_t *low_len,
+		   unsigned char **high,
+		   size_t *high_len)
+{
+	int rc = -EPERM;
+
+	if (!selinux_mls_enabled)
+		return 0;
+
+	if (low != NULL) {
+		rc = ebitmap_export(&context->range.level[0].cat,
+				    low,
+				    low_len);
+		if (rc != 0)
+			return rc;	/* nothing has been allocated yet */
+	}
+	if (high != NULL) {
+		rc = ebitmap_export(&context->range.level[1].cat,
+				    high,
+				    high_len);
+		if (rc != 0)
+			goto export_cat_failure;
+	}
+
+	return 0;
+
+export_cat_failure:
+	if (low != NULL) {	/* @high was never written, only @low */
+		kfree(*low);
+		*low = NULL;
+	}
+	return rc;
+}
+
+/**
+ * mls_import_cat - Import the MLS categories
+ * @context: the security context
+ * @low: the low category
+ * @low_len: length of the @low bitmap in bytes
+ * @high: the high category
+ * @high_len: length of the @high bitmap in bytes
+ *
+ * Description:
+ * Given the security context and the two category bitmap strings import the
+ * categories into the security context.  The MLS categories are only imported
+ * if the pointers are not NULL, if they are NULL they are skipped.  Returns
+ * zero on success, negative values on failure.
+ *
+ */
+int mls_import_cat(struct context *context,
+		   const unsigned char *low,
+		   size_t low_len,
+		   const unsigned char *high,
+		   size_t high_len)
+{
+	int rc = -EPERM;
+
+	if (!selinux_mls_enabled)
+		return 0;
+
+	if (low != NULL) {
+		rc = ebitmap_import(low,
+				    low_len,
+				    &context->range.level[0].cat);
+		if (rc != 0)
+			goto import_cat_failure;
+	}
+	if (high != NULL) {
+		if (high == low)
+			rc = ebitmap_cpy(&context->range.level[1].cat,
+					 &context->range.level[0].cat);
+		else
+			rc = ebitmap_import(high,
+					    high_len,
+					    &context->range.level[1].cat);
+		if (rc != 0)
+			goto import_cat_failure;
+	}
+
+	return 0;
+
+import_cat_failure:
+	ebitmap_destroy(&context->range.level[0].cat);
+	ebitmap_destroy(&context->range.level[1].cat);
+	return rc;
+}
diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h
index fbb42f0..df6032c 100644
--- a/security/selinux/ss/mls.h
+++ b/security/selinux/ss/mls.h
@@ -10,6 +10,13 @@
  *
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
+/*
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added support to import/export the MLS label
+ *
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ */
 
 #ifndef _SS_MLS_H_
 #define _SS_MLS_H_
@@ -17,6 +24,26 @@
 #include "context.h"
 #include "policydb.h"
 
+/*
+ * Copies the MLS range from `src' into `dst'.
+ */
+static inline int mls_copy_context(struct context *dst,
+				   struct context *src)
+{
+	int l, rc = 0;
+
+	/* Copy the MLS range from the source context */
+	for (l = 0; l < 2; l++) {
+		dst->range.level[l].sens = src->range.level[l].sens;
+		rc = ebitmap_cpy(&dst->range.level[l].cat,
+				 &src->range.level[l].cat);
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
+
 int mls_compute_context_len(struct context *context);
 void mls_sid_to_context(struct context *context, char **scontext);
 int mls_context_isvalid(struct policydb *p, struct context *c);
@@ -42,5 +69,19 @@
 int mls_setup_user_range(struct context *fromcon, struct user_datum *user,
                          struct context *usercon);
 
+void mls_export_lvl(const struct context *context, u32 *low, u32 *high);
+void mls_import_lvl(struct context *context, u32 low, u32 high);
+
+int mls_export_cat(const struct context *context,
+		   unsigned char **low,
+		   size_t *low_len,
+		   unsigned char **high,
+		   size_t *high_len);
+int mls_import_cat(struct context *context,
+		   const unsigned char *low,
+		   size_t low_len,
+		   const unsigned char *high,
+		   size_t high_len);
+
 #endif	/* _SS_MLS_H */
 
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 85e4298..7eb69a6 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -13,6 +13,11 @@
  *
  * 	Added conditional policy language extensions
  *
+ * Updated: Hewlett-Packard <paul.moore@hp.com>
+ *
+ *      Added support for NetLabel
+ *
+ * Copyright (C) 2006 Hewlett-Packard Development Company, L.P.
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  * Copyright (C) 2003 - 2004 Tresys Technology, LLC
  * Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
@@ -29,6 +34,8 @@
 #include <linux/sched.h>
 #include <linux/audit.h>
 #include <linux/mutex.h>
+#include <net/sock.h>
+#include <net/netlabel.h>
 
 #include "flask.h"
 #include "avc.h"
@@ -40,6 +47,8 @@
 #include "services.h"
 #include "conditional.h"
 #include "mls.h"
+#include "objsec.h"
+#include "selinux_netlabel.h"
 
 extern void selnl_notify_policyload(u32 seqno);
 unsigned int policydb_loaded_version;
@@ -1241,6 +1250,7 @@
 		selinux_complete_init();
 		avc_ss_reset(seqno);
 		selnl_notify_policyload(seqno);
+		selinux_netlbl_cache_invalidate();
 		return 0;
 	}
 
@@ -1295,6 +1305,7 @@
 
 	avc_ss_reset(seqno);
 	selnl_notify_policyload(seqno);
+	selinux_netlbl_cache_invalidate();
 
 	return 0;
 
@@ -1817,6 +1828,75 @@
 	return rc;
 }
 
+/*
+ * security_sid_mls_copy() - computes a new sid based on the given
+ * sid and the mls portion of mls_sid.
+ */
+int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid)
+{
+	struct context *context1;
+	struct context *context2;
+	struct context newcon;
+	char *s;
+	u32 len;
+	int rc = 0;
+
+	if (!ss_initialized || !selinux_mls_enabled) {
+		*new_sid = sid;
+		goto out;
+	}
+
+	context_init(&newcon);
+
+	POLICY_RDLOCK;
+	context1 = sidtab_search(&sidtab, sid);
+	if (!context1) {
+		printk(KERN_ERR "security_sid_mls_copy:  unrecognized SID "
+		       "%d\n", sid);
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	context2 = sidtab_search(&sidtab, mls_sid);
+	if (!context2) {
+		printk(KERN_ERR "security_sid_mls_copy:  unrecognized SID "
+		       "%d\n", mls_sid);
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	newcon.user = context1->user;
+	newcon.role = context1->role;
+	newcon.type = context1->type;
+	rc = mls_copy_context(&newcon, context2);
+	if (rc)
+		goto out_unlock;
+
+
+	/* Check the validity of the new context. */
+	if (!policydb_context_isvalid(&policydb, &newcon)) {
+		rc = convert_context_handle_invalid_context(&newcon);
+		if (rc)
+			goto bad;
+	}
+
+	rc = sidtab_context_to_sid(&sidtab, &newcon, new_sid);
+	goto out_unlock;
+
+bad:
+	if (!context_struct_to_string(&newcon, &s, &len)) {
+		audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+			  "security_sid_mls_copy: invalid context %s", s);
+		kfree(s);
+	}
+
+out_unlock:
+	POLICY_RDUNLOCK;
+	context_destroy(&newcon);
+out:
+	return rc;
+}
+
 struct selinux_audit_rule {
 	u32 au_seqno;
 	struct context au_ctxt;
@@ -2064,3 +2144,536 @@
 {
 	aurule_callback = callback;
 }
+
+#ifdef CONFIG_NETLABEL
+/*
+ * This is the structure we store inside the NetLabel cache block.
+ */
+#define NETLBL_CACHE(x)           ((struct netlbl_cache *)(x))
+#define NETLBL_CACHE_T_NONE       0
+#define NETLBL_CACHE_T_SID        1
+#define NETLBL_CACHE_T_MLS        2
+struct netlbl_cache {
+	u32 type;
+	union {
+		u32 sid;
+		struct mls_range mls_label;
+	} data;
+};
+
+/**
+ * selinux_netlbl_cache_free - Free the NetLabel cached data
+ * @data: the data to free
+ *
+ * Description:
+ * This function is intended to be used as the free() callback inside the
+ * netlbl_lsm_cache structure.
+ *
+ */
+static void selinux_netlbl_cache_free(const void *data)
+{
+	struct netlbl_cache *cache = NETLBL_CACHE(data);
+	switch (cache->type) {
+	case NETLBL_CACHE_T_MLS:
+		ebitmap_destroy(&cache->data.mls_label.level[0].cat);
+		break;
+	}
+	kfree(data);
+}
+
+/**
+ * selinux_netlbl_cache_add - Add an entry to the NetLabel cache
+ * @skb: the packet
+ * @ctx: the SELinux context
+ *
+ * Description:
+ * Attempt to cache the context in @ctx, which was derived from the packet in
+ * @skb, in the NetLabel subsystem cache.
+ *
+ */
+static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx)
+{
+	struct netlbl_cache *cache = NULL;
+	struct netlbl_lsm_secattr secattr;
+
+	netlbl_secattr_init(&secattr);
+
+	cache = kzalloc(sizeof(*cache),	GFP_ATOMIC);
+	if (cache == NULL)
+		goto netlbl_cache_add_failure;
+	secattr.cache.free = selinux_netlbl_cache_free;
+	secattr.cache.data = (void *)cache;
+
+	cache->type = NETLBL_CACHE_T_MLS;
+	if (ebitmap_cpy(&cache->data.mls_label.level[0].cat,
+			&ctx->range.level[0].cat) != 0)
+		goto netlbl_cache_add_failure;
+	cache->data.mls_label.level[1].cat.highbit =
+		cache->data.mls_label.level[0].cat.highbit;
+	cache->data.mls_label.level[1].cat.node =
+		cache->data.mls_label.level[0].cat.node;
+	cache->data.mls_label.level[0].sens = ctx->range.level[0].sens;
+	cache->data.mls_label.level[1].sens = ctx->range.level[0].sens;
+
+	if (netlbl_cache_add(skb, &secattr) != 0)
+		goto netlbl_cache_add_failure;
+
+	return;
+
+netlbl_cache_add_failure:
+	netlbl_secattr_destroy(&secattr, 1);
+}
+
+/**
+ * selinux_netlbl_cache_invalidate - Invalidate the NetLabel cache
+ *
+ * Description:
+ * Invalidate the NetLabel security attribute mapping cache.
+ *
+ */
+void selinux_netlbl_cache_invalidate(void)
+{
+	netlbl_cache_invalidate();
+}
+
+/**
+ * selinux_netlbl_secattr_to_sid - Convert a NetLabel secattr to a SELinux SID
+ * @skb: the network packet
+ * @secattr: the NetLabel packet security attributes
+ * @base_sid: the SELinux SID to use as a context for MLS only attributes
+ * @sid: the SELinux SID
+ *
+ * Description:
+ * Convert the given NetLabel packet security attributes in @secattr into a
+ * SELinux SID.  If the @secattr field does not contain a full SELinux
+ * SID/context then use the context in @base_sid as the foundation.  If @skb
+ * is not NULL attempt to cache as much data as possible.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb,
+					 struct netlbl_lsm_secattr *secattr,
+					 u32 base_sid,
+					 u32 *sid)
+{
+	int rc = -EIDRM;
+	struct context *ctx;
+	struct context ctx_new;
+	struct netlbl_cache *cache;
+
+	POLICY_RDLOCK;
+
+	if (secattr->cache.data) {
+		cache = NETLBL_CACHE(secattr->cache.data);
+		switch (cache->type) {
+		case NETLBL_CACHE_T_SID:
+			*sid = cache->data.sid;
+			rc = 0;
+			break;
+		case NETLBL_CACHE_T_MLS:
+			ctx = sidtab_search(&sidtab, base_sid);
+			if (ctx == NULL)
+				goto netlbl_secattr_to_sid_return;
+
+			ctx_new.user = ctx->user;
+			ctx_new.role = ctx->role;
+			ctx_new.type = ctx->type;
+			ctx_new.range.level[0].sens =
+				cache->data.mls_label.level[0].sens;
+			ctx_new.range.level[0].cat.highbit =
+				cache->data.mls_label.level[0].cat.highbit;
+			ctx_new.range.level[0].cat.node =
+				cache->data.mls_label.level[0].cat.node;
+			ctx_new.range.level[1].sens =
+				cache->data.mls_label.level[1].sens;
+			ctx_new.range.level[1].cat.highbit =
+				cache->data.mls_label.level[1].cat.highbit;
+			ctx_new.range.level[1].cat.node =
+				cache->data.mls_label.level[1].cat.node;
+
+			rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid);
+			break;
+		default:
+			goto netlbl_secattr_to_sid_return;
+		}
+	} else if (secattr->mls_lvl_vld) {
+		ctx = sidtab_search(&sidtab, base_sid);
+		if (ctx == NULL)
+			goto netlbl_secattr_to_sid_return;
+
+		ctx_new.user = ctx->user;
+		ctx_new.role = ctx->role;
+		ctx_new.type = ctx->type;
+		mls_import_lvl(&ctx_new, secattr->mls_lvl, secattr->mls_lvl);
+		if (secattr->mls_cat) {
+			if (mls_import_cat(&ctx_new,
+					   secattr->mls_cat,
+					   secattr->mls_cat_len,
+					   NULL,
+					   0) != 0)
+				goto netlbl_secattr_to_sid_return;
+			ctx_new.range.level[1].cat.highbit =
+				ctx_new.range.level[0].cat.highbit;
+			ctx_new.range.level[1].cat.node =
+				ctx_new.range.level[0].cat.node;
+		} else {
+			ebitmap_init(&ctx_new.range.level[0].cat);
+			ebitmap_init(&ctx_new.range.level[1].cat);
+		}
+		if (mls_context_isvalid(&policydb, &ctx_new) != 1)
+			goto netlbl_secattr_to_sid_return_cleanup;
+
+		rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid);
+		if (rc != 0)
+			goto netlbl_secattr_to_sid_return_cleanup;
+
+		if (skb != NULL)
+			selinux_netlbl_cache_add(skb, &ctx_new);
+		ebitmap_destroy(&ctx_new.range.level[0].cat);
+	} else {
+		*sid = SECINITSID_UNLABELED;
+		rc = 0;
+	}
+
+netlbl_secattr_to_sid_return:
+	POLICY_RDUNLOCK;
+	return rc;
+netlbl_secattr_to_sid_return_cleanup:
+	ebitmap_destroy(&ctx_new.range.level[0].cat);
+	goto netlbl_secattr_to_sid_return;
+}
+
+/**
+ * selinux_netlbl_skbuff_getsid - Get the sid of a packet using NetLabel
+ * @skb: the packet
+ * @base_sid: the SELinux SID to use as a context for MLS only attributes
+ * @sid: the SID
+ *
+ * Description:
+ * Call the NetLabel mechanism to get the security attributes of the given
+ * packet and use those attributes to determine the correct context/SID to
+ * assign to the packet.  Returns zero on success, negative values on failure.
+ *
+ */
+static int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
+					u32 base_sid,
+					u32 *sid)
+{
+	int rc;
+	struct netlbl_lsm_secattr secattr;
+
+	netlbl_secattr_init(&secattr);
+	rc = netlbl_skbuff_getattr(skb, &secattr);
+	if (rc == 0)
+		rc = selinux_netlbl_secattr_to_sid(skb,
+						   &secattr,
+						   base_sid,
+						   sid);
+	netlbl_secattr_destroy(&secattr, 0);
+
+	return rc;
+}
+
+/**
+ * selinux_netlbl_socket_setsid - Label a socket using the NetLabel mechanism
+ * @sock: the socket to label
+ * @sid: the SID to use
+ *
+ * Description:
+ * Attempt to label a socket using the NetLabel mechanism using the given
+ * SID.  Returns zero values on success, negative values on failure.
+ *
+ */
+static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid)
+{
+	int rc = -ENOENT;
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+	struct netlbl_lsm_secattr secattr;
+	struct context *ctx;
+
+	if (!ss_initialized)
+		return 0;
+
+	POLICY_RDLOCK;
+
+	ctx = sidtab_search(&sidtab, sid);
+	if (ctx == NULL)
+		goto netlbl_socket_setsid_return;
+
+	netlbl_secattr_init(&secattr);
+	secattr.domain = kstrdup(policydb.p_type_val_to_name[ctx->type - 1],
+				 GFP_ATOMIC);
+	mls_export_lvl(ctx, &secattr.mls_lvl, NULL);
+	secattr.mls_lvl_vld = 1;
+	mls_export_cat(ctx,
+		       &secattr.mls_cat,
+		       &secattr.mls_cat_len,
+		       NULL,
+		       NULL);
+
+	rc = netlbl_socket_setattr(sock, &secattr);
+	if (rc == 0)
+		sksec->nlbl_state = NLBL_LABELED;
+
+	netlbl_secattr_destroy(&secattr, 0);
+
+netlbl_socket_setsid_return:
+	POLICY_RDUNLOCK;
+	return rc;
+}
+
+/**
+ * selinux_netlbl_sk_security_init - Setup the NetLabel fields
+ * @ssec: the sk_security_struct
+ * @family: the socket family
+ *
+ * Description:
+ * Called when a new sk_security_struct is allocated to initialize the NetLabel
+ * fields.  PF_INET sockets are marked NLBL_REQUIRE, all others NLBL_UNSET.
+ *
+ */
+void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec,
+				     int family)
+{
+	if (family == PF_INET)
+		ssec->nlbl_state = NLBL_REQUIRE;
+	else
+		ssec->nlbl_state = NLBL_UNSET;
+}
+
+/**
+ * selinux_netlbl_sk_clone_security - Copy the NetLabel fields
+ * @ssec: the original sk_security_struct
+ * @newssec: the cloned sk_security_struct
+ *
+ * Description:
+ * Clone the NetLabel specific sk_security_struct fields from @ssec to
+ * @newssec.
+ *
+ */
+void selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec,
+				      struct sk_security_struct *newssec)
+{
+	newssec->sclass = ssec->sclass;
+	if (ssec->nlbl_state != NLBL_UNSET)
+		newssec->nlbl_state = NLBL_REQUIRE;
+	else
+		newssec->nlbl_state = NLBL_UNSET;
+}
+
+/**
+ * selinux_netlbl_socket_post_create - Label a socket using NetLabel
+ * @sock: the socket to label
+ * @sock_family: the socket family
+ * @sid: the SID to use
+ *
+ * Description:
+ * Attempt to label a socket using the NetLabel mechanism using the given
+ * SID.  Returns zero values on success, negative values on failure.
+ *
+ */
+int selinux_netlbl_socket_post_create(struct socket *sock,
+				      int sock_family,
+				      u32 sid)
+{
+	struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+
+	sksec->sclass = isec->sclass;
+
+	if (sock_family != PF_INET)
+		return 0;
+
+	sksec->nlbl_state = NLBL_REQUIRE;
+	return selinux_netlbl_socket_setsid(sock, sid);
+}
+
+/**
+ * selinux_netlbl_sock_graft - Netlabel the new socket
+ * @sk: the new connection
+ * @sock: the new socket
+ *
+ * Description:
+ * The connection represented by @sk is being grafted onto @sock so set the
+ * socket's NetLabel to match the SID of @sk.
+ *
+ */
+void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
+{
+	struct inode_security_struct *isec = SOCK_INODE(sock)->i_security;
+	struct sk_security_struct *sksec = sk->sk_security;
+
+	sksec->sclass = isec->sclass;
+
+	if (sk->sk_family != PF_INET)
+		return;
+
+	sksec->nlbl_state = NLBL_REQUIRE;
+	sksec->peer_sid = sksec->sid;
+
+	/* Try to set the NetLabel on the socket to save time later, if we fail
+	 * here we will pick up the pieces in later calls to
+	 * selinux_netlbl_inode_permission(). */
+	selinux_netlbl_socket_setsid(sock, sksec->sid);
+}
+
+/**
+ * selinux_netlbl_inet_conn_request - Handle a new connection request
+ * @skb: the packet
+ * @sock_sid: the SID of the parent socket
+ *
+ * Description:
+ * If present, use the security attributes of the packet in @skb and the
+ * parent sock's SID to arrive at a SID for the new child sock.  Returns the
+ * SID of the connection or SECSID_NULL on failure.
+ *
+ */
+u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid)
+{
+	int rc;
+	u32 peer_sid;
+
+	rc = selinux_netlbl_skbuff_getsid(skb, sock_sid, &peer_sid);
+	if (rc != 0)
+		return SECSID_NULL;
+
+	if (peer_sid == SECINITSID_UNLABELED)
+		return SECSID_NULL;
+
+	return peer_sid;
+}
+
+/**
+ * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled
+ * @inode: the file descriptor's inode
+ * @mask: the permission mask
+ *
+ * Description:
+ * Looks at a file's inode and if it is marked as a socket protected by
+ * NetLabel then verify that the socket has been labeled, if not try to label
+ * the socket now with the inode's SID.  Returns zero on success, negative
+ * values on failure.
+ *
+ */
+int selinux_netlbl_inode_permission(struct inode *inode, int mask)
+{
+	int rc;
+	struct inode_security_struct *isec;
+	struct sk_security_struct *sksec;
+	struct socket *sock;
+
+	if (!S_ISSOCK(inode->i_mode))
+		return 0;
+
+	sock = SOCKET_I(inode);
+	isec = inode->i_security;
+	sksec = sock->sk->sk_security;
+	down(&isec->sem);
+	if (unlikely(sksec->nlbl_state == NLBL_REQUIRE &&
+		     (mask & (MAY_WRITE | MAY_APPEND)))) {
+		lock_sock(sock->sk);
+		rc = selinux_netlbl_socket_setsid(sock, sksec->sid);
+		release_sock(sock->sk);
+	} else
+		rc = 0;
+	up(&isec->sem);
+
+	return rc;
+}
+
+/**
+ * selinux_netlbl_sock_rcv_skb - Do an inbound access check using NetLabel
+ * @sksec: the sock's sk_security_struct
+ * @skb: the packet
+ * @ad: the audit data
+ *
+ * Description:
+ * Fetch the NetLabel security attributes from @skb and perform an access check
+ * against the receiving socket.  Returns zero on success, negative values on
+ * error.
+ *
+ */
+int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
+				struct sk_buff *skb,
+				struct avc_audit_data *ad)
+{
+	int rc;
+	u32 netlbl_sid;
+	u32 recv_perm;
+
+	rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid);
+	if (rc != 0)
+		return rc;
+
+	if (netlbl_sid == SECINITSID_UNLABELED)
+		return 0;
+
+	switch (sksec->sclass) {
+	case SECCLASS_UDP_SOCKET:
+		recv_perm = UDP_SOCKET__RECV_MSG;
+		break;
+	case SECCLASS_TCP_SOCKET:
+		recv_perm = TCP_SOCKET__RECV_MSG;
+		break;
+	default:
+		recv_perm = RAWIP_SOCKET__RECV_MSG;
+	}
+
+	rc = avc_has_perm(sksec->sid,
+			  netlbl_sid,
+			  sksec->sclass,
+			  recv_perm,
+			  ad);
+	if (rc == 0)
+		return 0;
+
+	netlbl_skbuff_err(skb, rc);
+	return rc;
+}
+
+/**
+ * selinux_netlbl_socket_getpeersec_stream - Return the connected peer's SID
+ * @sock: the socket
+ *
+ * Description:
+ * Examine @sock to find the connected peer's SID.  Returns the SID on success
+ * or SECSID_NULL on error.
+ *
+ */
+u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock)
+{
+	struct sk_security_struct *sksec = sock->sk->sk_security;
+
+	if (sksec->peer_sid == SECINITSID_UNLABELED)
+		return SECSID_NULL;
+
+	return sksec->peer_sid;
+}
+
+/**
+ * selinux_netlbl_socket_getpeersec_dgram - Return the SID of a NetLabel packet
+ * @skb: the packet
+ *
+ * Description:
+ * Examine @skb to find the SID assigned to it by NetLabel.  Returns the SID on
+ * success, SECSID_NULL on error or when the packet resolves to unlabeled.
+ *
+ */
+u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb)
+{
+	u32 peer_sid;	/* must be u32: its address is passed as a u32 * */
+	struct sock *sk = skb->sk;
+	struct inode_security_struct *isec;
+
+	if (sk == NULL || sk->sk_socket == NULL)
+		return SECSID_NULL;
+
+	isec = SOCK_INODE(sk->sk_socket)->i_security;
+	if (selinux_netlbl_skbuff_getsid(skb, isec->sid, &peer_sid) != 0)
+		return SECSID_NULL;
+	if (peer_sid == SECINITSID_UNLABELED)
+		return SECSID_NULL;
+
+	return peer_sid;
+}
+#endif /* CONFIG_NETLABEL */
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 6c985ce..3e742b8 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -6,7 +6,12 @@
  *  Authors:  Serge Hallyn <sergeh@us.ibm.com>
  *	      Trent Jaeger <jaegert@us.ibm.com>
  *
+ *  Updated: Venkat Yekkirala <vyekkirala@TrustedCS.com>
+ *
+ *           Granular IPSec Associations for use in MLS environments.
+ *
  *  Copyright (C) 2005 International Business Machines Corporation
+ *  Copyright (C) 2006 Trusted Computer Solutions, Inc.
  *
  *	This program is free software; you can redistribute it and/or modify
  *	it under the terms of the GNU General Public License version 2,
@@ -67,10 +72,10 @@
 }
 
 /*
- * LSM hook implementation that authorizes that a socket can be used
- * with the corresponding xfrm_sec_ctx and direction.
+ * LSM hook implementation that authorizes that a flow can use
+ * a xfrm policy rule.
  */
-int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir)
 {
 	int rc = 0;
 	u32 sel_sid = SECINITSID_UNLABELED;
@@ -84,27 +89,130 @@
 		sel_sid = ctx->ctx_sid;
 	}
 
-	rc = avc_has_perm(sk_sid, sel_sid, SECCLASS_ASSOCIATION,
-			  ((dir == FLOW_DIR_IN) ? ASSOCIATION__RECVFROM :
-			   ((dir == FLOW_DIR_OUT) ?  ASSOCIATION__SENDTO :
-			    (ASSOCIATION__SENDTO | ASSOCIATION__RECVFROM))),
+	rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__POLMATCH,
 			  NULL);
 
 	return rc;
 }
 
 /*
+ * LSM hook implementation that authorizes that a state matches
+ * the given policy, flow combo.  Returns 1 on a match, 0 otherwise.
+ */
+
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp,
+			struct flowi *fl)
+{
+	u32 state_sid;
+	u32 pol_sid;
+	int err;
+
+	if (x->security)
+		state_sid = x->security->ctx_sid;
+	else
+		state_sid = SECINITSID_UNLABELED;	/* state has no context */
+
+	if (xp->security)
+		pol_sid = xp->security->ctx_sid;
+	else
+		pol_sid = SECINITSID_UNLABELED;	/* policy has no context */
+
+	err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__POLMATCH,
+			  NULL);
+
+	if (err)	/* reported as "no match", not passed up as an error */
+		return 0;
+
+	return selinux_xfrm_flow_state_match(fl, x);
+}
+
+/*
+ * LSM hook implementation that authorizes that a particular outgoing flow
+ * can use a given security association.  Returns 1 when the check succeeds
+ * and 0 otherwise (including a context that is not authorizable).
+ */
+int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm)
+{
+	int rc = 0;
+	u32 sel_sid = SECINITSID_UNLABELED;
+	struct xfrm_sec_ctx *ctx;
+
+	/* Use the association's SID if it has a context, else stay unlabeled */
+	if ((ctx = xfrm->security)) {
+		if (!selinux_authorizable_ctx(ctx))
+			return 0;
+
+		sel_sid = ctx->ctx_sid;
+	}
+
+	rc = avc_has_perm(fl->secid, sel_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__SENDTO,
+			  NULL)? 0:1;	/* zero result -> 1, anything else -> 0 */
+
+	return rc;
+}
+
+/*
+ * LSM hook implementation that determines the sid for the session.
+ * Returns 0, or -EINVAL if ckall is set and the states' sids differ.
+ */
+int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
+{
+	struct sec_path *sp;
+
+	*sid = SECSID_NULL;	/* stays SECSID_NULL unless a state supplies one */
+
+	if (skb == NULL)
+		return 0;
+
+	sp = skb->sp;
+	if (sp) {
+		int i, sid_set = 0;
+
+		for (i = sp->len-1; i >= 0; i--) {	/* last entry first */
+			struct xfrm_state *x = sp->xvec[i];
+			if (selinux_authorizable_xfrm(x)) {
+				struct xfrm_sec_ctx *ctx = x->security;
+
+				if (!sid_set) {
+					*sid = ctx->ctx_sid;
+					sid_set = 1;
+					/* Without ckall the first sid found is final. */
+					if (!ckall)
+						break;
+				}
+				else if (*sid != ctx->ctx_sid)
+					return -EINVAL;	/* states disagree */
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
  * Security blob allocation for xfrm_policy and xfrm_state
  * CTX does not have a meaningful value on input
  */
-static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx)
+static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
+	struct xfrm_user_sec_ctx *uctx, struct xfrm_sec_ctx *pol, u32 sid)
 {
 	int rc = 0;
 	struct task_security_struct *tsec = current->security;
-	struct xfrm_sec_ctx *ctx;
+	struct xfrm_sec_ctx *ctx = NULL;
+	char *ctx_str = NULL;
+	u32 str_len;
+	u32 ctx_sid;
 
-	BUG_ON(!uctx);
-	BUG_ON(uctx->ctx_doi != XFRM_SC_ALG_SELINUX);
+	BUG_ON(uctx && pol);
+
+	if (!uctx)
+		goto not_from_user;
+
+	if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX)
+		return -EINVAL;
 
 	if (uctx->ctx_len >= PAGE_SIZE)
 		return -ENOMEM;
@@ -141,9 +249,43 @@
 
 	return rc;
 
+not_from_user:
+	if (pol) {
+		rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid);
+		if (rc)
+			goto out;
+	}
+	else
+		ctx_sid = sid;
+
+	rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len);
+	if (rc)
+		goto out;
+
+	*ctxp = ctx = kmalloc(sizeof(*ctx) +
+			      str_len,
+			      GFP_ATOMIC);
+
+	if (!ctx) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	ctx->ctx_doi = XFRM_SC_DOI_LSM;
+	ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+	ctx->ctx_sid = ctx_sid;
+	ctx->ctx_len = str_len;
+	memcpy(ctx->ctx_str,
+	       ctx_str,
+	       str_len);
+
+	goto out2;
+
 out:
 	*ctxp = NULL;
 	kfree(ctx);
+out2:
+	kfree(ctx_str);
 	return rc;
 }
 
@@ -151,13 +293,23 @@
  * LSM hook implementation that allocs and transfers uctx spec to
  * xfrm_policy.
  */
-int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *uctx)
+int selinux_xfrm_policy_alloc(struct xfrm_policy *xp,
+		struct xfrm_user_sec_ctx *uctx, struct sock *sk)
 {
 	int err;
+	u32 sid;
 
 	BUG_ON(!xp);
+	BUG_ON(uctx && sk);
 
-	err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx);
+	if (sk) {
+		struct sk_security_struct *ssec = sk->sk_security;
+		sid = ssec->sid;
+	}
+	else
+		sid = SECSID_NULL;
+
+	err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, sid);
 	return err;
 }
 
@@ -217,13 +369,14 @@
  * LSM hook implementation that allocs and transfers sec_ctx spec to
  * xfrm_state.
  */
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx)
+int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx,
+		struct xfrm_sec_ctx *pol, u32 secid)
 {
 	int err;
 
 	BUG_ON(!x);
 
-	err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx);
+	err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, pol, secid);
 	return err;
 }
 
@@ -329,38 +482,30 @@
  * we need to check for unlabelled access since this may not have
  * gone thru the IPSec process.
  */
-int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb)
+int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
+				struct avc_audit_data *ad)
 {
 	int i, rc = 0;
 	struct sec_path *sp;
+	u32 sel_sid = SECINITSID_UNLABELED;
 
 	sp = skb->sp;
 
 	if (sp) {
-		/*
-		 * __xfrm_policy_check does not approve unless xfrm_policy_ok
-		 * says that spi's match for policy and the socket.
-		 *
-		 *  Only need to verify the existence of an authorizable sp.
-		 */
 		for (i = 0; i < sp->len; i++) {
 			struct xfrm_state *x = sp->xvec[i];
 
-			if (x && selinux_authorizable_xfrm(x))
-				goto accept;
+			if (x && selinux_authorizable_xfrm(x)) {
+				struct xfrm_sec_ctx *ctx = x->security;
+				sel_sid = ctx->ctx_sid;
+				break;
+			}
 		}
 	}
 
-	/* check SELinux sock for unlabelled access */
-	rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
-			  ASSOCIATION__RECVFROM, NULL);
-	if (rc)
-		goto drop;
+	rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION,
+			  ASSOCIATION__RECVFROM, ad);
 
-accept:
-	return 0;
-
-drop:
 	return rc;
 }
 
@@ -371,7 +516,8 @@
  * If we do have a authorizable security association, then it has already been
  * checked in xfrm_policy_lookup hook.
  */
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb)
+int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
+					struct avc_audit_data *ad)
 {
 	struct dst_entry *dst;
 	int rc = 0;
@@ -391,7 +537,7 @@
 	}
 
 	rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
-			  ASSOCIATION__SENDTO, NULL);
+			  ASSOCIATION__SENDTO, ad);
 out:
 	return rc;
 }
diff --git a/sound/aoa/Kconfig b/sound/aoa/Kconfig
index 2f4334d..5d5813c 100644
--- a/sound/aoa/Kconfig
+++ b/sound/aoa/Kconfig
@@ -1,5 +1,5 @@
 menu "Apple Onboard Audio driver"
-	depends on SND!=n && PPC
+	depends on SND!=n && PPC_PMAC
 
 config SND_AOA
 	tristate "Apple Onboard Audio driver"
diff --git a/sound/aoa/codecs/Kconfig b/sound/aoa/codecs/Kconfig
index 90cf58f..d5fbd60 100644
--- a/sound/aoa/codecs/Kconfig
+++ b/sound/aoa/codecs/Kconfig
@@ -1,6 +1,8 @@
 config SND_AOA_ONYX
 	tristate "support Onyx chip"
 	depends on SND_AOA
+	select I2C
+	select I2C_POWERMAC
 	---help---
 	This option enables support for the Onyx (pcm3052)
 	codec chip found in the latest Apple machines
@@ -18,6 +20,8 @@
 config SND_AOA_TAS
 	tristate "support TAS chips"
 	depends on SND_AOA
+	select I2C
+	select I2C_POWERMAC
 	---help---
 	This option enables support for the tas chips
 	found in a lot of Apple Machines, especially
diff --git a/sound/aoa/codecs/snd-aoa-codec-tas.c b/sound/aoa/codecs/snd-aoa-codec-tas.c
index 16c0b6b..2ef55a1 100644
--- a/sound/aoa/codecs/snd-aoa-codec-tas.c
+++ b/sound/aoa/codecs/snd-aoa-codec-tas.c
@@ -66,6 +66,8 @@
 #include <asm/prom.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+
 MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("tas codec driver for snd-aoa");
@@ -91,6 +93,10 @@
 	u8			bass, treble;
 	u8			acr;
 	int			drc_range;
+	/* protects hardware access against concurrency from
+	 * userspace when hitting controls and during
+	 * codec init/suspend/resume */
+	struct mutex		mtx;
 };
 
 static int tas_reset_init(struct tas *tas);
@@ -231,8 +237,10 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->cached_volume_l;
 	ucontrol->value.integer.value[1] = tas->cached_volume_r;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -241,14 +249,18 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	if (tas->cached_volume_l == ucontrol->value.integer.value[0]
-	 && tas->cached_volume_r == ucontrol->value.integer.value[1])
+	 && tas->cached_volume_r == ucontrol->value.integer.value[1]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->cached_volume_l = ucontrol->value.integer.value[0];
 	tas->cached_volume_r = ucontrol->value.integer.value[1];
 	if (tas->hw_enabled)
 		tas_set_volume(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -276,8 +288,10 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = !tas->mute_l;
 	ucontrol->value.integer.value[1] = !tas->mute_r;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -286,14 +300,18 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	if (tas->mute_l == !ucontrol->value.integer.value[0]
-	 && tas->mute_r == !ucontrol->value.integer.value[1])
+	 && tas->mute_r == !ucontrol->value.integer.value[1]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->mute_l = !ucontrol->value.integer.value[0];
 	tas->mute_r = !ucontrol->value.integer.value[1];
 	if (tas->hw_enabled)
 		tas_set_volume(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -322,8 +340,10 @@
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 	int idx = kcontrol->private_value;
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->mixer_l[idx];
 	ucontrol->value.integer.value[1] = tas->mixer_r[idx];
+	mutex_unlock(&tas->mtx);
 
 	return 0;
 }
@@ -334,15 +354,19 @@
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 	int idx = kcontrol->private_value;
 
+	mutex_lock(&tas->mtx);
 	if (tas->mixer_l[idx] == ucontrol->value.integer.value[0]
-	 && tas->mixer_r[idx] == ucontrol->value.integer.value[1])
+	 && tas->mixer_r[idx] == ucontrol->value.integer.value[1]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->mixer_l[idx] = ucontrol->value.integer.value[0];
 	tas->mixer_r[idx] = ucontrol->value.integer.value[1];
 
 	if (tas->hw_enabled)
 		tas_set_mixer(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -375,7 +399,9 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->drc_range;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -384,12 +410,16 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
-	if (tas->drc_range == ucontrol->value.integer.value[0])
+	mutex_lock(&tas->mtx);
+	if (tas->drc_range == ucontrol->value.integer.value[0]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->drc_range = ucontrol->value.integer.value[0];
 	if (tas->hw_enabled)
 		tas3004_set_drc(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -417,7 +447,9 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->drc_enabled;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -426,12 +458,16 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
-	if (tas->drc_enabled == ucontrol->value.integer.value[0])
+	mutex_lock(&tas->mtx);
+	if (tas->drc_enabled == ucontrol->value.integer.value[0]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->drc_enabled = ucontrol->value.integer.value[0];
 	if (tas->hw_enabled)
 		tas3004_set_drc(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -463,7 +499,9 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.enumerated.item[0] = !!(tas->acr & TAS_ACR_INPUT_B);
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -471,15 +509,21 @@
 	struct snd_ctl_elem_value *ucontrol)
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
-	int oldacr = tas->acr;
+	int oldacr;
+
+	mutex_lock(&tas->mtx);
+	oldacr = tas->acr;
 
 	tas->acr &= ~TAS_ACR_INPUT_B;
 	if (ucontrol->value.enumerated.item[0])
 		tas->acr |= TAS_ACR_INPUT_B;
-	if (oldacr == tas->acr)
+	if (oldacr == tas->acr) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 	if (tas->hw_enabled)
 		tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -518,7 +562,9 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->treble;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -527,12 +573,16 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
-	if (tas->treble == ucontrol->value.integer.value[0])
+	mutex_lock(&tas->mtx);
+	if (tas->treble == ucontrol->value.integer.value[0]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->treble = ucontrol->value.integer.value[0];
 	if (tas->hw_enabled)
 		tas_set_treble(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -560,7 +610,9 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
+	mutex_lock(&tas->mtx);
 	ucontrol->value.integer.value[0] = tas->bass;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -569,12 +621,16 @@
 {
 	struct tas *tas = snd_kcontrol_chip(kcontrol);
 
-	if (tas->bass == ucontrol->value.integer.value[0])
+	mutex_lock(&tas->mtx);
+	if (tas->bass == ucontrol->value.integer.value[0]) {
+		mutex_unlock(&tas->mtx);
 		return 0;
+	}
 
 	tas->bass = ucontrol->value.integer.value[0];
 	if (tas->hw_enabled)
 		tas_set_bass(tas);
+	mutex_unlock(&tas->mtx);
 	return 1;
 }
 
@@ -628,16 +684,16 @@
 
 	tmp = TAS_MCS_SCLK64 | TAS_MCS_SPORT_MODE_I2S | TAS_MCS_SPORT_WL_24BIT;
 	if (tas_write_reg(tas, TAS_REG_MCS, 1, &tmp))
-		return -ENODEV;
+		goto outerr;
 
 	tas->acr |= TAS_ACR_ANALOG_PDOWN | TAS_ACR_B_MONAUREAL |
 		TAS_ACR_B_MON_SEL_RIGHT;
 	if (tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr))
-		return -ENODEV;
+		goto outerr;
 
 	tmp = 0;
 	if (tas_write_reg(tas, TAS_REG_MCS2, 1, &tmp))
-		return -ENODEV;
+		goto outerr;
 
 	tas3004_set_drc(tas);
 
@@ -649,9 +705,11 @@
 
 	tas->acr &= ~TAS_ACR_ANALOG_PDOWN;
 	if (tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr))
-		return -ENODEV;
+		goto outerr;
 
 	return 0;
+ outerr:
+	return -ENODEV;
 }
 
 static int tas_switch_clock(struct codec_info_item *cii, enum clock_switch clock)
@@ -666,11 +724,13 @@
 		break;
 	case CLOCK_SWITCH_SLAVE:
 		/* Clocks are back, re-init the codec */
+		mutex_lock(&tas->mtx);
 		tas_reset_init(tas);
 		tas_set_volume(tas);
 		tas_set_mixer(tas);
 		tas->hw_enabled = 1;
 		tas->codec.gpio->methods->all_amps_restore(tas->codec.gpio);
+		mutex_unlock(&tas->mtx);
 		break;
 	default:
 		/* doesn't happen as of now */
@@ -684,19 +744,23 @@
  * our i2c device is suspended, and then take note of that! */
 static int tas_suspend(struct tas *tas)
 {
+	mutex_lock(&tas->mtx);
 	tas->hw_enabled = 0;
 	tas->acr |= TAS_ACR_ANALOG_PDOWN;
 	tas_write_reg(tas, TAS_REG_ACR, 1, &tas->acr);
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
 static int tas_resume(struct tas *tas)
 {
 	/* reset codec */
+	mutex_lock(&tas->mtx);
 	tas_reset_init(tas);
 	tas_set_volume(tas);
 	tas_set_mixer(tas);
 	tas->hw_enabled = 1;
+	mutex_unlock(&tas->mtx);
 	return 0;
 }
 
@@ -739,11 +803,14 @@
 		return -EINVAL;
 	}
 
+	mutex_lock(&tas->mtx);
 	if (tas_reset_init(tas)) {
 		printk(KERN_ERR PFX "tas failed to initialise\n");
+		mutex_unlock(&tas->mtx);
 		return -ENXIO;
 	}
 	tas->hw_enabled = 1;
+	mutex_unlock(&tas->mtx);
 
 	if (tas->codec.soundbus_dev->attach_codec(tas->codec.soundbus_dev,
 						   aoa_get_card(),
@@ -822,6 +889,7 @@
 	if (!tas)
 		return -ENOMEM;
 
+	mutex_init(&tas->mtx);
 	tas->i2c.driver = &tas_driver;
 	tas->i2c.adapter = adapter;
 	tas->i2c.addr = addr;
@@ -850,6 +918,7 @@
  detach:
 	i2c_detach_client(&tas->i2c);
  fail:
+	mutex_destroy(&tas->mtx);
 	kfree(tas);
 	return -EINVAL;
 }
@@ -908,6 +977,7 @@
 	/* power down codec chip */
 	tas_write_reg(tas, TAS_REG_ACR, 1, &tmp);
 
+	mutex_destroy(&tas->mtx);
 	kfree(tas);
 	return 0;
 }
diff --git a/sound/aoa/core/snd-aoa-gpio-feature.c b/sound/aoa/core/snd-aoa-gpio-feature.c
index f69d333..7c26089 100644
--- a/sound/aoa/core/snd-aoa-gpio-feature.c
+++ b/sound/aoa/core/snd-aoa-gpio-feature.c
@@ -56,7 +56,7 @@
 {
 	struct device_node *np, *gpio;
 	u32 *reg;
-	char *audio_gpio;
+	const char *audio_gpio;
 
 	*gpioptr = -1;
 
diff --git a/sound/core/control.c b/sound/core/control.c
index bb397ea..6973a96 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -75,6 +75,8 @@
 	init_waitqueue_head(&ctl->change_sleep);
 	spin_lock_init(&ctl->read_lock);
 	ctl->card = card;
+	ctl->prefer_pcm_subdevice = -1;
+	ctl->prefer_rawmidi_subdevice = -1;
 	ctl->pid = current->pid;
 	file->private_data = ctl;
 	write_lock_irqsave(&card->ctl_files_rwlock, flags);
@@ -236,11 +238,16 @@
 	kctl.id.index = ncontrol->index;
 	kctl.count = ncontrol->count ? ncontrol->count : 1;
 	access = ncontrol->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE :
-		 (ncontrol->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE|SNDRV_CTL_ELEM_ACCESS_INACTIVE|
-		 		      SNDRV_CTL_ELEM_ACCESS_DINDIRECT|SNDRV_CTL_ELEM_ACCESS_INDIRECT));
+		 (ncontrol->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE|
+				      SNDRV_CTL_ELEM_ACCESS_INACTIVE|
+		 		      SNDRV_CTL_ELEM_ACCESS_DINDIRECT|
+		 		      SNDRV_CTL_ELEM_ACCESS_INDIRECT|
+		 		      SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE|
+		 		      SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK));
 	kctl.info = ncontrol->info;
 	kctl.get = ncontrol->get;
 	kctl.put = ncontrol->put;
+	kctl.tlv.p = ncontrol->tlv.p;
 	kctl.private_value = ncontrol->private_value;
 	kctl.private_data = private_data;
 	return snd_ctl_new(&kctl, access);
@@ -882,6 +889,8 @@
 	struct snd_ctl_elem_info info;
 	void *elem_data;		/* element data */
 	unsigned long elem_data_size;	/* size of element data in bytes */
+	void *tlv_data;			/* TLV data */
+	unsigned long tlv_data_size;	/* TLV data size */
 	void *priv_data;		/* private data (like strings for enumerated type) */
 	unsigned long priv_data_size;	/* size of private data in bytes */
 };
@@ -916,9 +925,48 @@
 	return change;
 }
 
+static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol,
+				 int op_flag,		/* > 0: store, else: fetch */
+				 unsigned int size,	/* user buffer size in bytes */
+				 unsigned int __user *tlv)
+{
+	struct user_element *ue = kcontrol->private_data;
+	int change = 0;	/* 1 if a store altered the saved TLV data */
+	void *new_data;
+
+	if (op_flag > 0) {
+		if (size > 1024 * 128)	/* sane value */
+			return -EINVAL;
+		new_data = kmalloc(size, GFP_KERNEL);
+		if (new_data == NULL)
+			return -ENOMEM;
+		if (copy_from_user(new_data, tlv, size)) {
+			kfree(new_data);
+			return -EFAULT;
+		}
+		change = ue->tlv_data_size != size;
+		if (!change)	/* memcmp() may be negative; keep change 0/1 */
+			change = memcmp(ue->tlv_data, new_data, size) != 0;
+		kfree(ue->tlv_data);
+		ue->tlv_data = new_data;
+		ue->tlv_data_size = size;
+	} else {
+		if (! ue->tlv_data_size || ! ue->tlv_data)
+			return -ENXIO;	/* nothing has been stored yet */
+		if (size < ue->tlv_data_size)
+			return -ENOSPC;	/* user buffer too small */
+		if (copy_to_user(tlv, ue->tlv_data, ue->tlv_data_size))
+			return -EFAULT;
+	}
+	return change;	/* 0 or 1 here; failures above return negative codes */
+}
+
 static void snd_ctl_elem_user_free(struct snd_kcontrol *kcontrol)
 {
-	kfree(kcontrol->private_data);
+	struct user_element *ue = kcontrol->private_data;
+	if (ue->tlv_data)
+		kfree(ue->tlv_data);
+	kfree(ue);
 }
 
 static int snd_ctl_elem_add(struct snd_ctl_file *file,
@@ -937,7 +985,8 @@
 		return -EINVAL;
 	access = info->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE :
 		(info->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE|
-				 SNDRV_CTL_ELEM_ACCESS_INACTIVE));
+				 SNDRV_CTL_ELEM_ACCESS_INACTIVE|
+				 SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE));
 	info->id.numid = 0;
 	memset(&kctl, 0, sizeof(kctl));
 	down_write(&card->controls_rwsem);
@@ -963,6 +1012,10 @@
 		kctl.get = snd_ctl_elem_user_get;
 	if (access & SNDRV_CTL_ELEM_ACCESS_WRITE)
 		kctl.put = snd_ctl_elem_user_put;
+	if (access & SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE) {
+		kctl.tlv.c = snd_ctl_elem_user_tlv;
+		access |= SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
+	}
 	switch (info->type) {
 	case SNDRV_CTL_ELEM_TYPE_BOOLEAN:
 		private_size = sizeof(char);
@@ -997,6 +1050,7 @@
 	if (ue == NULL)
 		return -ENOMEM;
 	ue->info = *info;
+	ue->info.access = 0;
 	ue->elem_data = (char *)ue + sizeof(*ue);
 	ue->elem_data_size = private_size;
 	kctl.private_free = snd_ctl_elem_user_free;
@@ -1067,6 +1121,67 @@
 	return 0;
 }
 
+static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file,
+                             struct snd_ctl_tlv __user *_tlv,
+                             int op_flag)
+{
+	struct snd_card *card = file->card;
+	struct snd_ctl_tlv tlv;
+	struct snd_kcontrol *kctl;
+	struct snd_kcontrol_volatile *vd;
+	unsigned int len;
+	int err = 0;
+	/* op_flag selects the operation: 0 = read, > 0 = write, < 0 = command. */
+	if (copy_from_user(&tlv, _tlv, sizeof(tlv)))
+		return -EFAULT;
+	if (tlv.length < sizeof(unsigned int) * 3)
+		return -EINVAL;
+	down_read(&card->controls_rwsem);
+	kctl = snd_ctl_find_numid(card, tlv.numid);
+	if (kctl == NULL) {
+		err = -ENOENT;
+		goto __kctl_end;
+	}
+	if (kctl->tlv.p == NULL) {
+		err = -ENXIO;
+		goto __kctl_end;
+	}
+	vd = &kctl->vd[tlv.numid - kctl->id.numid];	/* entry for this numid */
+	if ((op_flag == 0 && (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ) == 0) ||
+	    (op_flag > 0 && (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) == 0) ||
+	    (op_flag < 0 && (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND) == 0)) {
+	    	err = -ENXIO;	/* requested operation not enabled in access bits */
+	    	goto __kctl_end;
+	}
+	if (vd->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) {
+		if (file && vd->owner != NULL && vd->owner != file) {
+			err = -EPERM;	/* element is owned by a different file */
+			goto __kctl_end;
+		}
+		err = kctl->tlv.c(kctl, op_flag, tlv.length, _tlv->tlv); 
+		if (err > 0) {	/* positive: send a TLV event, report success */
+			up_read(&card->controls_rwsem);
+			snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_TLV, &kctl->id);
+			return 0;
+		}
+	} else {	/* no callback: tlv.p can only be read */
+		if (op_flag) {
+			err = -ENXIO;
+			goto __kctl_end;
+		}
+		/* NOTE(review): presumably p[1] is the payload length - confirm */
+		len = kctl->tlv.p[1] + 2 * sizeof(unsigned int);
+		if (tlv.length < len) {
+			err = -ENOMEM;
+			goto __kctl_end;
+		}
+		if (copy_to_user(_tlv->tlv, kctl->tlv.p, len))
+			err = -EFAULT;
+	}
+      __kctl_end:
+	up_read(&card->controls_rwsem);
+	return err;
+}
+
 static long snd_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct snd_ctl_file *ctl;
@@ -1086,11 +1201,11 @@
 	case SNDRV_CTL_IOCTL_CARD_INFO:
 		return snd_ctl_card_info(card, ctl, cmd, argp);
 	case SNDRV_CTL_IOCTL_ELEM_LIST:
-		return snd_ctl_elem_list(ctl->card, argp);
+		return snd_ctl_elem_list(card, argp);
 	case SNDRV_CTL_IOCTL_ELEM_INFO:
 		return snd_ctl_elem_info_user(ctl, argp);
 	case SNDRV_CTL_IOCTL_ELEM_READ:
-		return snd_ctl_elem_read_user(ctl->card, argp);
+		return snd_ctl_elem_read_user(card, argp);
 	case SNDRV_CTL_IOCTL_ELEM_WRITE:
 		return snd_ctl_elem_write_user(ctl, argp);
 	case SNDRV_CTL_IOCTL_ELEM_LOCK:
@@ -1105,6 +1220,12 @@
 		return snd_ctl_elem_remove(ctl, argp);
 	case SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS:
 		return snd_ctl_subscribe_events(ctl, ip);
+	case SNDRV_CTL_IOCTL_TLV_READ:
+		return snd_ctl_tlv_ioctl(ctl, argp, 0);
+	case SNDRV_CTL_IOCTL_TLV_WRITE:
+		return snd_ctl_tlv_ioctl(ctl, argp, 1);
+	case SNDRV_CTL_IOCTL_TLV_COMMAND:
+		return snd_ctl_tlv_ioctl(ctl, argp, -1);
 	case SNDRV_CTL_IOCTL_POWER:
 		return -ENOPROTOOPT;
 	case SNDRV_CTL_IOCTL_POWER_STATE:
@@ -1338,6 +1459,11 @@
 	struct snd_card *card = device->device_data;
 	struct list_head *flist;
 	struct snd_ctl_file *ctl;
+	int err, cardnum;
+
+	snd_assert(card != NULL, return -ENXIO);
+	cardnum = card->number;
+	snd_assert(cardnum >= 0 && cardnum < SNDRV_CARDS, return -ENXIO);
 
 	down_read(&card->controls_rwsem);
 	list_for_each(flist, &card->ctl_files) {
@@ -1346,6 +1472,10 @@
 		kill_fasync(&ctl->fasync, SIGIO, POLL_ERR);
 	}
 	up_read(&card->controls_rwsem);
+
+	if ((err = snd_unregister_device(SNDRV_DEVICE_TYPE_CONTROL,
+					 card, -1)) < 0)
+		return err;
 	return 0;
 }
 
@@ -1367,23 +1497,6 @@
 }
 
 /*
- * de-registration of the control device
- */
-static int snd_ctl_dev_unregister(struct snd_device *device)
-{
-	struct snd_card *card = device->device_data;
-	int err, cardnum;
-
-	snd_assert(card != NULL, return -ENXIO);
-	cardnum = card->number;
-	snd_assert(cardnum >= 0 && cardnum < SNDRV_CARDS, return -ENXIO);
-	if ((err = snd_unregister_device(SNDRV_DEVICE_TYPE_CONTROL,
-					 card, -1)) < 0)
-		return err;
-	return snd_ctl_dev_free(device);
-}
-
-/*
  * create control core:
  * called from init.c
  */
@@ -1393,7 +1506,6 @@
 		.dev_free = snd_ctl_dev_free,
 		.dev_register =	snd_ctl_dev_register,
 		.dev_disconnect = snd_ctl_dev_disconnect,
-		.dev_unregister = snd_ctl_dev_unregister
 	};
 
 	snd_assert(card != NULL, return -ENXIO);
diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
index 3c0161b..ab48962 100644
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c
@@ -407,6 +407,10 @@
 	case SNDRV_CTL_IOCTL_POWER_STATE:
 	case SNDRV_CTL_IOCTL_ELEM_LOCK:
 	case SNDRV_CTL_IOCTL_ELEM_UNLOCK:
+	case SNDRV_CTL_IOCTL_ELEM_REMOVE:
+	case SNDRV_CTL_IOCTL_TLV_READ:
+	case SNDRV_CTL_IOCTL_TLV_WRITE:
+	case SNDRV_CTL_IOCTL_TLV_COMMAND:
 		return snd_ctl_ioctl(file, cmd, (unsigned long)argp);
 	case SNDRV_CTL_IOCTL_ELEM_LIST32:
 		return snd_ctl_elem_list_compat(ctl->card, argp);
diff --git a/sound/core/device.c b/sound/core/device.c
index 6ce4da4..ccb2581 100644
--- a/sound/core/device.c
+++ b/sound/core/device.c
@@ -71,7 +71,7 @@
  * @device_data: the data pointer to release
  *
  * Removes the device from the list on the card and invokes the
- * callback, dev_unregister or dev_free, corresponding to the state.
+ * callbacks, dev_disconnect and dev_free, corresponding to the state.
  * Then release the device.
  *
  * Returns zero if successful, or a negative error code on failure or if the
@@ -90,16 +90,14 @@
 			continue;
 		/* unlink */
 		list_del(&dev->list);
-		if ((dev->state == SNDRV_DEV_REGISTERED ||
-		     dev->state == SNDRV_DEV_DISCONNECTED) &&
-		    dev->ops->dev_unregister) {
-			if (dev->ops->dev_unregister(dev))
-				snd_printk(KERN_ERR "device unregister failure\n");
-		} else {
-			if (dev->ops->dev_free) {
-				if (dev->ops->dev_free(dev))
-					snd_printk(KERN_ERR "device free failure\n");
-			}
+		if (dev->state == SNDRV_DEV_REGISTERED &&
+		    dev->ops->dev_disconnect)
+			if (dev->ops->dev_disconnect(dev))
+				snd_printk(KERN_ERR
+					   "device disconnect failure\n");
+		if (dev->ops->dev_free) {
+			if (dev->ops->dev_free(dev))
+				snd_printk(KERN_ERR "device free failure\n");
 		}
 		kfree(dev);
 		return 0;
diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c
index 8bd0dcc..9aa9d94 100644
--- a/sound/core/hwdep.c
+++ b/sound/core/hwdep.c
@@ -42,7 +42,7 @@
 static int snd_hwdep_free(struct snd_hwdep *hwdep);
 static int snd_hwdep_dev_free(struct snd_device *device);
 static int snd_hwdep_dev_register(struct snd_device *device);
-static int snd_hwdep_dev_unregister(struct snd_device *device);
+static int snd_hwdep_dev_disconnect(struct snd_device *device);
 
 
 static struct snd_hwdep *snd_hwdep_search(struct snd_card *card, int device)
@@ -353,7 +353,7 @@
 	static struct snd_device_ops ops = {
 		.dev_free = snd_hwdep_dev_free,
 		.dev_register = snd_hwdep_dev_register,
-		.dev_unregister = snd_hwdep_dev_unregister
+		.dev_disconnect = snd_hwdep_dev_disconnect,
 	};
 
 	snd_assert(rhwdep != NULL, return -EINVAL);
@@ -439,7 +439,7 @@
 	return 0;
 }
 
-static int snd_hwdep_dev_unregister(struct snd_device *device)
+static int snd_hwdep_dev_disconnect(struct snd_device *device)
 {
 	struct snd_hwdep *hwdep = device->device_data;
 
@@ -454,9 +454,9 @@
 		snd_unregister_oss_device(hwdep->oss_type, hwdep->card, hwdep->device);
 #endif
 	snd_unregister_device(SNDRV_DEVICE_TYPE_HWDEP, hwdep->card, hwdep->device);
-	list_del(&hwdep->list);
+	list_del_init(&hwdep->list);
 	mutex_unlock(&register_mutex);
-	return snd_hwdep_free(hwdep);
+	return 0;
 }
 
 #ifdef CONFIG_PROC_FS
@@ -497,7 +497,7 @@
 
 static void __exit snd_hwdep_proc_done(void)
 {
-	snd_info_unregister(snd_hwdep_proc_entry);
+	snd_info_free_entry(snd_hwdep_proc_entry);
 }
 #else /* !CONFIG_PROC_FS */
 #define snd_hwdep_proc_init()
diff --git a/sound/core/info.c b/sound/core/info.c
index 340332c..e43662b 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -78,6 +78,7 @@
 
 static int snd_info_version_init(void);
 static int snd_info_version_done(void);
+static void snd_info_disconnect(struct snd_info_entry *entry);
 
 
 /* resize the proc r/w buffer */
@@ -174,15 +175,15 @@
 	switch (entry->content) {
 	case SNDRV_INFO_CONTENT_TEXT:
 		switch (orig) {
-		case 0:	/* SEEK_SET */
+		case SEEK_SET:
 			file->f_pos = offset;
 			ret = file->f_pos;
 			goto out;
-		case 1:	/* SEEK_CUR */
+		case SEEK_CUR:
 			file->f_pos += offset;
 			ret = file->f_pos;
 			goto out;
-		case 2:	/* SEEK_END */
+		case SEEK_END:
 		default:
 			ret = -EINVAL;
 			goto out;
@@ -304,7 +305,7 @@
 	mutex_lock(&info_mutex);
 	p = PDE(inode);
 	entry = p == NULL ? NULL : (struct snd_info_entry *)p->data;
-	if (entry == NULL || entry->disconnected) {
+	if (entry == NULL || ! entry->p) {
 		mutex_unlock(&info_mutex);
 		return -ENODEV;
 	}
@@ -586,10 +587,10 @@
 	snd_info_version_done();
 	if (snd_proc_root) {
 #if defined(CONFIG_SND_SEQUENCER) || defined(CONFIG_SND_SEQUENCER_MODULE)
-		snd_info_unregister(snd_seq_root);
+		snd_info_free_entry(snd_seq_root);
 #endif
 #ifdef CONFIG_SND_OSSEMUL
-		snd_info_unregister(snd_oss_root);
+		snd_info_free_entry(snd_oss_root);
 #endif
 		snd_remove_proc_entry(&proc_root, snd_proc_root);
 	}
@@ -648,17 +649,28 @@
  * de-register the card proc file
  * called from init.c
  */
-int snd_info_card_free(struct snd_card *card)
+void snd_info_card_disconnect(struct snd_card *card)
 {
-	snd_assert(card != NULL, return -ENXIO);
+	snd_assert(card != NULL, return);
+	mutex_lock(&info_mutex);
 	if (card->proc_root_link) {
 		snd_remove_proc_entry(snd_proc_root, card->proc_root_link);
 		card->proc_root_link = NULL;
 	}
-	if (card->proc_root) {
-		snd_info_unregister(card->proc_root);
-		card->proc_root = NULL;
-	}
+	if (card->proc_root)
+		snd_info_disconnect(card->proc_root);
+	mutex_unlock(&info_mutex);
+}
+
+/*
+ * release the card proc file resources
+ * called from init.c
+ */
+int snd_info_card_free(struct snd_card *card)
+{
+	snd_assert(card != NULL, return -ENXIO);
+	snd_info_free_entry(card->proc_root);
+	card->proc_root = NULL;
 	return 0;
 }
 
@@ -767,6 +779,8 @@
 	entry->mode = S_IFREG | S_IRUGO;
 	entry->content = SNDRV_INFO_CONTENT_TEXT;
 	mutex_init(&entry->access);
+	INIT_LIST_HEAD(&entry->children);
+	INIT_LIST_HEAD(&entry->list);
 	return entry;
 }
 
@@ -819,6 +833,24 @@
 
 EXPORT_SYMBOL(snd_info_create_card_entry);
 
+static void snd_info_disconnect(struct snd_info_entry *entry)
+{
+	struct list_head *p, *n;
+	struct proc_dir_entry *root;
+
+	/* Children first; _safe because a child may unlink itself from the list */
+	list_for_each_safe(p, n, &entry->children) {
+		snd_info_disconnect(list_entry(p, struct snd_info_entry, list));
+	}
+
+	if (! entry->p)	/* no proc entry left to remove */
+		return;
+	list_del_init(&entry->list);
+	root = entry->parent == NULL ? snd_proc_root : entry->parent->p;
+	snd_assert(root, return);
+	snd_remove_proc_entry(root, entry->p);
+	entry->p = NULL;	/* a NULL p is what marks the entry disconnected */
+}
+
 static int snd_info_dev_free_entry(struct snd_device *device)
 {
 	struct snd_info_entry *entry = device->device_data;
@@ -832,19 +864,6 @@
 	return snd_info_register(entry);
 }
 
-static int snd_info_dev_disconnect_entry(struct snd_device *device)
-{
-	struct snd_info_entry *entry = device->device_data;
-	entry->disconnected = 1;
-	return 0;
-}
-
-static int snd_info_dev_unregister_entry(struct snd_device *device)
-{
-	struct snd_info_entry *entry = device->device_data;
-	return snd_info_unregister(entry);
-}
-
 /**
  * snd_card_proc_new - create an info entry for the given card
  * @card: the card instance
@@ -871,8 +890,7 @@
 	static struct snd_device_ops ops = {
 		.dev_free = snd_info_dev_free_entry,
 		.dev_register =	snd_info_dev_register_entry,
-		.dev_disconnect = snd_info_dev_disconnect_entry,
-		.dev_unregister = snd_info_dev_unregister_entry
+		/* disconnect is done via snd_info_card_disconnect() */
 	};
 	struct snd_info_entry *entry;
 	int err;
@@ -901,6 +919,11 @@
 {
 	if (entry == NULL)
 		return;
+	if (entry->p) {
+		mutex_lock(&info_mutex);
+		snd_info_disconnect(entry);
+		mutex_unlock(&info_mutex);
+	}
 	kfree(entry->name);
 	if (entry->private_free)
 		entry->private_free(entry);
@@ -935,38 +958,14 @@
 	p->size = entry->size;
 	p->data = entry;
 	entry->p = p;
+	if (entry->parent)
+		list_add_tail(&entry->list, &entry->parent->children);
 	mutex_unlock(&info_mutex);
 	return 0;
 }
 
 EXPORT_SYMBOL(snd_info_register);
 
-/**
- * snd_info_unregister - de-register the info entry
- * @entry: the info entry
- *
- * De-registers the info entry and releases the instance.
- *
- * Returns zero if successful, or a negative error code on failure.
- */
-int snd_info_unregister(struct snd_info_entry * entry)
-{
-	struct proc_dir_entry *root;
-
-	if (! entry)
-		return 0;
-	snd_assert(entry->p != NULL, return -ENXIO);
-	root = entry->parent == NULL ? snd_proc_root : entry->parent->p;
-	snd_assert(root, return -ENXIO);
-	mutex_lock(&info_mutex);
-	snd_remove_proc_entry(root, entry->p);
-	mutex_unlock(&info_mutex);
-	snd_info_free_entry(entry);
-	return 0;
-}
-
-EXPORT_SYMBOL(snd_info_unregister);
-
 /*
 
  */
@@ -999,8 +998,7 @@
 
 static int __exit snd_info_version_done(void)
 {
-	if (snd_info_version_entry)
-		snd_info_unregister(snd_info_version_entry);
+	snd_info_free_entry(snd_info_version_entry);
 	return 0;
 }
 
diff --git a/sound/core/info_oss.c b/sound/core/info_oss.c
index bb2c40d..3ebc349 100644
--- a/sound/core/info_oss.c
+++ b/sound/core/info_oss.c
@@ -131,10 +131,8 @@
 
 int snd_info_minor_unregister(void)
 {
-	if (snd_sndstat_proc_entry) {
-		snd_info_unregister(snd_sndstat_proc_entry);
-		snd_sndstat_proc_entry = NULL;
-	}
+	snd_info_free_entry(snd_sndstat_proc_entry);
+	snd_sndstat_proc_entry = NULL;
 	return 0;
 }
 
diff --git a/sound/core/init.c b/sound/core/init.c
index 4d92588..d7607a2 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -81,8 +81,6 @@
 #define init_info_for_card(card)
 #endif
 
-static void snd_card_free_thread(void * __card);
-
 /**
  *  snd_card_new - create and initialize a soundcard structure
  *  @idx: card index (address) [0 ... (SNDRV_CARDS-1)]
@@ -145,7 +143,6 @@
 	INIT_LIST_HEAD(&card->ctl_files);
 	spin_lock_init(&card->files_lock);
 	init_waitqueue_head(&card->shutdown_sleep);
-	INIT_WORK(&card->free_workq, snd_card_free_thread, card);
 #ifdef CONFIG_PM
 	mutex_init(&card->power_lock);
 	init_waitqueue_head(&card->power_sleep);
@@ -310,6 +307,7 @@
 	if (err < 0)
 		snd_printk(KERN_ERR "not all devices for card %i can be disconnected\n", card->number);
 
+	snd_info_card_disconnect(card);
 	return 0;	
 }
 
@@ -326,22 +324,10 @@
  *  Returns zero. Frees all associated devices and frees the control
  *  interface associated to given soundcard.
  */
-int snd_card_free(struct snd_card *card)
+static int snd_card_do_free(struct snd_card *card)
 {
 	struct snd_shutdown_f_ops *s_f_ops;
 
-	if (card == NULL)
-		return -EINVAL;
-	mutex_lock(&snd_card_mutex);
-	snd_cards[card->number] = NULL;
-	mutex_unlock(&snd_card_mutex);
-
-#ifdef CONFIG_PM
-	wake_up(&card->power_sleep);
-#endif
-	/* wait, until all devices are ready for the free operation */
-	wait_event(card->shutdown_sleep, card->files == NULL);
-
 #if defined(CONFIG_SND_MIXER_OSS) || defined(CONFIG_SND_MIXER_OSS_MODULE)
 	if (snd_mixer_oss_notify_callback)
 		snd_mixer_oss_notify_callback(card, SND_MIXER_OSS_NOTIFY_FREE);
@@ -360,7 +346,7 @@
 	}
 	if (card->private_free)
 		card->private_free(card);
-	snd_info_unregister(card->proc_id);
+	snd_info_free_entry(card->proc_id);
 	if (snd_info_card_free(card) < 0) {
 		snd_printk(KERN_WARNING "unable to free card info\n");
 		/* Not fatal error */
@@ -370,62 +356,60 @@
 		card->s_f_ops = s_f_ops->next;
 		kfree(s_f_ops);
 	}
-	mutex_lock(&snd_card_mutex);
-	snd_cards_lock &= ~(1 << card->number);
-	mutex_unlock(&snd_card_mutex);
 	kfree(card);
 	return 0;
 }
 
+static int snd_card_free_prepare(struct snd_card *card)
+{
+	if (card == NULL)
+		return -EINVAL;
+	(void) snd_card_disconnect(card);	/* result intentionally discarded */
+	mutex_lock(&snd_card_mutex);
+	snd_cards[card->number] = NULL;	/* drop the card from the table ... */
+	snd_cards_lock &= ~(1 << card->number);	/* ... and clear its bit in the mask */
+	mutex_unlock(&snd_card_mutex);
+#ifdef CONFIG_PM
+	wake_up(&card->power_sleep);	/* wake anyone waiting on power_sleep */
+#endif
+	return 0;
+}
+
+int snd_card_free_when_closed(struct snd_card *card)
+{
+	int free_now = 0;
+	int ret = snd_card_free_prepare(card);	/* disconnect + drop from snd_cards[] */
+	if (ret)
+		return ret;	/* -EINVAL for a NULL card */
+	/* under files_lock: free right away if no file is open, else defer */
+	spin_lock(&card->files_lock);
+	if (card->files == NULL)
+		free_now = 1;
+	else
+		card->free_on_last_close = 1;	/* acted on in snd_card_file_remove() */
+	spin_unlock(&card->files_lock);
+
+	if (free_now)
+		snd_card_do_free(card);	/* called after dropping the spinlock */
+	return 0;
+}
+
+EXPORT_SYMBOL(snd_card_free_when_closed);
+
+int snd_card_free(struct snd_card *card)
+{
+	int ret = snd_card_free_prepare(card);	/* disconnect + drop from snd_cards[] */
+	if (ret)
+		return ret;	/* -EINVAL for a NULL card */
+
+	/* wait until card->files is empty; snd_card_file_remove() does the wake_up */
+	wait_event(card->shutdown_sleep, card->files == NULL);
+	snd_card_do_free(card);	/* its return value is not propagated */
+	return 0;
+}
+
 EXPORT_SYMBOL(snd_card_free);
 
-static void snd_card_free_thread(void * __card)
-{
-	struct snd_card *card = __card;
-	struct module * module = card->module;
-
-	if (!try_module_get(module)) {
-		snd_printk(KERN_ERR "unable to lock toplevel module for card %i in free thread\n", card->number);
-		module = NULL;
-	}
-
-	snd_card_free(card);
-
-	module_put(module);
-}
-
-/**
- *  snd_card_free_in_thread - call snd_card_free() in thread
- *  @card: soundcard structure
- *
- *  This function schedules the call of snd_card_free() function in a
- *  work queue.  When all devices are released (non-busy), the work
- *  is woken up and calls snd_card_free().
- *
- *  When a card can be disconnected at any time by hotplug service,
- *  this function should be used in disconnect (or detach) callback
- *  instead of calling snd_card_free() directly.
- *  
- *  Returns - zero otherwise a negative error code if the start of thread failed.
- */
-int snd_card_free_in_thread(struct snd_card *card)
-{
-	if (card->files == NULL) {
-		snd_card_free(card);
-		return 0;
-	}
-
-	if (schedule_work(&card->free_workq))
-		return 0;
-
-	snd_printk(KERN_ERR "schedule_work() failed in snd_card_free_in_thread for card %i\n", card->number);
-	/* try to free the structure immediately */
-	snd_card_free(card);
-	return -EFAULT;
-}
-
-EXPORT_SYMBOL(snd_card_free_in_thread);
-
 static void choose_default_id(struct snd_card *card)
 {
 	int i, len, idx_flag = 0, loops = SNDRV_CARDS;
@@ -625,9 +609,9 @@
 
 int __exit snd_card_info_done(void)
 {
-	snd_info_unregister(snd_card_info_entry);
+	snd_info_free_entry(snd_card_info_entry);
 #ifdef MODULE
-	snd_info_unregister(snd_card_module_info_entry);
+	snd_info_free_entry(snd_card_module_info_entry);
 #endif
 	return 0;
 }
@@ -708,15 +692,16 @@
  *
  *  This function removes the file formerly added to the card via
  *  snd_card_file_add() function.
- *  If all files are removed and the release of the card is
- *  scheduled, it will wake up the the thread to call snd_card_free()
- *  (see snd_card_free_in_thread() function).
+ *  If all files are removed and snd_card_free_when_closed() was
+ *  called beforehand, it processes the pending release of
+ *  resources.
  *
  *  Returns zero or a negative error code.
  */
 int snd_card_file_remove(struct snd_card *card, struct file *file)
 {
 	struct snd_monitor_file *mfile, *pfile = NULL;
+	int last_close = 0;
 
 	spin_lock(&card->files_lock);
 	mfile = card->files;
@@ -731,9 +716,14 @@
 		pfile = mfile;
 		mfile = mfile->next;
 	}
-	spin_unlock(&card->files_lock);
 	if (card->files == NULL)
+		last_close = 1;
+	spin_unlock(&card->files_lock);
+	if (last_close) {
 		wake_up(&card->shutdown_sleep);
+		if (card->free_on_last_close)
+			snd_card_do_free(card);
+	}
 	if (!mfile) {
 		snd_printk(KERN_ERR "ALSA card file remove problem (%p)\n", file);
 		return -ENOENT;
diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c
index 75a9505..f4c6704 100644
--- a/sound/core/oss/mixer_oss.c
+++ b/sound/core/oss/mixer_oss.c
@@ -1193,10 +1193,8 @@
 
 static void snd_mixer_oss_proc_done(struct snd_mixer_oss *mixer)
 {
-	if (mixer->proc_entry) {
-		snd_info_unregister(mixer->proc_entry);
-		mixer->proc_entry = NULL;
-	}
+	snd_info_free_entry(mixer->proc_entry);
+	mixer->proc_entry = NULL;
 }
 #else /* !CONFIG_PROC_FS */
 #define snd_mixer_oss_proc_init(mix)
@@ -1312,21 +1310,19 @@
 		card->mixer_oss = mixer;
 		snd_mixer_oss_build(mixer);
 		snd_mixer_oss_proc_init(mixer);
-	} else if (cmd == SND_MIXER_OSS_NOTIFY_DISCONNECT) {
-		mixer = card->mixer_oss;
-		if (mixer == NULL || !mixer->oss_dev_alloc)
-			return 0;
-		snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, mixer->card, 0);
-		mixer->oss_dev_alloc = 0;
-	} else {		/* free */
+	} else {
 		mixer = card->mixer_oss;
 		if (mixer == NULL)
 			return 0;
+		if (mixer->oss_dev_alloc) {
 #ifdef SNDRV_OSS_INFO_DEV_MIXERS
-		snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIXERS, mixer->card->number);
+			snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_MIXERS, mixer->card->number);
 #endif
-		if (mixer->oss_dev_alloc)
 			snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_MIXER, mixer->card, 0);
+			mixer->oss_dev_alloc = 0;
+		}
+		if (cmd == SND_MIXER_OSS_NOTIFY_DISCONNECT)
+			return 0;
 		snd_mixer_oss_proc_done(mixer);
 		return snd_mixer_oss_free1(mixer);
 	}
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 472fce0..505b23e 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -2846,11 +2846,9 @@
 	int stream;
 	for (stream = 0; stream < 2; ++stream) {
 		struct snd_pcm_str *pstr = &pcm->streams[stream];
-		if (pstr->oss.proc_entry) {
-			snd_info_unregister(pstr->oss.proc_entry);
-			pstr->oss.proc_entry = NULL;
-			snd_pcm_oss_proc_free_setup_list(pstr);
-		}
+		snd_info_free_entry(pstr->oss.proc_entry);
+		pstr->oss.proc_entry = NULL;
+		snd_pcm_oss_proc_free_setup_list(pstr);
 	}
 }
 #else /* !CONFIG_SND_VERBOSE_PROCFS */
@@ -2931,6 +2929,12 @@
 			snd_unregister_oss_device(SNDRV_OSS_DEVICE_TYPE_PCM,
 						  pcm->card, 1);
 		}
+		if (dsp_map[pcm->card->number] == (int)pcm->device) {
+#ifdef SNDRV_OSS_INFO_DEV_AUDIO
+			snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_AUDIO, pcm->card->number);
+#endif
+		}
+		pcm->oss.reg = 0;
 	}
 	return 0;
 }
@@ -2938,15 +2942,7 @@
 static int snd_pcm_oss_unregister_minor(struct snd_pcm *pcm)
 {
 	snd_pcm_oss_disconnect_minor(pcm);
-	if (pcm->oss.reg) {
-		if (dsp_map[pcm->card->number] == (int)pcm->device) {
-#ifdef SNDRV_OSS_INFO_DEV_AUDIO
-			snd_oss_info_unregister(SNDRV_OSS_INFO_DEV_AUDIO, pcm->card->number);
-#endif
-		}
-		pcm->oss.reg = 0;
-		snd_pcm_oss_proc_done(pcm);
-	}
+	snd_pcm_oss_proc_done(pcm);
 	return 0;
 }
 
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 7581edd..bf8f412 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -42,7 +42,6 @@
 static int snd_pcm_dev_free(struct snd_device *device);
 static int snd_pcm_dev_register(struct snd_device *device);
 static int snd_pcm_dev_disconnect(struct snd_device *device);
-static int snd_pcm_dev_unregister(struct snd_device *device);
 
 static struct snd_pcm *snd_pcm_search(struct snd_card *card, int device)
 {
@@ -494,19 +493,13 @@
 static int snd_pcm_stream_proc_done(struct snd_pcm_str *pstr)
 {
 #ifdef CONFIG_SND_PCM_XRUN_DEBUG
-	if (pstr->proc_xrun_debug_entry) {
-		snd_info_unregister(pstr->proc_xrun_debug_entry);
-		pstr->proc_xrun_debug_entry = NULL;
-	}
+	snd_info_free_entry(pstr->proc_xrun_debug_entry);
+	pstr->proc_xrun_debug_entry = NULL;
 #endif
-	if (pstr->proc_info_entry) {
-		snd_info_unregister(pstr->proc_info_entry);
-		pstr->proc_info_entry = NULL;
-	}
-	if (pstr->proc_root) {
-		snd_info_unregister(pstr->proc_root);
-		pstr->proc_root = NULL;
-	}
+	snd_info_free_entry(pstr->proc_info_entry);
+	pstr->proc_info_entry = NULL;
+	snd_info_free_entry(pstr->proc_root);
+	pstr->proc_root = NULL;
 	return 0;
 }
 
@@ -570,29 +563,19 @@
 
 	return 0;
 }
-		
+
 static int snd_pcm_substream_proc_done(struct snd_pcm_substream *substream)
 {
-	if (substream->proc_info_entry) {
-		snd_info_unregister(substream->proc_info_entry);
-		substream->proc_info_entry = NULL;
-	}
-	if (substream->proc_hw_params_entry) {
-		snd_info_unregister(substream->proc_hw_params_entry);
-		substream->proc_hw_params_entry = NULL;
-	}
-	if (substream->proc_sw_params_entry) {
-		snd_info_unregister(substream->proc_sw_params_entry);
-		substream->proc_sw_params_entry = NULL;
-	}
-	if (substream->proc_status_entry) {
-		snd_info_unregister(substream->proc_status_entry);
-		substream->proc_status_entry = NULL;
-	}
-	if (substream->proc_root) {
-		snd_info_unregister(substream->proc_root);
-		substream->proc_root = NULL;
-	}
+	snd_info_free_entry(substream->proc_info_entry);
+	substream->proc_info_entry = NULL;
+	snd_info_free_entry(substream->proc_hw_params_entry);
+	substream->proc_hw_params_entry = NULL;
+	snd_info_free_entry(substream->proc_sw_params_entry);
+	substream->proc_sw_params_entry = NULL;
+	snd_info_free_entry(substream->proc_status_entry);
+	substream->proc_status_entry = NULL;
+	snd_info_free_entry(substream->proc_root);
+	substream->proc_root = NULL;
 	return 0;
 }
 #else /* !CONFIG_SND_VERBOSE_PROCFS */
@@ -696,7 +679,6 @@
 		.dev_free = snd_pcm_dev_free,
 		.dev_register =	snd_pcm_dev_register,
 		.dev_disconnect = snd_pcm_dev_disconnect,
-		.dev_unregister = snd_pcm_dev_unregister
 	};
 
 	snd_assert(rpcm != NULL, return -EINVAL);
@@ -740,6 +722,7 @@
 	substream = pstr->substream;
 	while (substream) {
 		substream_next = substream->next;
+		snd_pcm_timer_done(substream);
 		snd_pcm_substream_proc_done(substream);
 		kfree(substream);
 		substream = substream_next;
@@ -756,7 +739,12 @@
 
 static int snd_pcm_free(struct snd_pcm *pcm)
 {
+	struct snd_pcm_notify *notify;
+
 	snd_assert(pcm != NULL, return -ENXIO);
+	list_for_each_entry(notify, &snd_pcm_notify_list, list) {
+		notify->n_unregister(pcm);
+	}
 	if (pcm->private_free)
 		pcm->private_free(pcm);
 	snd_pcm_lib_preallocate_free_for_all(pcm);
@@ -804,7 +792,8 @@
 		kctl = snd_ctl_file(list);
 		if (kctl->pid == current->pid) {
 			prefer_subdevice = kctl->prefer_pcm_subdevice;
-			break;
+			if (prefer_subdevice != -1)
+				break;
 		}
 	}
 	up_read(&card->controls_rwsem);
@@ -918,6 +907,28 @@
 	substream->pstr->substream_opened--;
 }
 
+/* sysfs show handler for "pcm_class": writes the name of pcm->dev_class into
+ * buf, or "none" without devdata / for a class beyond SNDRV_PCM_CLASS_LAST */
+static ssize_t show_pcm_class(struct class_device *class_device, char *buf)
+{
+	static const char *strs[SNDRV_PCM_CLASS_LAST + 1] = {	/* by dev_class */
+		[SNDRV_PCM_CLASS_GENERIC] = "generic",
+		[SNDRV_PCM_CLASS_MULTI] = "multi",
+		[SNDRV_PCM_CLASS_MODEM] = "modem",
+		[SNDRV_PCM_CLASS_DIGITIZER] = "digitizer",
+	};
+	struct snd_pcm *pcm = class_get_devdata(class_device);
+	const char *str = "none";	/* fallback when the lookup is skipped */
+
+	if (pcm && pcm->dev_class <= SNDRV_PCM_CLASS_LAST)
+		str = strs[pcm->dev_class];
+	return snprintf(buf, PAGE_SIZE, "%s\n", str);
+}
+
+static struct class_device_attribute pcm_attrs =
+	__ATTR(pcm_class, S_IRUGO, show_pcm_class, NULL);
+
 static int snd_pcm_dev_register(struct snd_device *device)
 {
 	int cidx, err;
@@ -956,6 +967,8 @@
 			mutex_unlock(&register_mutex);
 			return err;
 		}
+		snd_add_device_sysfs_file(devtype, pcm->card, pcm->device,
+					  &pcm_attrs);
 		for (substream = pcm->streams[cidx].substream; substream; substream = substream->next)
 			snd_pcm_timer_init(substream);
 	}
@@ -971,35 +984,22 @@
 static int snd_pcm_dev_disconnect(struct snd_device *device)
 {
 	struct snd_pcm *pcm = device->device_data;
-	struct list_head *list;
+	struct snd_pcm_notify *notify;
 	struct snd_pcm_substream *substream;
-	int cidx;
+	int cidx, devtype;
 
 	mutex_lock(&register_mutex);
+	if (list_empty(&pcm->list))
+		goto unlock;
+
 	list_del_init(&pcm->list);
 	for (cidx = 0; cidx < 2; cidx++)
 		for (substream = pcm->streams[cidx].substream; substream; substream = substream->next)
 			if (substream->runtime)
 				substream->runtime->status->state = SNDRV_PCM_STATE_DISCONNECTED;
-	list_for_each(list, &snd_pcm_notify_list) {
-		struct snd_pcm_notify *notify;
-		notify = list_entry(list, struct snd_pcm_notify, list);
+	list_for_each_entry(notify, &snd_pcm_notify_list, list) {
 		notify->n_disconnect(pcm);
 	}
-	mutex_unlock(&register_mutex);
-	return 0;
-}
-
-static int snd_pcm_dev_unregister(struct snd_device *device)
-{
-	int cidx, devtype;
-	struct snd_pcm_substream *substream;
-	struct list_head *list;
-	struct snd_pcm *pcm = device->device_data;
-
-	snd_assert(pcm != NULL, return -ENXIO);
-	mutex_lock(&register_mutex);
-	list_del(&pcm->list);
 	for (cidx = 0; cidx < 2; cidx++) {
 		devtype = -1;
 		switch (cidx) {
@@ -1011,23 +1011,20 @@
 			break;
 		}
 		snd_unregister_device(devtype, pcm->card, pcm->device);
-		for (substream = pcm->streams[cidx].substream; substream; substream = substream->next)
-			snd_pcm_timer_done(substream);
 	}
-	list_for_each(list, &snd_pcm_notify_list) {
-		struct snd_pcm_notify *notify;
-		notify = list_entry(list, struct snd_pcm_notify, list);
-		notify->n_unregister(pcm);
-	}
+ unlock:
 	mutex_unlock(&register_mutex);
-	return snd_pcm_free(pcm);
+	return 0;
 }
 
 int snd_pcm_notify(struct snd_pcm_notify *notify, int nfree)
 {
 	struct list_head *p;
 
-	snd_assert(notify != NULL && notify->n_register != NULL && notify->n_unregister != NULL, return -EINVAL);
+	snd_assert(notify != NULL &&
+		   notify->n_register != NULL &&
+		   notify->n_unregister != NULL &&
+		   notify->n_disconnect, return -EINVAL);
 	mutex_lock(&register_mutex);
 	if (nfree) {
 		list_del(&notify->list);
@@ -1090,8 +1087,7 @@
 
 static void snd_pcm_proc_done(void)
 {
-	if (snd_pcm_proc_entry)
-		snd_info_unregister(snd_pcm_proc_entry);
+	snd_info_free_entry(snd_pcm_proc_entry);
 }
 
 #else /* !CONFIG_PROC_FS */
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 2b8aab6..2b53979 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -478,7 +478,7 @@
 	 * mmap of PCM status/control records because of the size
 	 * incompatibility.
 	 */
-	substream->no_mmap_ctrl = 1;
+	pcm_file->no_compat_mmap = 1;
 
 	switch (cmd) {
 	case SNDRV_PCM_IOCTL_PVERSION:
diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c
index 067d205..be030cb 100644
--- a/sound/core/pcm_memory.c
+++ b/sound/core/pcm_memory.c
@@ -101,7 +101,7 @@
 {
 	snd_pcm_lib_preallocate_dma_free(substream);
 #ifdef CONFIG_SND_VERBOSE_PROCFS
-	snd_info_unregister(substream->proc_prealloc_entry);
+	snd_info_free_entry(substream->proc_prealloc_entry);
 	substream->proc_prealloc_entry = NULL;
 #endif
 	return 0;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 439f047..0224c70 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1992,35 +1992,9 @@
 	return 0;
 }
 
-static void snd_pcm_add_file(struct snd_pcm_str *str,
-			     struct snd_pcm_file *pcm_file)
-{
-	pcm_file->next = str->files;
-	str->files = pcm_file;
-}
-
-static void snd_pcm_remove_file(struct snd_pcm_str *str,
-				struct snd_pcm_file *pcm_file)
-{
-	struct snd_pcm_file * pcm_file1;
-	if (str->files == pcm_file) {
-		str->files = pcm_file->next;
-	} else {
-		pcm_file1 = str->files;
-		while (pcm_file1 && pcm_file1->next != pcm_file)
-			pcm_file1 = pcm_file1->next;
-		if (pcm_file1 != NULL)
-			pcm_file1->next = pcm_file->next;
-	}
-}
-
 static void pcm_release_private(struct snd_pcm_substream *substream)
 {
-	struct snd_pcm_file *pcm_file = substream->file;
-
 	snd_pcm_unlink(substream);
-	snd_pcm_remove_file(substream->pstr, pcm_file);
-	kfree(pcm_file);
 }
 
 void snd_pcm_release_substream(struct snd_pcm_substream *substream)
@@ -2060,7 +2034,6 @@
 		return 0;
 	}
 
-	substream->no_mmap_ctrl = 0;
 	err = snd_pcm_hw_constraints_init(substream);
 	if (err < 0) {
 		snd_printd("snd_pcm_hw_constraints_init failed\n");
@@ -2105,19 +2078,16 @@
 	if (err < 0)
 		return err;
 
-	if (substream->ref_count > 1)
-		pcm_file = substream->file;
-	else {
-		pcm_file = kzalloc(sizeof(*pcm_file), GFP_KERNEL);
-		if (pcm_file == NULL) {
-			snd_pcm_release_substream(substream);
-			return -ENOMEM;
-		}
+	pcm_file = kzalloc(sizeof(*pcm_file), GFP_KERNEL);
+	if (pcm_file == NULL) {
+		snd_pcm_release_substream(substream);
+		return -ENOMEM;
+	}
+	pcm_file->substream = substream;
+	if (substream->ref_count == 1) {
 		str = substream->pstr;
 		substream->file = pcm_file;
 		substream->pcm_release = pcm_release_private;
-		pcm_file->substream = substream;
-		snd_pcm_add_file(str, pcm_file);
 	}
 	file->private_data = pcm_file;
 	*rpcm_file = pcm_file;
@@ -2209,6 +2179,7 @@
 	fasync_helper(-1, file, 0, &substream->runtime->fasync);
 	mutex_lock(&pcm->open_mutex);
 	snd_pcm_release_substream(substream);
+	kfree(pcm_file);
 	mutex_unlock(&pcm->open_mutex);
 	wake_up(&pcm->open_wait);
 	module_put(pcm->card->module);
@@ -3270,11 +3241,11 @@
 	offset = area->vm_pgoff << PAGE_SHIFT;
 	switch (offset) {
 	case SNDRV_PCM_MMAP_OFFSET_STATUS:
-		if (substream->no_mmap_ctrl)
+		if (pcm_file->no_compat_mmap)
 			return -ENXIO;
 		return snd_pcm_mmap_status(substream, file, area);
 	case SNDRV_PCM_MMAP_OFFSET_CONTROL:
-		if (substream->no_mmap_ctrl)
+		if (pcm_file->no_compat_mmap)
 			return -ENXIO;
 		return snd_pcm_mmap_control(substream, file, area);
 	default:
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 8c15c66..269c467 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -55,7 +55,6 @@
 static int snd_rawmidi_dev_free(struct snd_device *device);
 static int snd_rawmidi_dev_register(struct snd_device *device);
 static int snd_rawmidi_dev_disconnect(struct snd_device *device);
-static int snd_rawmidi_dev_unregister(struct snd_device *device);
 
 static LIST_HEAD(snd_rawmidi_devices);
 static DEFINE_MUTEX(register_mutex);
@@ -431,7 +430,8 @@
 			kctl = snd_ctl_file(list);
 			if (kctl->pid == current->pid) {
 				subdevice = kctl->prefer_rawmidi_subdevice;
-				break;
+				if (subdevice != -1)
+					break;
 			}
 		}
 		up_read(&card->controls_rwsem);
@@ -1426,7 +1426,6 @@
 		.dev_free = snd_rawmidi_dev_free,
 		.dev_register = snd_rawmidi_dev_register,
 		.dev_disconnect = snd_rawmidi_dev_disconnect,
-		.dev_unregister = snd_rawmidi_dev_unregister
 	};
 
 	snd_assert(rrawmidi != NULL, return -EINVAL);
@@ -1479,6 +1478,14 @@
 static int snd_rawmidi_free(struct snd_rawmidi *rmidi)
 {
 	snd_assert(rmidi != NULL, return -ENXIO);	
+
+	snd_info_free_entry(rmidi->proc_entry);
+	rmidi->proc_entry = NULL;
+	mutex_lock(&register_mutex);
+	if (rmidi->ops && rmidi->ops->dev_unregister)
+		rmidi->ops->dev_unregister(rmidi);
+	mutex_unlock(&register_mutex);
+
 	snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT]);
 	snd_rawmidi_free_substreams(&rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]);
 	if (rmidi->private_free)
@@ -1587,21 +1594,6 @@
 
 	mutex_lock(&register_mutex);
 	list_del_init(&rmidi->list);
-	mutex_unlock(&register_mutex);
-	return 0;
-}
-
-static int snd_rawmidi_dev_unregister(struct snd_device *device)
-{
-	struct snd_rawmidi *rmidi = device->device_data;
-
-	snd_assert(rmidi != NULL, return -ENXIO);
-	mutex_lock(&register_mutex);
-	list_del(&rmidi->list);
-	if (rmidi->proc_entry) {
-		snd_info_unregister(rmidi->proc_entry);
-		rmidi->proc_entry = NULL;
-	}
 #ifdef CONFIG_SND_OSSEMUL
 	if (rmidi->ossreg) {
 		if ((int)rmidi->device == midi_map[rmidi->card->number]) {
@@ -1615,17 +1607,9 @@
 		rmidi->ossreg = 0;
 	}
 #endif /* CONFIG_SND_OSSEMUL */
-	if (rmidi->ops && rmidi->ops->dev_unregister)
-		rmidi->ops->dev_unregister(rmidi);
 	snd_unregister_device(SNDRV_DEVICE_TYPE_RAWMIDI, rmidi->card, rmidi->device);
 	mutex_unlock(&register_mutex);
-#if defined(CONFIG_SND_SEQUENCER) || (defined(MODULE) && defined(CONFIG_SND_SEQUENCER_MODULE))
-	if (rmidi->seq_dev) {
-		snd_device_free(rmidi->card, rmidi->seq_dev);
-		rmidi->seq_dev = NULL;
-	}
-#endif
-	return snd_rawmidi_free(rmidi);
+	return 0;
 }
 
 /**
diff --git a/sound/core/rtctimer.c b/sound/core/rtctimer.c
index 84704cc..412dd62 100644
--- a/sound/core/rtctimer.c
+++ b/sound/core/rtctimer.c
@@ -156,7 +156,7 @@
 static void __exit rtctimer_exit(void)
 {
 	if (rtctimer) {
-		snd_timer_global_unregister(rtctimer);
+		snd_timer_global_free(rtctimer);
 		rtctimer = NULL;
 	}
 }
diff --git a/sound/core/seq/oss/seq_oss.c b/sound/core/seq/oss/seq_oss.c
index e723413..92858cf 100644
--- a/sound/core/seq/oss/seq_oss.c
+++ b/sound/core/seq/oss/seq_oss.c
@@ -303,8 +303,7 @@
 static void
 unregister_proc(void)
 {
-	if (info_entry)
-		snd_info_unregister(info_entry);
+	snd_info_free_entry(info_entry);
 	info_entry = NULL;
 }
 #endif /* CONFIG_PROC_FS */
diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c
index 102ff54..b79d011 100644
--- a/sound/core/seq/seq_device.c
+++ b/sound/core/seq/seq_device.c
@@ -90,7 +90,6 @@
 static int snd_seq_device_dev_free(struct snd_device *device);
 static int snd_seq_device_dev_register(struct snd_device *device);
 static int snd_seq_device_dev_disconnect(struct snd_device *device);
-static int snd_seq_device_dev_unregister(struct snd_device *device);
 
 static int init_device(struct snd_seq_device *dev, struct ops_list *ops);
 static int free_device(struct snd_seq_device *dev, struct ops_list *ops);
@@ -189,7 +188,6 @@
 		.dev_free = snd_seq_device_dev_free,
 		.dev_register = snd_seq_device_dev_register,
 		.dev_disconnect = snd_seq_device_dev_disconnect,
-		.dev_unregister = snd_seq_device_dev_unregister
 	};
 
 	if (result)
@@ -309,15 +307,6 @@
 }
 
 /*
- * unregister the existing device
- */
-static int snd_seq_device_dev_unregister(struct snd_device *device)
-{
-	struct snd_seq_device *dev = device->device_data;
-	return snd_seq_device_free(dev);
-}
-
-/*
  * register device driver
  * id = driver id
  * entry = driver operators - duplicated to each instance
@@ -573,7 +562,7 @@
 {
 	remove_drivers();
 #ifdef CONFIG_PROC_FS
-	snd_info_unregister(info_entry);
+	snd_info_free_entry(info_entry);
 #endif
 	if (num_ops)
 		snd_printk(KERN_ERR "drivers not released (%d)\n", num_ops);
diff --git a/sound/core/seq/seq_info.c b/sound/core/seq/seq_info.c
index 142e9e6..8a7fe5c 100644
--- a/sound/core/seq/seq_info.c
+++ b/sound/core/seq/seq_info.c
@@ -64,9 +64,9 @@
 
 int __exit snd_seq_info_done(void)
 {
-	snd_info_unregister(queues_entry);
-	snd_info_unregister(clients_entry);
-	snd_info_unregister(timer_entry);
+	snd_info_free_entry(queues_entry);
+	snd_info_free_entry(clients_entry);
+	snd_info_free_entry(timer_entry);
 	return 0;
 }
 #endif
diff --git a/sound/core/sound.c b/sound/core/sound.c
index 7edd1fc..efa476c 100644
--- a/sound/core/sound.c
+++ b/sound/core/sound.c
@@ -268,7 +268,11 @@
 	snd_minors[minor] = preg;
 	if (card)
 		device = card->dev;
-	class_device_create(sound_class, NULL, MKDEV(major, minor), device, "%s", name);
+	preg->class_dev = class_device_create(sound_class, NULL,
+					      MKDEV(major, minor),
+					      device, "%s", name);
+	if (preg->class_dev)
+		class_set_devdata(preg->class_dev, private_data);
 
 	mutex_unlock(&sound_mutex);
 	return 0;
@@ -276,6 +280,24 @@
 
 EXPORT_SYMBOL(snd_register_device);
 
+/* find the snd_minors[] record matching type/card/dev (NULL card => -1)
+ * return its index (used as the minor number), or -1 if not found
+ */
+static int find_snd_minor(int type, struct snd_card *card, int dev)
+{
+	int cardnum, minor;
+	struct snd_minor *mptr;
+
+	cardnum = card ? card->number : -1;	/* a NULL card is looked up as -1 */
+	for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor)
+		if ((mptr = snd_minors[minor]) != NULL &&	/* skip empty slots */
+		    mptr->type == type &&
+		    mptr->card == cardnum &&
+		    mptr->device == dev)
+			return minor;	/* first match wins */
+	return -1;
+}
+
 /**
  * snd_unregister_device - unregister the device on the given card
  * @type: the device type, SNDRV_DEVICE_TYPE_XXX
@@ -289,32 +311,42 @@
  */
 int snd_unregister_device(int type, struct snd_card *card, int dev)
 {
-	int cardnum, minor;
-	struct snd_minor *mptr;
+	int minor;
 
-	cardnum = card ? card->number : -1;
 	mutex_lock(&sound_mutex);
-	for (minor = 0; minor < ARRAY_SIZE(snd_minors); ++minor)
-		if ((mptr = snd_minors[minor]) != NULL &&
-		    mptr->type == type &&
-		    mptr->card == cardnum &&
-		    mptr->device == dev)
-			break;
-	if (minor == ARRAY_SIZE(snd_minors)) {
+	minor = find_snd_minor(type, card, dev);	/* index into snd_minors[], or -1 */
+	if (minor < 0) {	/* no matching record */
 		mutex_unlock(&sound_mutex);
 		return -EINVAL;
 	}
 
 	class_device_destroy(sound_class, MKDEV(major, minor));
 
+	kfree(snd_minors[minor]);	/* released while sound_mutex is still held */
 	snd_minors[minor] = NULL;
 	mutex_unlock(&sound_mutex);
-	kfree(mptr);
 	return 0;
 }
 
 EXPORT_SYMBOL(snd_unregister_device);
 
+int snd_add_device_sysfs_file(int type, struct snd_card *card, int dev,
+			      const struct class_device_attribute *attr)
+{
+	int minor, ret = -EINVAL;	/* returned when no minor or no class device */
+	struct class_device *cdev;
+
+	mutex_lock(&sound_mutex);
+	minor = find_snd_minor(type, card, dev);	/* -1 when nothing matches */
+	if (minor >= 0 && (cdev = snd_minors[minor]->class_dev) != NULL)
+		ret = class_device_create_file(cdev, attr);	/* result passed back */
+	mutex_unlock(&sound_mutex);
+	return ret;
+
+}
+
+EXPORT_SYMBOL(snd_add_device_sysfs_file);
+
 #ifdef CONFIG_PROC_FS
 /*
  *  INFO PART
@@ -387,8 +419,7 @@
 
 int __exit snd_minor_info_done(void)
 {
-	if (snd_minor_info_entry)
-		snd_info_unregister(snd_minor_info_entry);
+	snd_info_free_entry(snd_minor_info_entry);
 	return 0;
 }
 #endif /* CONFIG_PROC_FS */
diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c
index 74f0fe5..b2fc40a 100644
--- a/sound/core/sound_oss.c
+++ b/sound/core/sound_oss.c
@@ -270,8 +270,7 @@
 
 int __exit snd_minor_info_oss_done(void)
 {
-	if (snd_minor_info_oss_entry)
-		snd_info_unregister(snd_minor_info_oss_entry);
+	snd_info_free_entry(snd_minor_info_oss_entry);
 	return 0;
 }
 #endif /* CONFIG_PROC_FS */
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 0a984e8..10a79ae 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -88,7 +88,7 @@
 static int snd_timer_free(struct snd_timer *timer);
 static int snd_timer_dev_free(struct snd_device *device);
 static int snd_timer_dev_register(struct snd_device *device);
-static int snd_timer_dev_unregister(struct snd_device *device);
+static int snd_timer_dev_disconnect(struct snd_device *device);
 
 static void snd_timer_reschedule(struct snd_timer * timer, unsigned long ticks_left);
 
@@ -718,7 +718,7 @@
 		}
 	}
 	if (timer->flags & SNDRV_TIMER_FLG_RESCHED)
-		snd_timer_reschedule(timer, ticks_left);
+		snd_timer_reschedule(timer, timer->sticks);
 	if (timer->running) {
 		if (timer->hw.flags & SNDRV_TIMER_HW_STOP) {
 			timer->hw.stop(timer);
@@ -773,7 +773,7 @@
 	static struct snd_device_ops ops = {
 		.dev_free = snd_timer_dev_free,
 		.dev_register = snd_timer_dev_register,
-		.dev_unregister = snd_timer_dev_unregister
+		.dev_disconnect = snd_timer_dev_disconnect,
 	};
 
 	snd_assert(tid != NULL, return -EINVAL);
@@ -813,6 +813,21 @@
 static int snd_timer_free(struct snd_timer *timer)
 {
 	snd_assert(timer != NULL, return -ENXIO);
+
+	mutex_lock(&register_mutex);
+	if (! list_empty(&timer->open_list_head)) {
+		struct list_head *p, *n;
+		struct snd_timer_instance *ti;
+		snd_printk(KERN_WARNING "timer %p is busy?\n", timer);
+		list_for_each_safe(p, n, &timer->open_list_head) {
+			list_del_init(p);
+			ti = list_entry(p, struct snd_timer_instance, open_list);
+			ti->timer = NULL;
+		}
+	}
+	list_del(&timer->device_list);
+	mutex_unlock(&register_mutex);
+
 	if (timer->private_free)
 		timer->private_free(timer);
 	kfree(timer);
@@ -867,30 +882,13 @@
 	return 0;
 }
 
-static int snd_timer_unregister(struct snd_timer *timer)
-{
-	struct list_head *p, *n;
-	struct snd_timer_instance *ti;
-
-	snd_assert(timer != NULL, return -ENXIO);
-	mutex_lock(&register_mutex);
-	if (! list_empty(&timer->open_list_head)) {
-		snd_printk(KERN_WARNING "timer 0x%lx is busy?\n", (long)timer);
-		list_for_each_safe(p, n, &timer->open_list_head) {
-			list_del_init(p);
-			ti = list_entry(p, struct snd_timer_instance, open_list);
-			ti->timer = NULL;
-		}
-	}
-	list_del(&timer->device_list);
-	mutex_unlock(&register_mutex);
-	return snd_timer_free(timer);
-}
-
-static int snd_timer_dev_unregister(struct snd_device *device)
+static int snd_timer_dev_disconnect(struct snd_device *device)
 {
 	struct snd_timer *timer = device->device_data;
-	return snd_timer_unregister(timer);
+	mutex_lock(&register_mutex);
+	list_del_init(&timer->device_list);
+	mutex_unlock(&register_mutex);
+	return 0;
 }
 
 void snd_timer_notify(struct snd_timer *timer, int event, struct timespec *tstamp)
@@ -955,18 +953,12 @@
 	return snd_timer_dev_register(&dev);
 }
 
-int snd_timer_global_unregister(struct snd_timer *timer)
-{
-	return snd_timer_unregister(timer);
-}
-
 /*
  *  System timer
  */
 
 struct snd_timer_system_private {
 	struct timer_list tlist;
-	struct timer * timer;
 	unsigned long last_expires;
 	unsigned long last_jiffies;
 	unsigned long correction;
@@ -978,7 +970,7 @@
 	struct snd_timer_system_private *priv = timer->private_data;
 	unsigned long jiff = jiffies;
 	if (time_after(jiff, priv->last_expires))
-		priv->correction = (long)jiff - (long)priv->last_expires;
+		priv->correction += (long)jiff - (long)priv->last_expires;
 	snd_timer_interrupt(timer, (long)jiff - (long)priv->last_jiffies);
 }
 
@@ -994,7 +986,7 @@
 		njiff++;
 	} else {
 		njiff += timer->sticks - priv->correction;
-		priv->correction -= timer->sticks;
+		priv->correction = 0;
 	}
 	priv->last_expires = priv->tlist.expires = njiff;
 	add_timer(&priv->tlist);
@@ -1013,6 +1005,7 @@
 		timer->sticks = priv->last_expires - jiff;
 	else
 		timer->sticks = 1;
+	priv->correction = 0;
 	return 0;
 }
 
@@ -1126,7 +1119,7 @@
 
 static void __exit snd_timer_proc_done(void)
 {
-	snd_info_unregister(snd_timer_proc_entry);
+	snd_info_free_entry(snd_timer_proc_entry);
 }
 #else /* !CONFIG_PROC_FS */
 #define snd_timer_proc_init()
@@ -1982,7 +1975,7 @@
 	/* unregister the system timer */
 	list_for_each_safe(p, n, &snd_timer_list) {
 		struct snd_timer *timer = list_entry(p, struct snd_timer, device_list);
-		snd_timer_unregister(timer);
+		snd_timer_free(timer);
 	}
 	snd_timer_proc_done();
 #ifdef SNDRV_OSS_INFO_DEV_TIMERS
@@ -2005,5 +1998,4 @@
 EXPORT_SYMBOL(snd_timer_global_new);
 EXPORT_SYMBOL(snd_timer_global_free);
 EXPORT_SYMBOL(snd_timer_global_register);
-EXPORT_SYMBOL(snd_timer_global_unregister);
 EXPORT_SYMBOL(snd_timer_interrupt);
diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig
index 395c4ef..7971285 100644
--- a/sound/drivers/Kconfig
+++ b/sound/drivers/Kconfig
@@ -73,6 +73,19 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-mtpav.
 
+config SND_MTS64
+	tristate "ESI Miditerminal 4140 driver"
+	depends on SND && PARPORT
+	select SND_RAWMIDI
+	help
+	  The ESI Miditerminal 4140 is a 4 In 4 Out MIDI Interface with 
+          additional SMPTE Timecode capabilities for the parallel port.
+
+	  Say 'Y' to include support for this device.
+
+	  To compile this driver as a module, choose 'M' here: the module
+	  will be called snd-mts64.
+
 config SND_SERIAL_U16550
 	tristate "UART16550 serial MIDI driver"
 	depends on SND
diff --git a/sound/drivers/Makefile b/sound/drivers/Makefile
index cb98c3d..c9bad6d 100644
--- a/sound/drivers/Makefile
+++ b/sound/drivers/Makefile
@@ -5,6 +5,7 @@
 
 snd-dummy-objs := dummy.o
 snd-mtpav-objs := mtpav.o
+snd-mts64-objs := mts64.o
 snd-serial-u16550-objs := serial-u16550.o
 snd-virmidi-objs := virmidi.o
 
@@ -13,5 +14,6 @@
 obj-$(CONFIG_SND_VIRMIDI) += snd-virmidi.o
 obj-$(CONFIG_SND_SERIAL_U16550) += snd-serial-u16550.o
 obj-$(CONFIG_SND_MTPAV) += snd-mtpav.o
+obj-$(CONFIG_SND_MTS64) += snd-mts64.o
 
 obj-$(CONFIG_SND) += opl3/ opl4/ mpu401/ vx/
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index ffeafaf..42001ef 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -29,6 +29,7 @@
 #include <linux/moduleparam.h>
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/pcm.h>
 #include <sound/rawmidi.h>
 #include <sound/initval.h>
@@ -285,7 +286,7 @@
 	.channels_max =		USE_CHANNELS_MAX,
 	.buffer_bytes_max =	MAX_BUFFER_SIZE,
 	.period_bytes_min =	64,
-	.period_bytes_max =	MAX_BUFFER_SIZE,
+	.period_bytes_max =	MAX_PERIOD_SIZE,
 	.periods_min =		USE_PERIODS_MIN,
 	.periods_max =		USE_PERIODS_MAX,
 	.fifo_size =		0,
@@ -443,10 +444,13 @@
 }
 
 #define DUMMY_VOLUME(xname, xindex, addr) \
-{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .index = xindex, \
   .info = snd_dummy_volume_info, \
   .get = snd_dummy_volume_get, .put = snd_dummy_volume_put, \
-  .private_value = addr }
+  .private_value = addr, \
+  .tlv = { .p = db_scale_dummy } }
 
 static int snd_dummy_volume_info(struct snd_kcontrol *kcontrol,
 				 struct snd_ctl_elem_info *uinfo)
@@ -497,6 +501,8 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_dummy, -4500, 30, 0);
+
 #define DUMMY_CAPSRC(xname, xindex, addr) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
   .info = snd_dummy_capsrc_info, \
@@ -547,13 +553,13 @@
 DUMMY_VOLUME("Master Volume", 0, MIXER_ADDR_MASTER),
 DUMMY_CAPSRC("Master Capture Switch", 0, MIXER_ADDR_MASTER),
 DUMMY_VOLUME("Synth Volume", 0, MIXER_ADDR_SYNTH),
-DUMMY_CAPSRC("Synth Capture Switch", 0, MIXER_ADDR_MASTER),
+DUMMY_CAPSRC("Synth Capture Switch", 0, MIXER_ADDR_SYNTH),
 DUMMY_VOLUME("Line Volume", 0, MIXER_ADDR_LINE),
-DUMMY_CAPSRC("Line Capture Switch", 0, MIXER_ADDR_MASTER),
+DUMMY_CAPSRC("Line Capture Switch", 0, MIXER_ADDR_LINE),
 DUMMY_VOLUME("Mic Volume", 0, MIXER_ADDR_MIC),
-DUMMY_CAPSRC("Mic Capture Switch", 0, MIXER_ADDR_MASTER),
+DUMMY_CAPSRC("Mic Capture Switch", 0, MIXER_ADDR_MIC),
 DUMMY_VOLUME("CD Volume", 0, MIXER_ADDR_CD),
-DUMMY_CAPSRC("CD Capture Switch", 0, MIXER_ADDR_MASTER)
+DUMMY_CAPSRC("CD Capture Switch", 0, MIXER_ADDR_CD)
 };
 
 static int __init snd_card_dummy_new_mixer(struct snd_dummy *dummy)
diff --git a/sound/drivers/mpu401/mpu401.c b/sound/drivers/mpu401/mpu401.c
index 17cc105..2de181a 100644
--- a/sound/drivers/mpu401/mpu401.c
+++ b/sound/drivers/mpu401/mpu401.c
@@ -211,7 +211,7 @@
 	struct snd_card *card = (struct snd_card *) pnp_get_drvdata(dev);
 
 	snd_card_disconnect(card);
-	snd_card_free_in_thread(card);
+	snd_card_free_when_closed(card);
 }
 
 static struct pnp_driver snd_mpu401_pnp_driver = {
diff --git a/sound/drivers/mts64.c b/sound/drivers/mts64.c
new file mode 100644
index 0000000..1699873
--- /dev/null
+++ b/sound/drivers/mts64.c
@@ -0,0 +1,1091 @@
+/*     
+ *   ALSA Driver for Ego Systems Inc. (ESI) Miditerminal 4140
+ *   Copyright (c) 2006 by Matthias König <mk@phasorlab.de>
+ *
+ *   This program is free software; you can redistribute it and/or modify 
+ *   it under the terms of the GNU General Public License as published by 
+ *   the Free Software Foundation; either version 2 of the License, or 
+ *   (at your option) any later version. 
+ *
+ *   This program is distributed in the hope that it will be useful, 
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <sound/driver.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/parport.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include <sound/rawmidi.h>
+#include <sound/control.h>
+
+#define CARD_NAME "Miditerminal 4140"
+#define DRIVER_NAME "MTS64"
+#define PLATFORM_DRIVER "snd_mts64"
+
+static int index[SNDRV_CARDS]  = SNDRV_DEFAULT_IDX;
+static char *id[SNDRV_CARDS]   = SNDRV_DEFAULT_STR;
+static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+
+static struct platform_device *platform_devices[SNDRV_CARDS]; 
+static int device_count;
+
+module_param_array(index, int, NULL, S_IRUGO);
+MODULE_PARM_DESC(index, "Index value for " CARD_NAME " soundcard.");
+module_param_array(id, charp, NULL, S_IRUGO);
+MODULE_PARM_DESC(id, "ID string for " CARD_NAME " soundcard.");
+module_param_array(enable, bool, NULL, S_IRUGO);
+MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard.");
+
+MODULE_AUTHOR("Matthias Koenig <mk@phasorlab.de>");
+MODULE_DESCRIPTION("ESI Miditerminal 4140");
+MODULE_LICENSE("GPL");
+MODULE_SUPPORTED_DEVICE("{{ESI,Miditerminal 4140}}");
+
+/*********************************************************************
+ * Chip specific
+ *********************************************************************/
+#define MTS64_NUM_INPUT_PORTS 5
+#define MTS64_NUM_OUTPUT_PORTS 4
+#define MTS64_SMPTE_SUBSTREAM 4
+
+struct mts64 {
+	spinlock_t lock;		/* taken around HW access, also by the interrupt handler */
+	struct snd_card *card;
+	struct snd_rawmidi *rmidi;
+	struct pardevice *pardev;
+	int pardev_claimed;		/* set once parport_claim() succeeded in probe */
+
+	int open_count;			/* incremented on rawmidi open, decremented on close */
+	int current_midi_output_port;	/* starts at -1 */
+	int current_midi_input_port;	/* substream last announced by the HW, -1 = none */
+	u8 mode[MTS64_NUM_INPUT_PORTS];	/* MTS64_MODE_* flags per input substream */
+	struct snd_rawmidi_substream *midi_input_substream[MTS64_NUM_INPUT_PORTS];
+	int smpte_switch;		/* state of the "SMPTE Playback Switch" control */
+	u8 time[4]; /* [0]=hh, [1]=mm, [2]=ss, [3]=ff */
+	u8 fps;				/* index into the rate table of mts64_smpte_start() */
+};
+
+static int snd_mts64_free(struct mts64 *mts)
+{
+	kfree(mts);	/* counterpart of the kzalloc() in snd_mts64_create() */
+	return 0;	/* no failure path */
+}
+
+static int __devinit snd_mts64_create(struct snd_card *card,
+				      struct pardevice *pardev,
+				      struct mts64 **rchip)
+{
+	struct mts64 *chip;
+
+	/* leave a defined value behind on the failure path as well */
+	*rchip = NULL;
+
+	chip = kzalloc(sizeof(struct mts64), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	/* neither MIDI port has been selected yet, hence -1 */
+	chip->current_midi_input_port = -1;
+	chip->current_midi_output_port = -1;
+	chip->pardev = pardev;
+	chip->card = card;
+	spin_lock_init(&chip->lock);
+
+	*rchip = chip;
+	return 0;
+}
+
+/*********************************************************************
+ * HW register related constants
+ *********************************************************************/
+
+/* Status Bits */
+#define MTS64_STAT_BSY             0x80
+#define MTS64_STAT_BIT_SET         0x20  /* readout process, bit is set */
+#define MTS64_STAT_PORT            0x10  /* read byte is a port number */
+
+/* Control Bits */
+#define MTS64_CTL_READOUT          0x08  /* enable readout */
+#define MTS64_CTL_WRITE_CMD        0x06  
+#define MTS64_CTL_WRITE_DATA       0x02  
+#define MTS64_CTL_STROBE           0x01  
+
+/* Command */
+#define MTS64_CMD_RESET            0xfe
+#define MTS64_CMD_PROBE            0x8f  /* Used in probing procedure */
+#define MTS64_CMD_SMPTE_SET_TIME   0xe8
+#define MTS64_CMD_SMPTE_SET_FPS    0xee
+#define MTS64_CMD_SMPTE_STOP       0xef
+#define MTS64_CMD_SMPTE_FPS_24     0xe3
+#define MTS64_CMD_SMPTE_FPS_25     0xe2
+#define MTS64_CMD_SMPTE_FPS_2997   0xe4 
+#define MTS64_CMD_SMPTE_FPS_30D    0xe1
+#define MTS64_CMD_SMPTE_FPS_30     0xe0
+#define MTS64_CMD_COM_OPEN         0xf8  /* setting the communication mode */
+#define MTS64_CMD_COM_CLOSE1       0xff  /* clearing communication mode */
+#define MTS64_CMD_COM_CLOSE2       0xf5
+
+/*********************************************************************
+ * Hardware specific functions
+ *********************************************************************/
+static void mts64_enable_readout(struct parport *p);
+static void mts64_disable_readout(struct parport *p);
+static int mts64_device_ready(struct parport *p);
+static int mts64_device_init(struct parport *p);
+static int mts64_device_open(struct mts64 *mts);
+static int mts64_device_close(struct mts64 *mts);
+static u8 mts64_map_midi_input(u8 c);
+static int mts64_probe(struct parport *p);
+static u16 mts64_read(struct parport *p);
+static u8 mts64_read_char(struct parport *p);
+static void mts64_smpte_start(struct parport *p,
+			      u8 hours, u8 minutes,
+			      u8 seconds, u8 frames,
+			      u8 idx);
+static void mts64_smpte_stop(struct parport *p);
+static void mts64_write_command(struct parport *p, u8 c);
+static void mts64_write_data(struct parport *p, u8 c);
+static void mts64_write_midi(struct mts64 *mts, u8 c, int midiport);
+
+
+/*  Enables the readout procedure
+ *
+ *  A MIDI byte can only be read from the device while bit 3
+ *  (MTS64_CTL_READOUT) of the control port is set.
+ */
+static void mts64_enable_readout(struct parport *p)
+{
+	u8 ctl = parport_read_control(p);
+
+	/* raise the readout bit, leave the remaining control bits alone */
+	ctl |= MTS64_CTL_READOUT;
+	parport_write_control(p, ctl);
+}
+
+/*  Disables readout
+ *
+ *  Readout ends when bit 3 (MTS64_CTL_READOUT) of control is cleared
+ */
+static void mts64_disable_readout(struct parport *p)
+{
+	u8 ctl = parport_read_control(p);
+
+	/* drop the readout bit, leave the remaining control bits alone */
+	ctl &= ~MTS64_CTL_READOUT;
+	parport_write_control(p, ctl);
+}
+
+/*  waits for device ready
+ *
+ *  Polls the status port until bit 7 (MTS64_STAT_BSY) reads as set.
+ *  NOTE(review): "ready" == bit set looks inverted; confirm parport BUSY polarity.
+ *  1 device ready
+ *  0 failure (bit not seen within 0xffff polls)
+ */
+static int mts64_device_ready(struct parport *p)
+{
+	int tries = 0xffff;	/* upper bound on status polls */
+
+	while (tries-- > 0) {
+		/* done as soon as the BSY bit reads back as set */
+		if (parport_read_status(p) & MTS64_STAT_BSY)
+			return 1;
+	}
+
+	/* the bit never showed up within the polling budget */
+	return 0;
+}
+
+/*  Init device (LED blinking startup magic)
+ *
+ *  Sends a reset, then probes repeatedly until the device answers.
+ *  Returns:
+ *  0 init ok, -EIO failure
+ */
+static int __devinit mts64_device_init(struct parport *p)
+{
+	int attempt;
+	int err = -EIO;
+
+	mts64_write_command(p, MTS64_CMD_RESET);
+
+	/* up to 64 probes, 100 ms apart, until one of them succeeds */
+	for (attempt = 0; attempt < 64; ++attempt) {
+		msleep(100);
+		if (mts64_probe(p) == 0) {
+			err = 0;
+			break;
+		}
+	}
+
+	/* readout is switched off again on both outcomes */
+	mts64_disable_readout(p);
+	return err;
+}
+
+/*
+ *  Opens the device (set communication mode); always returns 0
+ */
+static int mts64_device_open(struct mts64 *mts)
+{
+	struct parport *p = mts->pardev->port;
+	int n = 5;	/* the open command is sent five times */
+
+	while (n--)
+		mts64_write_command(p, MTS64_CMD_COM_OPEN);
+
+	return 0;
+}
+
+/*
+ *  Close device (clear communication mode); always returns 0
+ */
+static int mts64_device_close(struct mts64 *mts)
+{
+	struct parport *p = mts->pardev->port;
+	int n = 5;	/* the CLOSE1/CLOSE2 pair is sent five times */
+
+	while (n--) {
+		mts64_write_command(p, MTS64_CMD_COM_CLOSE1);
+		mts64_write_command(p, MTS64_CMD_COM_CLOSE2);
+	}
+
+	return 0;
+}
+
+/*  map hardware port to substream number
+ * 
+ *  When reading a byte from the device, the device tells us
+ *  which port the byte belongs to. This HW port has to be mapped to
+ *  the midiport (substream number).
+ *  substream 0-3 are Midiports 1-4
+ *  substream 4 is SMPTE Timecode
+ *  The mapping is done by the table:
+ *  HW | 0 | 1 | 2 | 3 | 4 
+ *  SW | 0 | 1 | 4 | 2 | 3
+ */
+static u8 mts64_map_midi_input(u8 c)
+{
+	static u8 map[] = { 0, 1, 4, 2, 3 };
+
+	return map[c];	/* NOTE(review): c is not range-checked against the 5 entries */
+}
+
+
+/*  Probe parport for device
+ *
+ *  Do we have a Miditerminal 4140 on parport?
+ *  Returns:
+ *  0       device found
+ *  -ENODEV no device
+ */
+static int __devinit mts64_probe(struct parport *p)
+{
+	u8 echo;
+
+	mts64_smpte_stop(p);
+	mts64_write_command(p, MTS64_CMD_PROBE);
+
+	/* fixed 50 ms pause before the answer is fetched */
+	msleep(50);
+
+	/* only the data byte matters; the status half is dropped */
+	echo = mts64_read(p) & 0x00ff;
+
+	/* the device counts as found when the probe command comes back */
+	if (echo == MTS64_CMD_PROBE)
+		return 0;
+	return -ENODEV;
+}
+
+/*  Read byte incl. status from device
+ *
+ *  Returns:
+ *  data in lower 8 bits and status in upper 8 bits
+ */
+static u16 mts64_read(struct parport *p)
+{
+	u8 data, status;
+
+	mts64_device_ready(p);	/* result ignored: the read goes ahead either way */
+	mts64_enable_readout(p);
+	status = parport_read_status(p);	/* sampled before the data bits are fetched */
+	data = mts64_read_char(p);
+	mts64_disable_readout(p);
+
+	return (status << 8) | data;
+}
+
+/*  Read a byte from device
+ *
+ *  Note, that readout mode has to be enabled.
+ *  readout procedure is as follows: 
+ *  - Write number of the Bit to read to DATA
+ *  - Read STATUS
+ *  - Bit 5 of STATUS indicates if Bit is set
+ *
+ *  Returns:
+ *  Byte read from device
+ */
+static u8 mts64_read_char(struct parport *p)
+{
+	u8 c = 0;
+	u8 status;
+	u8 i;
+
+	for (i = 0; i < 8; ++i) {
+		parport_write_data(p, i);	/* select bit number i */
+		c >>= 1;	/* bit 0 is read first: shift down, then fill in from the top */
+		status = parport_read_status(p);
+		if (status & MTS64_STAT_BIT_SET) 
+			c |= 0x80;
+	}
+	
+	return c;
+}
+
+/*  Starts SMPTE Timecode generation
+ *
+ *  The device creates SMPTE Timecode by hardware.
+ *  idx selects the frame rate:
+ *  0 24 fps
+ *  1 25 fps
+ *  2 29.97 fps
+ *  3 30 fps (Drop-frame)
+ *  4 30 fps
+ */
+static void mts64_smpte_start(struct parport *p,
+			      u8 hours, u8 minutes,
+			      u8 seconds, u8 frames,
+			      u8 idx)
+{
+	static u8 fps[5] = { MTS64_CMD_SMPTE_FPS_24, 
+			     MTS64_CMD_SMPTE_FPS_25,
+			     MTS64_CMD_SMPTE_FPS_2997, 
+			     MTS64_CMD_SMPTE_FPS_30D,
+			     MTS64_CMD_SMPTE_FPS_30    };
+
+	mts64_write_command(p, MTS64_CMD_SMPTE_SET_TIME);
+	mts64_write_command(p, frames);		/* time follows as ff, ss, mm, hh */
+	mts64_write_command(p, seconds);
+	mts64_write_command(p, minutes);
+	mts64_write_command(p, hours);
+
+	mts64_write_command(p, MTS64_CMD_SMPTE_SET_FPS);
+	mts64_write_command(p, fps[idx]);	/* idx is not range-checked here */
+}
+
+/*  Stops SMPTE Timecode generation by sending MTS64_CMD_SMPTE_STOP
+ */
+static void mts64_smpte_stop(struct parport *p)
+{
+	mts64_write_command(p, MTS64_CMD_SMPTE_STOP);
+}
+
+/*  Write a command byte to device
+ */
+static void mts64_write_command(struct parport *p, u8 c)
+{
+	mts64_device_ready(p);	/* result ignored: the write goes ahead either way */
+
+	parport_write_data(p, c);
+	/* control lines: WRITE_CMD with the STROBE bit off, on, off */
+	parport_write_control(p, MTS64_CTL_WRITE_CMD);
+	parport_write_control(p, MTS64_CTL_WRITE_CMD | MTS64_CTL_STROBE);
+	parport_write_control(p, MTS64_CTL_WRITE_CMD);
+}
+
+/*  Write a data byte to device 
+ */
+static void mts64_write_data(struct parport *p, u8 c)
+{
+	mts64_device_ready(p);	/* result ignored: the write goes ahead either way */
+
+	parport_write_data(p, c);
+	/* control lines: WRITE_DATA with the STROBE bit off, on, off */
+	parport_write_control(p, MTS64_CTL_WRITE_DATA);
+	parport_write_control(p, MTS64_CTL_WRITE_DATA | MTS64_CTL_STROBE);
+	parport_write_control(p, MTS64_CTL_WRITE_DATA);
+}
+
+/*  Write a MIDI byte to midiport
+ *
+ *  midiport is sent to the device as a command byte.
+ *  NOTE(review): the output trigger passes substream->number+1 - confirm range.
+ *  assumptions: communication mode is on
+ */
+static void mts64_write_midi(struct mts64 *mts, u8 c,
+			     int midiport)
+{
+	struct parport *p = mts->pardev->port;
+	/* check current midiport.  NOTE(review): the cached port is never
+	   written in this function - confirm whether that is intended */
+	if (mts->current_midi_output_port != midiport)
+		mts64_write_command(p, midiport);
+
+	/* write midi byte */
+	mts64_write_data(p, c);
+}
+
+/*********************************************************************
+ * Control elements
+ *********************************************************************/
+
+/* SMPTE Switch: one boolean value, range 0-1 */
+static int snd_mts64_ctl_smpte_switch_info(struct snd_kcontrol *kctl,
+					   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 1;
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_switch_get(struct snd_kcontrol* kctl,
+					  struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+
+	spin_lock_irq(&mts->lock);
+	uctl->value.integer.value[0] = mts->smpte_switch;	/* cached state, no HW access */
+	spin_unlock_irq(&mts->lock);
+
+	return 0;
+}
+
+/* smpte_switch is not accessed from IRQ handler, so we just need to protect
+   the HW access.  The value is folded to 0/1; returns 1 if it changed. */
+static int snd_mts64_ctl_smpte_switch_put(struct snd_kcontrol* kctl,
+					  struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+	int val = !!uctl->value.integer.value[0];	/* any non-zero means "on" */
+	int changed = 0;
+
+	spin_lock_irq(&mts->lock);
+	if (mts->smpte_switch == val)
+		goto __out;
+	changed = 1;
+	mts->smpte_switch = val;
+	if (val) {
+		mts64_smpte_start(mts->pardev->port,
+				  mts->time[0], mts->time[1],
+				  mts->time[2], mts->time[3],
+				  mts->fps);
+	} else {
+		mts64_smpte_stop(mts->pardev->port);
+	}
+__out:
+	spin_unlock_irq(&mts->lock);
+	return changed;
+}
+
+static struct snd_kcontrol_new mts64_ctl_smpte_switch __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Playback Switch",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 0,	/* not read by the switch callbacks */
+	.info = snd_mts64_ctl_smpte_switch_info,
+	.get  = snd_mts64_ctl_smpte_switch_get,
+	.put  = snd_mts64_ctl_smpte_switch_put	/* starts or stops the timecode */
+};
+
+/* Time: info callback of the hours control */
+static int snd_mts64_ctl_smpte_time_h_info(struct snd_kcontrol *kctl,
+					   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 23;
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_time_f_info(struct snd_kcontrol *kctl,
+					   struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 99;	/* used by the frames control */
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_time_info(struct snd_kcontrol *kctl,
+					 struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 59;	/* shared by the minutes and seconds controls */
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_time_get(struct snd_kcontrol *kctl,
+					struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+	int idx = kctl->private_value;	/* slot in mts->time[], set per control */
+
+	spin_lock_irq(&mts->lock);
+	uctl->value.integer.value[0] = mts->time[idx];
+	spin_unlock_irq(&mts->lock);
+
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_time_put(struct snd_kcontrol *kctl,
+					struct snd_ctl_elem_value *uctl)
+{
+	static const u8 max[4] = { 23, 59, 59, 99 }; /* hh, mm, ss, ff as in *_info */
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+	int changed, idx = kctl->private_value;	/* idx: slot in mts->time[] */
+	long val = uctl->value.integer.value[0];
+
+	if (val < 0 || val > max[idx])	/* outside the advertised range */
+		return -EINVAL;
+	spin_lock_irq(&mts->lock);
+	changed = (mts->time[idx] != val);	/* 1 if the stored value differs */
+	mts->time[idx] = val;
+	spin_unlock_irq(&mts->lock);
+	return changed;
+}
+
+static struct snd_kcontrol_new mts64_ctl_smpte_time_hours __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Time Hours",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 0,	/* slot in mts->time[]: hh */
+	.info = snd_mts64_ctl_smpte_time_h_info,
+	.get  = snd_mts64_ctl_smpte_time_get,
+	.put  = snd_mts64_ctl_smpte_time_put
+};
+
+static struct snd_kcontrol_new mts64_ctl_smpte_time_minutes __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Time Minutes",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 1,	/* slot in mts->time[]: mm */
+	.info = snd_mts64_ctl_smpte_time_info,
+	.get  = snd_mts64_ctl_smpte_time_get,
+	.put  = snd_mts64_ctl_smpte_time_put
+};
+
+static struct snd_kcontrol_new mts64_ctl_smpte_time_seconds __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Time Seconds",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 2,	/* slot in mts->time[]: ss */
+	.info = snd_mts64_ctl_smpte_time_info,
+	.get  = snd_mts64_ctl_smpte_time_get,
+	.put  = snd_mts64_ctl_smpte_time_put
+};
+
+static struct snd_kcontrol_new mts64_ctl_smpte_time_frames __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Time Frames",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 3,	/* slot in mts->time[]: ff */
+	.info = snd_mts64_ctl_smpte_time_f_info,
+	.get  = snd_mts64_ctl_smpte_time_get,
+	.put  = snd_mts64_ctl_smpte_time_put
+};
+
+/* FPS: enumerated control with five named frame rates */
+static int snd_mts64_ctl_smpte_fps_info(struct snd_kcontrol *kctl,
+					struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[5] = { "24",
+				  "25",
+				  "29.97",
+				  "30D",
+				  "30"    };
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = 5;
+	if (uinfo->value.enumerated.item > 4)	/* clamp the queried item to the last entry */
+		uinfo->value.enumerated.item = 4;
+	strcpy(uinfo->value.enumerated.name,
+	       texts[uinfo->value.enumerated.item]);
+	
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_fps_get(struct snd_kcontrol *kctl,
+				       struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+
+	spin_lock_irq(&mts->lock);
+	uctl->value.enumerated.item[0] = mts->fps;	/* stored enum index, no HW access */
+	spin_unlock_irq(&mts->lock);
+
+	return 0;
+}
+
+static int snd_mts64_ctl_smpte_fps_put(struct snd_kcontrol *kctl,
+				       struct snd_ctl_elem_value *uctl)
+{
+	struct mts64 *mts = snd_kcontrol_chip(kctl);
+	unsigned int item = uctl->value.enumerated.item[0];
+	int changed;	/* return value: 1 if the stored rate differs, else 0 */
+
+	if (item >= 5)	/* mts64_smpte_start() indexes a 5-entry table with this */
+		return -EINVAL;
+	spin_lock_irq(&mts->lock);
+	changed = (mts->fps != item);
+	mts->fps = item;
+	spin_unlock_irq(&mts->lock);
+	return changed;
+}
+
+static struct snd_kcontrol_new mts64_ctl_smpte_fps __devinitdata = {
+	.iface = SNDRV_CTL_ELEM_IFACE_RAWMIDI,
+	.name  = "SMPTE Fps",
+	.index = 0,
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+	.private_value = 0,	/* not read by the fps callbacks */
+	.info  = snd_mts64_ctl_smpte_fps_info,
+	.get   = snd_mts64_ctl_smpte_fps_get,
+	.put   = snd_mts64_ctl_smpte_fps_put
+};
+
+
+static int __devinit snd_mts64_ctl_create(struct snd_card *card,
+					  struct mts64 *mts)
+{
+	static struct snd_kcontrol_new *control[] = {	/* NULL-terminated */
+		&mts64_ctl_smpte_switch,
+		&mts64_ctl_smpte_time_hours,
+		&mts64_ctl_smpte_time_minutes,
+		&mts64_ctl_smpte_time_seconds,
+		&mts64_ctl_smpte_time_frames,
+		&mts64_ctl_smpte_fps,
+		NULL
+	};
+	struct snd_kcontrol_new **ctl;
+	int err;
+
+	for (ctl = control; *ctl; ++ctl) {
+		err = snd_ctl_add(card, snd_ctl_new1(*ctl, mts));
+		if (err >= 0)
+			continue;
+		snd_printd("Cannot create control: %s\n", (*ctl)->name);
+		return err;	/* first failure aborts the whole list */
+	}
+	return 0;
+}
+
+/*********************************************************************
+ * Rawmidi
+ *********************************************************************/
+#define MTS64_MODE_INPUT_TRIGGERED 0x01
+
+static int snd_mts64_rawmidi_open(struct snd_rawmidi_substream *substream)
+{
+	struct mts64 *mts = substream->rmidi->private_data;
+
+	if (mts->open_count == 0) {
+		/* We don't need a spinlock here, because this is just called 
+		   if the device has not been opened before. 
+		   So there aren't any IRQs from the device */
+		mts64_device_open(mts);
+		/* fixed 50 ms pause after switching to communication mode */
+		msleep(50);
+	}
+	++(mts->open_count);
+
+	return 0;
+}
+
+static int snd_mts64_rawmidi_close(struct snd_rawmidi_substream *substream)
+{
+	struct mts64 *mts = substream->rmidi->private_data;
+	unsigned long flags;
+
+	--(mts->open_count);
+	if (mts->open_count == 0) {
+		/* We need the spinlock_irqsave here because we can still
+		   have IRQs at this point */
+		spin_lock_irqsave(&mts->lock, flags);
+		mts64_device_close(mts);
+		spin_unlock_irqrestore(&mts->lock, flags);
+		/* fixed 500 ms pause after leaving communication mode */
+		msleep(500);
+
+	} else if (mts->open_count < 0)
+		mts->open_count = 0;	/* clamp the counter at zero */
+
+	return 0;
+}
+
+static void snd_mts64_rawmidi_output_trigger(struct snd_rawmidi_substream *substream,
+					     int up)
+{
+	struct mts64 *mts = substream->rmidi->private_data;
+	u8 data;
+	unsigned long flags;
+	/* 'up' is not looked at: all pending bytes are written, one at a time */
+	spin_lock_irqsave(&mts->lock, flags);
+	while (snd_rawmidi_transmit_peek(substream, &data, 1) == 1) {
+		mts64_write_midi(mts, data, substream->number+1);
+		snd_rawmidi_transmit_ack(substream, 1);
+	}
+	spin_unlock_irqrestore(&mts->lock, flags);
+}
+
+static void snd_mts64_rawmidi_input_trigger(struct snd_rawmidi_substream *substream,
+					    int up)
+{
+	struct mts64 *mts = substream->rmidi->private_data;
+	unsigned int port = substream->number;	/* index into mts->mode[] */
+	unsigned long flags;
+
+	spin_lock_irqsave(&mts->lock, flags);
+	if (!up)
+		mts->mode[port] &= ~MTS64_MODE_INPUT_TRIGGERED;
+	else
+		mts->mode[port] |= MTS64_MODE_INPUT_TRIGGERED;
+	spin_unlock_irqrestore(&mts->lock, flags);
+}
+
+static struct snd_rawmidi_ops snd_mts64_rawmidi_output_ops = {
+	.open    = snd_mts64_rawmidi_open,	/* open/close are shared with the input ops */
+	.close   = snd_mts64_rawmidi_close,
+	.trigger = snd_mts64_rawmidi_output_trigger
+};
+
+static struct snd_rawmidi_ops snd_mts64_rawmidi_input_ops = {
+	.open    = snd_mts64_rawmidi_open,	/* open/close are shared with the output ops */
+	.close   = snd_mts64_rawmidi_close,
+	.trigger = snd_mts64_rawmidi_input_trigger
+};
+
+/* Create and initialize the rawmidi component, then add the controls */
+static int __devinit snd_mts64_rawmidi_create(struct snd_card *card)
+{
+	struct mts64 *mts = card->private_data;
+	struct snd_rawmidi *rmidi;
+	struct snd_rawmidi_substream *substream;
+	struct list_head *list;
+	int err;
+	
+	err = snd_rawmidi_new(card, CARD_NAME, 0, 
+			      MTS64_NUM_OUTPUT_PORTS, 
+			      MTS64_NUM_INPUT_PORTS, 
+			      &rmidi);
+	if (err < 0) 
+		return err;
+
+	rmidi->private_data = mts;
+	strcpy(rmidi->name, CARD_NAME);
+	rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT |
+		            SNDRV_RAWMIDI_INFO_INPUT |
+                            SNDRV_RAWMIDI_INFO_DUPLEX;
+
+	mts->rmidi = rmidi;
+
+	/* register rawmidi ops */
+	snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, 
+			    &snd_mts64_rawmidi_output_ops);
+	snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, 
+			    &snd_mts64_rawmidi_input_ops);
+
+	/* name substreams */
+	/* output */
+	list_for_each(list, 
+		      &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams) {
+		substream = list_entry(list, struct snd_rawmidi_substream, list);
+		sprintf(substream->name,
+			"Miditerminal %d", substream->number+1);
+	}
+	/* input: also remember each substream for the interrupt handler */
+	list_for_each(list, 
+		      &rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT].substreams) {
+		substream = list_entry(list, struct snd_rawmidi_substream, list);
+		mts->midi_input_substream[substream->number] = substream;
+		switch(substream->number) {
+		case MTS64_SMPTE_SUBSTREAM:
+			strcpy(substream->name, "Miditerminal SMPTE");
+			break;
+		default:	/* ordinary MIDI input, named by its 1-based number */
+			sprintf(substream->name,
+				"Miditerminal %d", substream->number+1);
+		}
+	}
+
+	/* controls (SMPTE switch, time and fps elements) */
+	err = snd_mts64_ctl_create(card, mts);
+
+	return err;
+}
+
+/*********************************************************************
+ * parport stuff
+ *********************************************************************/
+/* parport ISR: reads one byte plus status and routes it; bogus port ids are dropped */
+static void snd_mts64_interrupt(int irq, void *private, struct pt_regs *r)
+{
+	struct mts64 *mts = ((struct snd_card*)private)->private_data;
+	struct snd_rawmidi_substream *substream;
+	u16 ret;
+	u8 status, data;
+
+	spin_lock(&mts->lock);
+	ret = mts64_read(mts->pardev->port);
+	data = ret & 0x00ff;	/* low byte: payload */
+	status = ret >> 8;	/* high byte: status sampled by mts64_read() */
+	if (status & MTS64_STAT_PORT) {
+		/* payload is a HW port number; outside the map means "none" */
+		mts->current_midi_input_port = (data < MTS64_NUM_INPUT_PORTS) ?
+			mts64_map_midi_input(data) : -1;
+	} else if (mts->current_midi_input_port != -1) {
+		/* payload is a MIDI byte for the most recently announced port */
+		substream = mts->midi_input_substream[mts->current_midi_input_port];
+		if (mts->mode[substream->number] & MTS64_MODE_INPUT_TRIGGERED)
+			snd_rawmidi_receive(substream, &data, 1);
+	}
+	spin_unlock(&mts->lock);
+}
+
+/* Temporarily registers and claims port p just to run mts64_probe() on it */
+static int __devinit snd_mts64_probe_port(struct parport *p)
+{
+	struct pardevice *tmp;
+	int res = -EIO;
+
+	/* throw-away registration: no callbacks, no flags, no private data */
+	tmp = parport_register_device(p, DRIVER_NAME,
+				      NULL, NULL, NULL, 0, NULL);
+	if (tmp == NULL)
+		return res;
+
+	if (parport_claim(tmp))
+		goto out_unregister;
+
+	/* the port is claimed now: ask whether a Miditerminal answers */
+	res = mts64_probe(p);
+	parport_release(tmp);
+
+out_unregister:
+	parport_unregister_device(tmp);
+	return res;
+}
+
+static void __devinit snd_mts64_attach(struct parport *p)
+{
+	struct platform_device *device;
+
+	device = platform_device_alloc(PLATFORM_DRIVER, device_count);
+	if (!device) 
+		return;
+
+	/* Temporary assignment to forward the parport to snd_mts64_probe() */
+	platform_set_drvdata(device, p);
+
+	if (platform_device_register(device) < 0) {
+		platform_device_put(device);
+		return;
+	}
+
+	/* Since we don't get the return value of probe, we check the drvdata:
+	 * snd_mts64_probe() clears it first and sets it again on its way out */
+	if (!platform_get_drvdata(device)) {
+		platform_device_unregister(device);
+		return;
+	}
+
+	/* register device in global table */
+	platform_devices[device_count] = device;
+	device_count++;
+}
+
+static void snd_mts64_detach(struct parport *p)
+{
+	/* nothing to do here; devices go away in snd_mts64_unregister_all() */
+}
+
+static struct parport_driver mts64_parport_driver = {
+	.name   = "mts64",
+	.attach = snd_mts64_attach,	/* allocates one platform device per call */
+	.detach = snd_mts64_detach	/* empty */
+};
+
+/*********************************************************************
+ * platform stuff
+ *********************************************************************/
+static void snd_mts64_card_private_free(struct snd_card *card)
+{
+	struct mts64 *mts = card->private_data;
+	struct pardevice *pardev = mts->pardev;
+	/* release only what probe got hold of, then unregister the pardevice */
+	if (pardev) {
+		if (mts->pardev_claimed)
+			parport_release(pardev);
+		parport_unregister_device(pardev);
+	}
+	/* the chip structure goes last */
+	snd_mts64_free(mts);
+}
+
+/* Platform probe: builds one card for the parport handed over via drvdata */
+static int __devinit snd_mts64_probe(struct platform_device *pdev)
+{
+	struct pardevice *pardev;
+	struct parport *p;
+	int dev = pdev->id;
+	struct snd_card *card = NULL;
+	struct mts64 *mts = NULL;
+	int err;
+
+	/* fetch the parport stored by snd_mts64_attach(); NULL marks "failed" */
+	p = platform_get_drvdata(pdev);
+	platform_set_drvdata(pdev, NULL);
+
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+	if (!enable[dev])
+		return -ENOENT;
+	if ((err = snd_mts64_probe_port(p)) < 0)
+		return err;
+
+	card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0);
+	if (card == NULL) {
+		snd_printd("Cannot create card\n");
+		return -ENOMEM;
+	}
+	strcpy(card->driver, DRIVER_NAME);
+	strcpy(card->shortname, "ESI " CARD_NAME);
+	sprintf(card->longname,  "%s at 0x%lx, irq %i",
+		card->shortname, p->base, p->irq);
+
+	pardev = parport_register_device(p,                   /* port */
+					 DRIVER_NAME,         /* name */
+					 NULL,                /* preempt */
+					 NULL,                /* wakeup */
+					 snd_mts64_interrupt, /* ISR */
+					 PARPORT_DEV_EXCL,    /* flags */
+					 (void *)card);       /* private */
+	if (pardev == NULL) {
+		snd_printd("Cannot register pardevice\n");
+		err = -EIO;
+		goto __err;
+	}
+
+	if ((err = snd_mts64_create(card, pardev, &mts)) < 0) {
+		snd_printd("Cannot create main component\n");
+		parport_unregister_device(pardev);
+		goto __err;
+	}
+	card->private_data = mts;
+	card->private_free = snd_mts64_card_private_free;
+	if ((err = snd_mts64_rawmidi_create(card)) < 0) {
+		snd_printd("Creating Rawmidi component failed\n");
+		goto __err;
+	}
+
+	/* claim parport */
+	if (parport_claim(pardev)) {
+		snd_printd("Cannot claim parport 0x%lx\n", pardev->port->base);
+		err = -EIO;
+		goto __err;
+	}
+	mts->pardev_claimed = 1;
+
+	/* init device */
+	if ((err = mts64_device_init(p)) < 0)
+		goto __err;
+
+	/* At this point card will be usable */
+	if ((err = snd_card_register(card)) < 0) {
+		snd_printd("Cannot register card\n");
+		goto __err;
+	}
+
+	/* publish the card only now, so every failure path leaves drvdata NULL */
+	platform_set_drvdata(pdev, card);
+	snd_printk("ESI Miditerminal 4140 on 0x%lx\n", p->base);
+	return 0;
+
+__err:
+	snd_card_free(card);
+	return err;
+}
+
+static int snd_mts64_remove(struct platform_device *pdev)
+{
+	struct snd_card *card = platform_get_drvdata(pdev);
+	/* drvdata holds the card that snd_mts64_probe() stored, or NULL */
+	if (card)
+		snd_card_free(card);
+
+	return 0;
+}
+
+
+static struct platform_driver snd_mts64_driver = {
+	.probe  = snd_mts64_probe,
+	.remove = snd_mts64_remove,
+	.driver = {
+		.name = PLATFORM_DRIVER	/* same name snd_mts64_attach() allocates devices under */
+	}
+};
+
+/*********************************************************************
+ * module init stuff
+ *********************************************************************/
+static void snd_mts64_unregister_all(void)
+{
+	struct platform_device **slot;	/* walks platform_devices[] */
+
+	for (slot = platform_devices; slot < platform_devices + SNDRV_CARDS; ++slot) {
+		if (*slot == NULL)
+			continue;
+		platform_device_unregister(*slot);
+		*slot = NULL;
+	}
+	platform_driver_unregister(&snd_mts64_driver);
+	parport_unregister_driver(&mts64_parport_driver);
+}
+
+static int __init snd_mts64_module_init(void)
+{
+	int err;
+
+	err = platform_driver_register(&snd_mts64_driver);
+	if (err < 0)
+		return err;
+
+	if (parport_register_driver(&mts64_parport_driver) != 0) {
+		platform_driver_unregister(&snd_mts64_driver);
+		return -EIO;
+	}
+
+	/* device_count is bumped by snd_mts64_attach() for every card it kept */
+	if (device_count != 0)
+		return 0;
+	snd_mts64_unregister_all();
+	return -ENODEV;
+}
+
+static void __exit snd_mts64_module_exit(void)
+{
+	snd_mts64_unregister_all();	/* platform devices first, then both drivers */
+}
+
+module_init(snd_mts64_module_init);
+module_exit(snd_mts64_module_exit);
diff --git a/sound/drivers/opl4/opl4_proc.c b/sound/drivers/opl4/opl4_proc.c
index e552ec3..1679300 100644
--- a/sound/drivers/opl4/opl4_proc.c
+++ b/sound/drivers/opl4/opl4_proc.c
@@ -105,13 +105,13 @@
 					  struct file *file, long long offset, int orig)
 {
 	switch (orig) {
-	case 0: /* SEEK_SET */
+	case SEEK_SET:
 		file->f_pos = offset;
 		break;
-	case 1: /* SEEK_CUR */
+	case SEEK_CUR:
 		file->f_pos += offset;
 		break;
-	case 2: /* SEEK_END, offset is negative */
+	case SEEK_END: /* offset is negative */
 		file->f_pos = entry->size + offset;
 		break;
 	default:
@@ -159,8 +159,7 @@
 
 void snd_opl4_free_proc(struct snd_opl4 *opl4)
 {
-	if (opl4->proc_entry)
-		snd_info_unregister(opl4->proc_entry);
+	snd_info_free_entry(opl4->proc_entry);
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/sound/drivers/vx/vx_mixer.c b/sound/drivers/vx/vx_mixer.c
index c1d7fcd..1613ed8 100644
--- a/sound/drivers/vx/vx_mixer.c
+++ b/sound/drivers/vx/vx_mixer.c
@@ -23,6 +23,7 @@
 #include <sound/driver.h>
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/vx_core.h>
 #include "vx_cmd.h"
 
@@ -455,10 +456,13 @@
 
 static struct snd_kcontrol_new vx_control_output_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =		"Master Playback Volume",
 	.info =		vx_output_level_info,
 	.get =		vx_output_level_get,
 	.put =		vx_output_level_put,
+	/* tlv will be filled later */
 };
 
 /*
@@ -712,12 +716,17 @@
 	return 0;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_audio_gain, -10975, 25, 0);
+
 static struct snd_kcontrol_new vx_control_audio_gain = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	.info =         vx_audio_gain_info,
 	.get =          vx_audio_gain_get,
-	.put =          vx_audio_gain_put
+	.put =          vx_audio_gain_put,
+	.tlv = { .p = db_scale_audio_gain },
 };
 static struct snd_kcontrol_new vx_control_output_switch = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -729,9 +738,12 @@
 static struct snd_kcontrol_new vx_control_monitor_gain = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name =         "Monitoring Volume",
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.info =         vx_audio_gain_info,	/* shared */
 	.get =          vx_audio_monitor_get,
-	.put =          vx_audio_monitor_put
+	.put =          vx_audio_monitor_put,
+	.tlv = { .p = db_scale_audio_gain },
 };
 static struct snd_kcontrol_new vx_control_monitor_switch = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -918,6 +930,7 @@
 	for (i = 0; i < chip->hw->num_outs; i++) {
 		temp = vx_control_output_level;
 		temp.index = i;
+		temp.tlv.p = chip->hw->output_level_db_scale;
 		if ((err = snd_ctl_add(card, snd_ctl_new1(&temp, chip))) < 0)
 			return err;
 	}
diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c
index dc7cc20..5da49e2 100644
--- a/sound/i2c/other/ak4xxx-adda.c
+++ b/sound/i2c/other/ak4xxx-adda.c
@@ -28,12 +28,14 @@
 #include <linux/init.h>
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/ak4xxx-adda.h>
 
 MODULE_AUTHOR("Jaroslav Kysela <perex@suse.cz>, Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("Routines for control of AK452x / AK43xx  AD/DA converters");
 MODULE_LICENSE("GPL");
 
+/* write the given register and save the data to the cache */
 void snd_akm4xxx_write(struct snd_akm4xxx *ak, int chip, unsigned char reg,
 		       unsigned char val)
 {
@@ -41,15 +43,7 @@
 	ak->ops.write(ak, chip, reg, val);
 
 	/* save the data */
-	if (ak->type == SND_AK4524 || ak->type == SND_AK4528) {
-		if ((reg != 0x04 && reg != 0x05) || (val & 0x80) == 0)
-			snd_akm4xxx_set(ak, chip, reg, val);
-		else
-			snd_akm4xxx_set_ipga(ak, chip, reg, val);
-	} else {
-		/* AK4529, or else */
-		snd_akm4xxx_set(ak, chip, reg, val);
-	}
+	snd_akm4xxx_set(ak, chip, reg, val);
 	ak->ops.unlock(ak, chip);
 }
 
@@ -73,12 +67,6 @@
 		for (reg = 0x04; reg < maxreg; reg++)
 			snd_akm4xxx_write(ak, chip, reg,
 					  snd_akm4xxx_get(ak, chip, reg));
-		if (ak->type == SND_AK4528)
-			continue;
-		/* IPGA */
-		for (reg = 0x04; reg < 0x06; reg++)
-			snd_akm4xxx_write(ak, chip, reg,
-					  snd_akm4xxx_get_ipga(ak, chip, reg));
 	}
 }
 
@@ -137,11 +125,48 @@
 	case SND_AK4381:
 		ak4381_reset(ak, state);
 		break;
+	default:
+		break;
 	}
 }
 
 EXPORT_SYMBOL(snd_akm4xxx_reset);
 
+
+/*
+ * Volume conversion table for non-linear volumes
+ * from -63.5dB (mute) to 0dB step 0.5dB; read-only, indexed by the
+ * mixer value (0..127).
+ * Used for AK4524 input/output attenuation, AK4528, and
+ * AK5365 input attenuation
+ */
+static const unsigned char vol_cvt_datt[128] = {
+	0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04,
+	0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06,
+	0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x09, 0x0a,
+	0x0a, 0x0b, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f,
+	0x10, 0x10, 0x11, 0x12, 0x12, 0x13, 0x13, 0x14,
+	0x15, 0x16, 0x17, 0x17, 0x18, 0x19, 0x1a, 0x1c,
+	0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x23,
+	0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, 0x2b, 0x2d,
+	0x2e, 0x30, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35,
+	0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3e, 0x3f, 0x40,
+	0x41, 0x42, 0x43, 0x44, 0x46, 0x47, 0x48, 0x4a,
+	0x4b, 0x4d, 0x4e, 0x50, 0x51, 0x52, 0x53, 0x54,
+	0x55, 0x56, 0x58, 0x59, 0x5b, 0x5c, 0x5e, 0x5f,
+	0x60, 0x61, 0x62, 0x64, 0x65, 0x66, 0x67, 0x69,
+	0x6a, 0x6c, 0x6d, 0x6f, 0x70, 0x71, 0x72, 0x73,
+	0x75, 0x76, 0x77, 0x79, 0x7a, 0x7c, 0x7d, 0x7f,
+};
+
+/*
+ * dB tables
+ */
+static DECLARE_TLV_DB_SCALE(db_scale_vol_datt, -6350, 50, 1);
+static DECLARE_TLV_DB_SCALE(db_scale_8bit, -12750, 50, 1);
+static DECLARE_TLV_DB_SCALE(db_scale_7bit, -6350, 50, 1);
+static DECLARE_TLV_DB_LINEAR(db_scale_linear, TLV_DB_GAIN_MUTE, 0);
+
 /*
  * initialize all the ak4xxx chips
  */
@@ -155,8 +180,6 @@
 		0x01, 0x03, /* 1: ADC/DAC enable */
 		0x04, 0x00, /* 4: ADC left muted */
 		0x05, 0x00, /* 5: ADC right muted */
-		0x04, 0x80, /* 4: ADC IPGA gain 0dB */
-		0x05, 0x80, /* 5: ADC IPGA gain 0dB */
 		0x06, 0x00, /* 6: DAC left muted */
 		0x07, 0x00, /* 7: DAC right muted */
 		0xff, 0xff
@@ -238,6 +261,9 @@
 	int chip, num_chips;
 	unsigned char *ptr, reg, data, *inits;
 
+	memset(ak->images, 0, sizeof(ak->images));
+	memset(ak->volumes, 0, sizeof(ak->volumes));
+
 	switch (ak->type) {
 	case SND_AK4524:
 		inits = inits_ak4524;
@@ -263,6 +289,9 @@
 		inits = inits_ak4381;
 		num_chips = ak->num_dacs / 2;
 		break;
+	case SND_AK5365:
+		/* FIXME: any init sequence? */
+		return;
 	default:
 		snd_BUG();
 		return;
@@ -280,14 +309,23 @@
 
 EXPORT_SYMBOL(snd_akm4xxx_init);
 
+/*
+ * Mixer callbacks
+ */
+#define AK_IPGA 			(1<<20)	/* including IPGA */
+#define AK_VOL_CVT 			(1<<21)	/* need dB conversion */
+#define AK_NEEDSMSB 			(1<<22)	/* need MSB update bit */
+#define AK_INVERT 			(1<<23)	/* data is inverted */
 #define AK_GET_CHIP(val)		(((val) >> 8) & 0xff)
 #define AK_GET_ADDR(val)		((val) & 0xff)
-#define AK_GET_SHIFT(val)		(((val) >> 16) & 0x7f)
+#define AK_GET_SHIFT(val)		(((val) >> 16) & 0x0f)
+#define AK_GET_VOL_CVT(val)		(((val) >> 21) & 1)
+#define AK_GET_IPGA(val)		(((val) >> 20) & 1)
+#define AK_GET_NEEDSMSB(val)		(((val) >> 22) & 1)
 #define AK_GET_INVERT(val)		(((val) >> 23) & 1)
 #define AK_GET_MASK(val)		(((val) >> 24) & 0xff)
 #define AK_COMPOSE(chip,addr,shift,mask) \
 	(((chip) << 8) | (addr) | ((shift) << 16) | ((mask) << 24))
-#define AK_INVERT 			(1<<23)
 
 static int snd_akm4xxx_volume_info(struct snd_kcontrol *kcontrol,
 				   struct snd_ctl_elem_info *uinfo)
@@ -307,31 +345,39 @@
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
-	int invert = AK_GET_INVERT(kcontrol->private_value);
-	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
-	unsigned char val = snd_akm4xxx_get(ak, chip, addr);
-	
-	ucontrol->value.integer.value[0] = invert ? mask - val : val;
+
+	ucontrol->value.integer.value[0] = snd_akm4xxx_get_vol(ak, chip, addr);
 	return 0;
 }
 
+/* Cache the new volume and write its register form; returns 1 if changed. */
+static int put_ak_reg(struct snd_kcontrol *kcontrol, int addr,
+		      unsigned char nval)
+{
+	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
+	unsigned long pv = kcontrol->private_value;
+	int chip = AK_GET_CHIP(pv);
+
+	if (snd_akm4xxx_get_vol(ak, chip, addr) == nval)
+		return 0;	/* stored volume already matches */
+	snd_akm4xxx_set_vol(ak, chip, addr, nval);
+	if (AK_GET_VOL_CVT(pv) && nval < 128)
+		nval = vol_cvt_datt[nval];
+	if (AK_GET_IPGA(pv) && nval >= 128)
+		nval++; /* need to correct + 1 since both 127 and 128 are 0dB */
+	if (AK_GET_INVERT(pv))
+		nval = AK_GET_MASK(pv) - nval;
+	if (AK_GET_NEEDSMSB(pv))
+		nval |= 0x80;
+	snd_akm4xxx_write(ak, chip, addr, nval);
+	return 1;
+}
+
 static int snd_akm4xxx_volume_put(struct snd_kcontrol *kcontrol,
 				  struct snd_ctl_elem_value *ucontrol)
 {
-	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
-	int chip = AK_GET_CHIP(kcontrol->private_value);
-	int addr = AK_GET_ADDR(kcontrol->private_value);
-	int invert = AK_GET_INVERT(kcontrol->private_value);
-	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
-	unsigned char nval = ucontrol->value.integer.value[0] % (mask+1);
-	int change;
-
-	if (invert)
-		nval = mask - nval;
-	change = snd_akm4xxx_get(ak, chip, addr) != nval;
-	if (change)
-		snd_akm4xxx_write(ak, chip, addr, nval);
-	return change;
+	return put_ak_reg(kcontrol, AK_GET_ADDR(kcontrol->private_value),
+			  ucontrol->value.integer.value[0]);
 }
 
 static int snd_akm4xxx_stereo_volume_info(struct snd_kcontrol *kcontrol,
@@ -352,77 +398,21 @@
 	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
 	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
-	int invert = AK_GET_INVERT(kcontrol->private_value);
-	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
-	unsigned char val = snd_akm4xxx_get(ak, chip, addr);
-	
-	ucontrol->value.integer.value[0] = invert ? mask - val : val;
 
-	val = snd_akm4xxx_get(ak, chip, addr+1);
-	ucontrol->value.integer.value[1] = invert ? mask - val : val;
-
+	ucontrol->value.integer.value[0] = snd_akm4xxx_get_vol(ak, chip, addr);
+	ucontrol->value.integer.value[1] = snd_akm4xxx_get_vol(ak, chip, addr+1);
 	return 0;
 }
 
 static int snd_akm4xxx_stereo_volume_put(struct snd_kcontrol *kcontrol,
 					 struct snd_ctl_elem_value *ucontrol)
 {
-	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
-	int chip = AK_GET_CHIP(kcontrol->private_value);
 	int addr = AK_GET_ADDR(kcontrol->private_value);
-	int invert = AK_GET_INVERT(kcontrol->private_value);
-	unsigned int mask = AK_GET_MASK(kcontrol->private_value);
-	unsigned char nval = ucontrol->value.integer.value[0] % (mask+1);
-	int change0, change1;
+	int change;
 
-	if (invert)
-		nval = mask - nval;
-	change0 = snd_akm4xxx_get(ak, chip, addr) != nval;
-	if (change0)
-		snd_akm4xxx_write(ak, chip, addr, nval);
-
-	nval = ucontrol->value.integer.value[1] % (mask+1);
-	if (invert)
-		nval = mask - nval;
-	change1 = snd_akm4xxx_get(ak, chip, addr+1) != nval;
-	if (change1)
-		snd_akm4xxx_write(ak, chip, addr+1, nval);
-
-
-	return change0 || change1;
-}
-
-static int snd_akm4xxx_ipga_gain_info(struct snd_kcontrol *kcontrol,
-				      struct snd_ctl_elem_info *uinfo)
-{
-	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
-	uinfo->count = 1;
-	uinfo->value.integer.min = 0;
-	uinfo->value.integer.max = 36;
-	return 0;
-}
-
-static int snd_akm4xxx_ipga_gain_get(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
-	int chip = AK_GET_CHIP(kcontrol->private_value);
-	int addr = AK_GET_ADDR(kcontrol->private_value);
-	ucontrol->value.integer.value[0] =
-		snd_akm4xxx_get_ipga(ak, chip, addr) & 0x7f;
-	return 0;
-}
-
-static int snd_akm4xxx_ipga_gain_put(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
-	int chip = AK_GET_CHIP(kcontrol->private_value);
-	int addr = AK_GET_ADDR(kcontrol->private_value);
-	unsigned char nval = (ucontrol->value.integer.value[0] % 37) | 0x80;
-	int change = snd_akm4xxx_get_ipga(ak, chip, addr) != nval;
-	if (change)
-		snd_akm4xxx_write(ak, chip, addr, nval);
+	change = put_ak_reg(kcontrol, addr, ucontrol->value.integer.value[0]);
+	change |= put_ak_reg(kcontrol, addr + 1,
+			     ucontrol->value.integer.value[1]);
 	return change;
 }
 
@@ -472,179 +462,280 @@
 	return change;
 }
 
+static int ak4xxx_switch_info(struct snd_kcontrol *kcontrol,
+			      struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;	/* on/off element */
+	uinfo->count = 1;				/* one value per control */
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 1;
+	return 0;	/* always succeeds */
+}
+
+/* Read one switch: test only its own bit of the cached register value. */
+static int ak4xxx_switch_get(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
+	int chip = AK_GET_CHIP(kcontrol->private_value);
+	int addr = AK_GET_ADDR(kcontrol->private_value);
+	int shift = AK_GET_SHIFT(kcontrol->private_value);
+	int on = (snd_akm4xxx_get(ak, chip, addr) >> shift) & 1;
+
+	if (AK_GET_INVERT(kcontrol->private_value))
+		on = !on;	/* inverted: a set bit means "off" */
+	ucontrol->value.integer.value[0] = on;
+	return 0;
+}
+
+/* Set or clear one switch bit in the register; returns 1 if it changed. */
+static int ak4xxx_switch_put(struct snd_kcontrol *kcontrol,
+			     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol);
+	unsigned long pv = kcontrol->private_value;
+	int chip = AK_GET_CHIP(pv);
+	int addr = AK_GET_ADDR(pv);
+	unsigned char bit = 1 << AK_GET_SHIFT(pv);
+	unsigned char cur = snd_akm4xxx_get(ak, chip, addr);
+	unsigned char next;
+	int set = ucontrol->value.integer.value[0] != 0;
+
+	/* an inverted control stores the opposite of the requested state */
+	if (AK_GET_INVERT(pv))
+		set = !set;
+	next = set ? (cur | bit) : (cur & ~bit);
+
+	/* write the register only when its value really differs */
+	if (next == cur)
+		return 0;
+	snd_akm4xxx_write(ak, chip, addr, next);
+	return 1;
+}
+
 /*
  * build AK4xxx controls
  */
 
-int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak)
+static int build_dac_controls(struct snd_akm4xxx *ak)
 {
-	unsigned int idx, num_emphs;
-	struct snd_kcontrol *ctl;
-	int err;
-	int mixer_ch = 0;
-	int num_stereo;
+	int idx, err, mixer_ch, num_stereo;
+	struct snd_kcontrol_new knew;
 
-	ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
-	if (! ctl)
-		return -ENOMEM;
-
+	mixer_ch = 0;
 	for (idx = 0; idx < ak->num_dacs; ) {
-		memset(ctl, 0, sizeof(*ctl));
-		if (ak->channel_names == NULL) {
-			strcpy(ctl->id.name, "DAC Volume");
+		memset(&knew, 0, sizeof(knew));
+		if (! ak->dac_info || ! ak->dac_info[mixer_ch].name) {
+			knew.name = "DAC Volume";
+			knew.index = mixer_ch + ak->idx_offset * 2;
 			num_stereo = 1;
-			ctl->id.index = mixer_ch + ak->idx_offset * 2;
 		} else {
-			strcpy(ctl->id.name, ak->channel_names[mixer_ch]);
-			num_stereo = ak->num_stereo[mixer_ch];
-			ctl->id.index = 0;
+			knew.name = ak->dac_info[mixer_ch].name;
+			num_stereo = ak->dac_info[mixer_ch].num_channels;
 		}
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
+		knew.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+		knew.count = 1;
+		knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ;
 		if (num_stereo == 2) {
-			ctl->info = snd_akm4xxx_stereo_volume_info;
-			ctl->get = snd_akm4xxx_stereo_volume_get;
-			ctl->put = snd_akm4xxx_stereo_volume_put;
+			knew.info = snd_akm4xxx_stereo_volume_info;
+			knew.get = snd_akm4xxx_stereo_volume_get;
+			knew.put = snd_akm4xxx_stereo_volume_put;
 		} else {
-			ctl->info = snd_akm4xxx_volume_info;
-			ctl->get = snd_akm4xxx_volume_get;
-			ctl->put = snd_akm4xxx_volume_put;
+			knew.info = snd_akm4xxx_volume_info;
+			knew.get = snd_akm4xxx_volume_get;
+			knew.put = snd_akm4xxx_volume_put;
 		}
 		switch (ak->type) {
 		case SND_AK4524:
 			/* register 6 & 7 */
-			ctl->private_value =
-				AK_COMPOSE(idx/2, (idx%2) + 6, 0, 127);
+			knew.private_value =
+				AK_COMPOSE(idx/2, (idx%2) + 6, 0, 127) |
+				AK_VOL_CVT;
+			knew.tlv.p = db_scale_vol_datt;
 			break;
 		case SND_AK4528:
 			/* register 4 & 5 */
-			ctl->private_value =
-				AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127);
+			knew.private_value =
+				AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127) |
+				AK_VOL_CVT;
+			knew.tlv.p = db_scale_vol_datt;
 			break;
 		case SND_AK4529: {
 			/* registers 2-7 and b,c */
 			int val = idx < 6 ? idx + 2 : (idx - 6) + 0xb;
-			ctl->private_value =
+			knew.private_value =
 				AK_COMPOSE(0, val, 0, 255) | AK_INVERT;
+			knew.tlv.p = db_scale_8bit;
 			break;
 		}
 		case SND_AK4355:
 			/* register 4-9, chip #0 only */
-			ctl->private_value = AK_COMPOSE(0, idx + 4, 0, 255);
+			knew.private_value = AK_COMPOSE(0, idx + 4, 0, 255);
+			knew.tlv.p = db_scale_8bit;
 			break;
-		case SND_AK4358:
-			if (idx >= 6)
-				/* register 4-9, chip #0 only */
-				ctl->private_value =
-					AK_COMPOSE(0, idx + 5, 0, 255);
-			else
-				/* register 4-9, chip #0 only */
-				ctl->private_value =
-					AK_COMPOSE(0, idx + 4, 0, 255);
+		case SND_AK4358: {
+			/* register 4-9 and 11-12, chip #0 only */
+			int  addr = idx < 6 ? idx + 4 : idx + 5;
+			knew.private_value =
+				AK_COMPOSE(0, addr, 0, 127) | AK_NEEDSMSB;
+			knew.tlv.p = db_scale_7bit;
 			break;
+		}
 		case SND_AK4381:
 			/* register 3 & 4 */
-			ctl->private_value =
+			knew.private_value =
 				AK_COMPOSE(idx/2, (idx%2) + 3, 0, 255);
+			knew.tlv.p = db_scale_linear;
 			break;
 		default:
-			err = -EINVAL;
-			goto __error;
+			return -EINVAL;
 		}
 
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
+		err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
 		if (err < 0)
-			goto __error;
+			return err;
 
 		idx += num_stereo;
 		mixer_ch++;
 	}
-	for (idx = 0; idx < ak->num_adcs && ak->type == SND_AK4524; ++idx) {
-		memset(ctl, 0, sizeof(*ctl));
-		strcpy(ctl->id.name, "ADC Volume");
-		ctl->id.index = idx + ak->idx_offset * 2;
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
-		ctl->info = snd_akm4xxx_volume_info;
-		ctl->get = snd_akm4xxx_volume_get;
-		ctl->put = snd_akm4xxx_volume_put;
-		/* register 4 & 5 */
-		ctl->private_value =
-			AK_COMPOSE(idx/2, (idx%2) + 4, 0, 127);
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
-		if (err < 0)
-			goto __error;
+	return 0;
+}
 
-		memset(ctl, 0, sizeof(*ctl));
-		strcpy(ctl->id.name, "IPGA Analog Capture Volume");
-		ctl->id.index = idx + ak->idx_offset * 2;
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
-		ctl->info = snd_akm4xxx_ipga_gain_info;
-		ctl->get = snd_akm4xxx_ipga_gain_get;
-		ctl->put = snd_akm4xxx_ipga_gain_put;
+static int build_adc_controls(struct snd_akm4xxx *ak)
+{
+	int idx, err, mixer_ch, num_stereo;
+	struct snd_kcontrol_new knew;
+
+	mixer_ch = 0;
+	for (idx = 0; idx < ak->num_adcs;) {
+		memset(&knew, 0, sizeof(knew));
+		if (! ak->adc_info || ! ak->adc_info[mixer_ch].name) {
+			knew.name = "ADC Volume";
+			knew.index = mixer_ch + ak->idx_offset * 2;
+			num_stereo = 1;
+		} else {
+			knew.name = ak->adc_info[mixer_ch].name;
+			num_stereo = ak->adc_info[mixer_ch].num_channels;
+		}
+		knew.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+		knew.count = 1;
+		knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+		if (num_stereo == 2) {
+			knew.info = snd_akm4xxx_stereo_volume_info;
+			knew.get = snd_akm4xxx_stereo_volume_get;
+			knew.put = snd_akm4xxx_stereo_volume_put;
+		} else {
+			knew.info = snd_akm4xxx_volume_info;
+			knew.get = snd_akm4xxx_volume_get;
+			knew.put = snd_akm4xxx_volume_put;
+		}
 		/* register 4 & 5 */
-		ctl->private_value = AK_COMPOSE(idx/2, (idx%2) + 4, 0, 0);
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
+		if (ak->type == SND_AK5365)
+			knew.private_value =
+				AK_COMPOSE(idx/2, (idx%2) + 4, 0, 151) |
+				AK_VOL_CVT | AK_IPGA;
+		else
+			knew.private_value =
+				AK_COMPOSE(idx/2, (idx%2) + 4, 0, 163) |
+				AK_VOL_CVT | AK_IPGA;
+		knew.tlv.p = db_scale_vol_datt;
+		err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
 		if (err < 0)
-			goto __error;
+			return err;
+
+		if (ak->type == SND_AK5365 && (idx % 2) == 0) {
+			if (! ak->adc_info || 
+			    ! ak->adc_info[mixer_ch].switch_name)
+				knew.name = "Capture Switch";
+			else
+				knew.name = ak->adc_info[mixer_ch].switch_name;
+			knew.info = ak4xxx_switch_info;
+			knew.get = ak4xxx_switch_get;
+			knew.put = ak4xxx_switch_put;
+			knew.access = 0;
+			/* register 2, bit 0 (SMUTE): 0 = normal operation,
+			   1 = mute */
+			knew.private_value =
+				AK_COMPOSE(idx/2, 2, 0, 0) | AK_INVERT;
+			err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
+			if (err < 0)
+				return err;
+		}
+
+		idx += num_stereo;
+		mixer_ch++;
 	}
-	if (ak->type == SND_AK4355 || ak->type == SND_AK4358)
-		num_emphs = 1;
-	else
-		num_emphs = ak->num_dacs / 2;
+	return 0;
+}
+
+static int build_deemphasis(struct snd_akm4xxx *ak, int num_emphs)
+{
+	int idx, err;
+	struct snd_kcontrol_new knew;
+
 	for (idx = 0; idx < num_emphs; idx++) {
-		memset(ctl, 0, sizeof(*ctl));
-		strcpy(ctl->id.name, "Deemphasis");
-		ctl->id.index = idx + ak->idx_offset;
-		ctl->id.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
-		ctl->count = 1;
-		ctl->info = snd_akm4xxx_deemphasis_info;
-		ctl->get = snd_akm4xxx_deemphasis_get;
-		ctl->put = snd_akm4xxx_deemphasis_put;
+		memset(&knew, 0, sizeof(knew));
+		knew.name = "Deemphasis";
+		knew.index = idx + ak->idx_offset;
+		knew.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+		knew.count = 1;
+		knew.info = snd_akm4xxx_deemphasis_info;
+		knew.get = snd_akm4xxx_deemphasis_get;
+		knew.put = snd_akm4xxx_deemphasis_put;
 		switch (ak->type) {
 		case SND_AK4524:
 		case SND_AK4528:
 			/* register 3 */
-			ctl->private_value = AK_COMPOSE(idx, 3, 0, 0);
+			knew.private_value = AK_COMPOSE(idx, 3, 0, 0);
 			break;
 		case SND_AK4529: {
 			int shift = idx == 3 ? 6 : (2 - idx) * 2;
 			/* register 8 with shift */
-			ctl->private_value = AK_COMPOSE(0, 8, shift, 0);
+			knew.private_value = AK_COMPOSE(0, 8, shift, 0);
 			break;
 		}
 		case SND_AK4355:
 		case SND_AK4358:
-			ctl->private_value = AK_COMPOSE(idx, 3, 0, 0);
+			knew.private_value = AK_COMPOSE(idx, 3, 0, 0);
 			break;
 		case SND_AK4381:
-			ctl->private_value = AK_COMPOSE(idx, 1, 1, 0);
+			knew.private_value = AK_COMPOSE(idx, 1, 1, 0);
 			break;
+		default:
+			return -EINVAL;
 		}
-		ctl->private_data = ak;
-		err = snd_ctl_add(ak->card,
-				  snd_ctl_new(ctl, SNDRV_CTL_ELEM_ACCESS_READ|
-					      SNDRV_CTL_ELEM_ACCESS_WRITE));
+		err = snd_ctl_add(ak->card, snd_ctl_new1(&knew, ak));
 		if (err < 0)
-			goto __error;
+			return err;
 	}
-	err = 0;
-
- __error:
-	kfree(ctl);
-	return err;
+	return 0;
 }
 
+/*
+ * Build all mixer controls of the codec: DAC volumes, ADC volumes,
+ * then de-emphasis.  Returns 0 or the first negative error code.
+ */
+int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak)
+{
+	int ret, emph_count;
+
+	ret = build_dac_controls(ak);
+	if (ret >= 0)
+		ret = build_adc_controls(ak);
+	if (ret < 0)
+		return ret;
+
+	/* AK4355/AK4358: one de-emphasis control; others: num_dacs / 2 */
+	emph_count = (ak->type == SND_AK4355 || ak->type == SND_AK4358) ?
+		1 : ak->num_dacs / 2;
+	ret = build_deemphasis(ak, emph_count);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+	
 EXPORT_SYMBOL(snd_akm4xxx_build_controls);
 
 static int __init alsa_akm4xxx_module_init(void)
diff --git a/sound/isa/ad1816a/ad1816a_lib.c b/sound/isa/ad1816a/ad1816a_lib.c
index 8fcf2c1..fd9b61e 100644
--- a/sound/isa/ad1816a/ad1816a_lib.c
+++ b/sound/isa/ad1816a/ad1816a_lib.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/ioport.h>
 #include <sound/core.h>
+#include <sound/tlv.h>
 #include <sound/ad1816a.h>
 
 #include <asm/io.h>
@@ -765,6 +766,13 @@
 	return change;
 }
 
+#define AD1816A_SINGLE_TLV(xname, reg, shift, mask, invert, xtlv)	\
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .info = snd_ad1816a_info_single, \
+  .get = snd_ad1816a_get_single, .put = snd_ad1816a_put_single, \
+  .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = (xtlv) } }
 #define AD1816A_SINGLE(xname, reg, shift, mask, invert) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .info = snd_ad1816a_info_single, \
   .get = snd_ad1816a_get_single, .put = snd_ad1816a_put_single, \
@@ -822,6 +830,14 @@
 	return change;
 }
 
+#define AD1816A_DOUBLE_TLV(xname, reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .info = snd_ad1816a_info_double,		\
+  .get = snd_ad1816a_get_double, .put = snd_ad1816a_put_double, \
+  .private_value = reg | (shift_left << 8) | (shift_right << 12) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = (xtlv) } }
+
 #define AD1816A_DOUBLE(xname, reg, shift_left, shift_right, mask, invert) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .info = snd_ad1816a_info_double, \
   .get = snd_ad1816a_get_double, .put = snd_ad1816a_put_double, \
@@ -890,28 +906,44 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_4bit, -4500, 300, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit, -4650, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_6bit, -9450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_rec_gain, 0, 150, 0);
+
 static struct snd_kcontrol_new snd_ad1816a_controls[] __devinitdata = {
 AD1816A_DOUBLE("Master Playback Switch", AD1816A_MASTER_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("Master Playback Volume", AD1816A_MASTER_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("Master Playback Volume", AD1816A_MASTER_ATT, 8, 0, 31, 1,
+		   db_scale_5bit),
 AD1816A_DOUBLE("PCM Playback Switch", AD1816A_VOICE_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("PCM Playback Volume", AD1816A_VOICE_ATT, 8, 0, 63, 1),
+AD1816A_DOUBLE_TLV("PCM Playback Volume", AD1816A_VOICE_ATT, 8, 0, 63, 1,
+		   db_scale_6bit),
 AD1816A_DOUBLE("Line Playback Switch", AD1816A_LINE_GAIN_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("Line Playback Volume", AD1816A_LINE_GAIN_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("Line Playback Volume", AD1816A_LINE_GAIN_ATT, 8, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_DOUBLE("CD Playback Switch", AD1816A_CD_GAIN_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("CD Playback Volume", AD1816A_CD_GAIN_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("CD Playback Volume", AD1816A_CD_GAIN_ATT, 8, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_DOUBLE("Synth Playback Switch", AD1816A_SYNTH_GAIN_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("Synth Playback Volume", AD1816A_SYNTH_GAIN_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("Synth Playback Volume", AD1816A_SYNTH_GAIN_ATT, 8, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_DOUBLE("FM Playback Switch", AD1816A_FM_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("FM Playback Volume", AD1816A_FM_ATT, 8, 0, 63, 1),
+AD1816A_DOUBLE_TLV("FM Playback Volume", AD1816A_FM_ATT, 8, 0, 63, 1,
+		   db_scale_6bit),
 AD1816A_SINGLE("Mic Playback Switch", AD1816A_MIC_GAIN_ATT, 15, 1, 1),
-AD1816A_SINGLE("Mic Playback Volume", AD1816A_MIC_GAIN_ATT, 8, 31, 1),
+AD1816A_SINGLE_TLV("Mic Playback Volume", AD1816A_MIC_GAIN_ATT, 8, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_SINGLE("Mic Boost", AD1816A_MIC_GAIN_ATT, 14, 1, 0),
 AD1816A_DOUBLE("Video Playback Switch", AD1816A_VID_GAIN_ATT, 15, 7, 1, 1),
-AD1816A_DOUBLE("Video Playback Volume", AD1816A_VID_GAIN_ATT, 8, 0, 31, 1),
+AD1816A_DOUBLE_TLV("Video Playback Volume", AD1816A_VID_GAIN_ATT, 8, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 AD1816A_SINGLE("Phone Capture Switch", AD1816A_PHONE_IN_GAIN_ATT, 15, 1, 1),
-AD1816A_SINGLE("Phone Capture Volume", AD1816A_PHONE_IN_GAIN_ATT, 0, 15, 1),
+AD1816A_SINGLE_TLV("Phone Capture Volume", AD1816A_PHONE_IN_GAIN_ATT, 0, 15, 1,
+		   db_scale_4bit),
 AD1816A_SINGLE("Phone Playback Switch", AD1816A_PHONE_OUT_ATT, 7, 1, 1),
-AD1816A_SINGLE("Phone Playback Volume", AD1816A_PHONE_OUT_ATT, 0, 31, 1),
+AD1816A_SINGLE_TLV("Phone Playback Volume", AD1816A_PHONE_OUT_ATT, 0, 31, 1,
+		   db_scale_5bit),
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "Capture Source",
@@ -920,7 +952,8 @@
 	.put = snd_ad1816a_put_mux,
 },
 AD1816A_DOUBLE("Capture Switch", AD1816A_ADC_PGA, 15, 7, 1, 1),
-AD1816A_DOUBLE("Capture Volume", AD1816A_ADC_PGA, 8, 0, 15, 0),
+AD1816A_DOUBLE_TLV("Capture Volume", AD1816A_ADC_PGA, 8, 0, 15, 0,
+		   db_scale_rec_gain),
 AD1816A_SINGLE("3D Control - Switch", AD1816A_3D_PHAT_CTRL, 15, 1, 1),
 AD1816A_SINGLE("3D Control - Level", AD1816A_3D_PHAT_CTRL, 0, 15, 0),
 };
diff --git a/sound/isa/ad1848/ad1848_lib.c b/sound/isa/ad1848/ad1848_lib.c
index e711f87..a6fbd5d 100644
--- a/sound/isa/ad1848/ad1848_lib.c
+++ b/sound/isa/ad1848/ad1848_lib.c
@@ -29,6 +29,7 @@
 #include <sound/core.h>
 #include <sound/ad1848.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/pcm_params.h>
 
 #include <asm/io.h>
@@ -118,6 +119,8 @@
 #endif
 }
 
+EXPORT_SYMBOL(snd_ad1848_out);
+
 static void snd_ad1848_dout(struct snd_ad1848 *chip,
 			    unsigned char reg, unsigned char value)
 {
@@ -941,6 +944,8 @@
 	return 0;
 }
 
+EXPORT_SYMBOL(snd_ad1848_create);
+
 static struct snd_pcm_ops snd_ad1848_playback_ops = {
 	.open =		snd_ad1848_playback_open,
 	.close =	snd_ad1848_playback_close,
@@ -988,12 +993,16 @@
 	return 0;
 }
 
+EXPORT_SYMBOL(snd_ad1848_pcm);
+
 const struct snd_pcm_ops *snd_ad1848_get_pcm_ops(int direction)
 {
 	return direction == SNDRV_PCM_STREAM_PLAYBACK ?
 		&snd_ad1848_playback_ops : &snd_ad1848_capture_ops;
 }
 
+EXPORT_SYMBOL(snd_ad1848_get_pcm_ops);
+
 /*
  *  MIXER part
  */
@@ -1171,7 +1180,8 @@
 
 /*
  */
-int snd_ad1848_add_ctl(struct snd_ad1848 *chip, const char *name, int index, int type, unsigned long value)
+int snd_ad1848_add_ctl_elem(struct snd_ad1848 *chip,
+			    const struct ad1848_mix_elem *c)
 {
 	static struct snd_kcontrol_new newctls[] = {
 		[AD1848_MIX_SINGLE] = {
@@ -1196,32 +1206,46 @@
 	struct snd_kcontrol *ctl;
 	int err;
 
-	ctl = snd_ctl_new1(&newctls[type], chip);
+	ctl = snd_ctl_new1(&newctls[c->type], chip);
 	if (! ctl)
 		return -ENOMEM;
-	strlcpy(ctl->id.name, name, sizeof(ctl->id.name));
-	ctl->id.index = index;
-	ctl->private_value = value;
+	strlcpy(ctl->id.name, c->name, sizeof(ctl->id.name));
+	ctl->id.index = c->index;
+	ctl->private_value = c->private_value;
+	if (c->tlv) {
+		ctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+		ctl->tlv.p = c->tlv;
+	}
 	if ((err = snd_ctl_add(chip->card, ctl)) < 0)
 		return err;
 	return 0;
 }
 
+EXPORT_SYMBOL(snd_ad1848_add_ctl_elem);
+
+static DECLARE_TLV_DB_SCALE(db_scale_6bit, -9450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_rec_gain, 0, 150, 0);
 
 static struct ad1848_mix_elem snd_ad1848_controls[] = {
 AD1848_DOUBLE("PCM Playback Switch", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 7, 7, 1, 1),
-AD1848_DOUBLE("PCM Playback Volume", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 0, 0, 63, 1),
+AD1848_DOUBLE_TLV("PCM Playback Volume", 0, AD1848_LEFT_OUTPUT, AD1848_RIGHT_OUTPUT, 0, 0, 63, 1,
+		  db_scale_6bit),
 AD1848_DOUBLE("Aux Playback Switch", 0, AD1848_AUX1_LEFT_INPUT, AD1848_AUX1_RIGHT_INPUT, 7, 7, 1, 1),
-AD1848_DOUBLE("Aux Playback Volume", 0, AD1848_AUX1_LEFT_INPUT, AD1848_AUX1_RIGHT_INPUT, 0, 0, 31, 1),
+AD1848_DOUBLE_TLV("Aux Playback Volume", 0, AD1848_AUX1_LEFT_INPUT, AD1848_AUX1_RIGHT_INPUT, 0, 0, 31, 1,
+		  db_scale_5bit_12db_max),
 AD1848_DOUBLE("Aux Playback Switch", 1, AD1848_AUX2_LEFT_INPUT, AD1848_AUX2_RIGHT_INPUT, 7, 7, 1, 1),
-AD1848_DOUBLE("Aux Playback Volume", 1, AD1848_AUX2_LEFT_INPUT, AD1848_AUX2_RIGHT_INPUT, 0, 0, 31, 1),
-AD1848_DOUBLE("Capture Volume", 0, AD1848_LEFT_INPUT, AD1848_RIGHT_INPUT, 0, 0, 15, 0),
+AD1848_DOUBLE_TLV("Aux Playback Volume", 1, AD1848_AUX2_LEFT_INPUT, AD1848_AUX2_RIGHT_INPUT, 0, 0, 31, 1,
+		  db_scale_5bit_12db_max),
+AD1848_DOUBLE_TLV("Capture Volume", 0, AD1848_LEFT_INPUT, AD1848_RIGHT_INPUT, 0, 0, 15, 0,
+		  db_scale_rec_gain),
 {
 	.name = "Capture Source",
 	.type = AD1848_MIX_CAPTURE,
 },
 AD1848_SINGLE("Loopback Capture Switch", 0, AD1848_LOOPBACK, 0, 1, 0),
-AD1848_SINGLE("Loopback Capture Volume", 0, AD1848_LOOPBACK, 1, 63, 0)
+AD1848_SINGLE_TLV("Loopback Capture Volume", 0, AD1848_LOOPBACK, 1, 63, 0,
+		  db_scale_6bit),
 };
                                         
 int snd_ad1848_mixer(struct snd_ad1848 *chip)
@@ -1245,12 +1269,7 @@
 	return 0;
 }
 
-EXPORT_SYMBOL(snd_ad1848_out);
-EXPORT_SYMBOL(snd_ad1848_create);
-EXPORT_SYMBOL(snd_ad1848_pcm);
-EXPORT_SYMBOL(snd_ad1848_get_pcm_ops);
 EXPORT_SYMBOL(snd_ad1848_mixer);
-EXPORT_SYMBOL(snd_ad1848_add_ctl);
 
 /*
  *  INIT part
diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c
index 34998de..8581820 100644
--- a/sound/isa/es18xx.c
+++ b/sound/isa/es18xx.c
@@ -2038,7 +2038,80 @@
 static struct platform_device *platform_devices[SNDRV_CARDS];
 
 #ifdef CONFIG_PNP
-static int pnp_registered;
+static int pnp_registered, pnpc_registered;
+
+static struct pnp_device_id snd_audiodrive_pnpbiosids[] = {	/* id_table of es18xx_pnp_driver (plain PnP device driver) */
+	{ .id = "ESS1869" },
+	{ .id = "" }		/* end */
+};
+
+MODULE_DEVICE_TABLE(pnp, snd_audiodrive_pnpbiosids);
+
+/* PnP main device initialization: apply explicit per-card settings to cfg, activate pdev, store the assigned resources back; 0 or -EBUSY */
+static int __devinit snd_audiodrive_pnp_init_main(int dev, struct pnp_dev *pdev,
+						  struct pnp_resource_table *cfg)
+{
+	int err;
+
+	pnp_init_resource_table(cfg);
+	if (port[dev] != SNDRV_AUTO_PORT)	/* only values that are not left at AUTO are forced into cfg */
+		pnp_resource_change(&cfg->port_resource[0], port[dev], 16);
+	if (fm_port[dev] != SNDRV_AUTO_PORT)
+		pnp_resource_change(&cfg->port_resource[1], fm_port[dev], 4);
+	if (mpu_port[dev] != SNDRV_AUTO_PORT)
+		pnp_resource_change(&cfg->port_resource[2], mpu_port[dev], 2);
+	if (dma1[dev] != SNDRV_AUTO_DMA)
+		pnp_resource_change(&cfg->dma_resource[0], dma1[dev], 1);
+	if (dma2[dev] != SNDRV_AUTO_DMA)
+		pnp_resource_change(&cfg->dma_resource[1], dma2[dev], 1);
+	if (irq[dev] != SNDRV_AUTO_IRQ)
+		pnp_resource_change(&cfg->irq_resource[0], irq[dev], 1);
+	if (pnp_device_is_isapnp(pdev)) {	/* cfg is only applied to ISA PnP devices */
+		err = pnp_manual_config_dev(pdev, cfg, 0);
+		if (err < 0)	/* not fatal: activation below is still attempted */
+			snd_printk(KERN_ERR PFX "PnP manual resources are invalid, using auto config\n");
+	}
+	err = pnp_activate_dev(pdev);
+	if (err < 0) {
+		snd_printk(KERN_ERR PFX "PnP configure failure (out of resources?)\n");
+		return -EBUSY;	/* the pnp_activate_dev() error code itself is not propagated */
+	}
+	/* ISA PnP only: program the Vendor-Defined Card-Level registers by hand */
+	/* (csn and logdev initialization is skipped - already done in isapnp_configure) */
+	if (pnp_device_is_isapnp(pdev)) {
+		isapnp_cfg_begin(isapnp_card_number(pdev), isapnp_csn_number(pdev));
+		isapnp_write_byte(0x27, pnp_irq(pdev, 0));	/* Hardware Volume IRQ Number */
+		if (mpu_port[dev] != SNDRV_AUTO_PORT)
+			isapnp_write_byte(0x28, pnp_irq(pdev, 0)); /* MPU-401 IRQ Number */
+		isapnp_write_byte(0x72, pnp_irq(pdev, 0));	/* second IRQ */
+		isapnp_cfg_end();
+	}
+	port[dev] = pnp_port_start(pdev, 0);	/* from here on: record what the device was actually given */
+	fm_port[dev] = pnp_port_start(pdev, 1);
+	mpu_port[dev] = pnp_port_start(pdev, 2);
+	dma1[dev] = pnp_dma(pdev, 0);
+	dma2[dev] = pnp_dma(pdev, 1);
+	irq[dev] = pnp_irq(pdev, 0);
+	snd_printdd("PnP ES18xx: port=0x%lx, fm port=0x%lx, mpu port=0x%lx\n", port[dev], fm_port[dev], mpu_port[dev]);
+	snd_printdd("PnP ES18xx: dma1=%i, dma2=%i, irq=%i\n", dma1[dev], dma2[dev], irq[dev]);
+	return 0;
+}
+
+/* Non-card PnP path: remember pdev in acard and configure it using a temporary resource table. */
+static int __devinit snd_audiodrive_pnp(int dev, struct snd_audiodrive *acard,
+					struct pnp_dev *pdev)
+{
+	struct pnp_resource_table *cfg = kmalloc(sizeof(*cfg), GFP_KERNEL);
+	int failed;
+
+	if (!cfg)
+		return -ENOMEM;
+	acard->dev = pdev;
+	failed = snd_audiodrive_pnp_init_main(dev, pdev, cfg) < 0;
+	/* cfg is scratch space only; it is released on both outcomes */
+	kfree(cfg);
+	return failed ? -EBUSY : 0;
+}
 
 static struct pnp_card_device_id snd_audiodrive_pnpids[] = {
 	/* ESS 1868 (integrated on Compaq dual P-Pro motherboard and Genius 18PnP 3D) */
@@ -2061,13 +2134,11 @@
 
 MODULE_DEVICE_TABLE(pnp_card, snd_audiodrive_pnpids);
 
-static int __devinit snd_audiodrive_pnp(int dev, struct snd_audiodrive *acard,
+static int __devinit snd_audiodrive_pnpc(int dev, struct snd_audiodrive *acard,
 					struct pnp_card_link *card,
 					const struct pnp_card_device_id *id)
 {
-	struct pnp_dev *pdev;
 	struct pnp_resource_table * cfg = kmalloc(sizeof(struct pnp_resource_table), GFP_KERNEL);
-	int err;
 
 	if (!cfg)
 		return -ENOMEM;
@@ -2082,58 +2153,17 @@
 		return -EBUSY;
 	}
 	/* Control port initialization */
-	err = pnp_activate_dev(acard->devc);
-	if (err < 0) {
+	if (pnp_activate_dev(acard->devc) < 0) {
 		snd_printk(KERN_ERR PFX "PnP control configure failure (out of resources?)\n");
 		kfree(cfg);
 		return -EAGAIN;
 	}
 	snd_printdd("pnp: port=0x%llx\n",
 			(unsigned long long)pnp_port_start(acard->devc, 0));
-	/* PnP initialization */
-	pdev = acard->dev;
-	pnp_init_resource_table(cfg);
-	if (port[dev] != SNDRV_AUTO_PORT)
-		pnp_resource_change(&cfg->port_resource[0], port[dev], 16);
-	if (fm_port[dev] != SNDRV_AUTO_PORT)
-		pnp_resource_change(&cfg->port_resource[1], fm_port[dev], 4);
-	if (mpu_port[dev] != SNDRV_AUTO_PORT)
-		pnp_resource_change(&cfg->port_resource[2], mpu_port[dev], 2);
-	if (dma1[dev] != SNDRV_AUTO_DMA)
-		pnp_resource_change(&cfg->dma_resource[0], dma1[dev], 1);
-	if (dma2[dev] != SNDRV_AUTO_DMA)
-		pnp_resource_change(&cfg->dma_resource[1], dma2[dev], 1);
-	if (irq[dev] != SNDRV_AUTO_IRQ)
-		pnp_resource_change(&cfg->irq_resource[0], irq[dev], 1);
-	err = pnp_manual_config_dev(pdev, cfg, 0);
-	if (err < 0)
-		snd_printk(KERN_ERR PFX "PnP manual resources are invalid, using auto config\n");
-	err = pnp_activate_dev(pdev);
-	if (err < 0) {
-		snd_printk(KERN_ERR PFX "PnP configure failure (out of resources?)\n");
+	if (snd_audiodrive_pnp_init_main(dev, acard->dev, cfg) < 0) {
 		kfree(cfg);
 		return -EBUSY;
 	}
-	/* ok. hack using Vendor-Defined Card-Level registers */
-	/* skip csn and logdev initialization - already done in isapnp_configure */
-	if (pnp_device_is_isapnp(pdev)) {
-		isapnp_cfg_begin(isapnp_card_number(pdev), isapnp_csn_number(pdev));
-		isapnp_write_byte(0x27, pnp_irq(pdev, 0));	/* Hardware Volume IRQ Number */
-		if (mpu_port[dev] != SNDRV_AUTO_PORT)
-			isapnp_write_byte(0x28, pnp_irq(pdev, 0)); /* MPU-401 IRQ Number */
-		isapnp_write_byte(0x72, pnp_irq(pdev, 0));	/* second IRQ */
-		isapnp_cfg_end();
-	} else {
-		snd_printk(KERN_ERR PFX "unable to install ISA PnP hack, expect malfunction\n");
-	}
-	port[dev] = pnp_port_start(pdev, 0);
-	fm_port[dev] = pnp_port_start(pdev, 1);
-	mpu_port[dev] = pnp_port_start(pdev, 2);
-	dma1[dev] = pnp_dma(pdev, 0);
-	dma2[dev] = pnp_dma(pdev, 1);
-	irq[dev] = pnp_irq(pdev, 0);
-	snd_printdd("PnP ES18xx: port=0x%lx, fm port=0x%lx, mpu port=0x%lx\n", port[dev], fm_port[dev], mpu_port[dev]);
-	snd_printdd("PnP ES18xx: dma1=%i, dma2=%i, irq=%i\n", dma1[dev], dma2[dev], irq[dev]);
 	kfree(cfg);
 	return 0;
 }
@@ -2302,7 +2331,69 @@
 #ifdef CONFIG_PNP
 static unsigned int __devinitdata es18xx_pnp_devices;
 
-static int __devinit snd_audiodrive_pnp_detect(struct pnp_card_link *pcard,
+static int __devinit snd_audiodrive_pnp_detect(struct pnp_dev *pdev,
+					    const struct pnp_device_id *id)
+{
+	static int dev;	/* next card slot to try; kept across probe calls */
+	struct snd_card *card;
+	int ret;
+
+	/* ISA PnP devices are handled by the card-level driver instead */
+	if (pnp_device_is_isapnp(pdev))
+		return -ENOENT;
+	while (dev < SNDRV_CARDS && !(enable[dev] && isapnp[dev]))
+		dev++;
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+
+	card = snd_es18xx_card_new(dev);
+	if (!card)
+		return -ENOMEM;
+	ret = snd_audiodrive_pnp(dev, card->private_data, pdev);
+	if (ret < 0)
+		goto free_card;
+	snd_card_set_dev(card, &pdev->dev);
+	ret = snd_audiodrive_probe(card, dev);
+	if (ret < 0)
+		goto free_card;
+	pnp_set_drvdata(pdev, card);
+	dev++;
+	es18xx_pnp_devices++;
+	return 0;
+free_card:
+	snd_card_free(card);
+	return ret;
+}
+
+static void __devexit snd_audiodrive_pnp_remove(struct pnp_dev * pdev)
+{
+	snd_card_free(pnp_get_drvdata(pdev));	/* the card stored by the probe via pnp_set_drvdata() */
+	pnp_set_drvdata(pdev, NULL);		/* do not leave the freed card pointer behind */
+}
+
+#ifdef CONFIG_PM
+static int snd_audiodrive_pnp_suspend(struct pnp_dev *pdev, pm_message_t state)
+{
+	return snd_es18xx_suspend(pnp_get_drvdata(pdev), state);	/* drvdata is the snd_card set by the probe */
+}
+static int snd_audiodrive_pnp_resume(struct pnp_dev *pdev)
+{
+	return snd_es18xx_resume(pnp_get_drvdata(pdev));	/* same card pointer as used for suspend */
+}
+#endif
+
+static struct pnp_driver es18xx_pnp_driver = {	/* plain PnP device driver, registered next to the card driver es18xx_pnpc_driver */
+	.name = "es18xx-pnpbios",
+	.id_table = snd_audiodrive_pnpbiosids,
+	.probe = snd_audiodrive_pnp_detect,	/* returns -ENOENT for ISA PnP devices */
+	.remove = __devexit_p(snd_audiodrive_pnp_remove),
+#ifdef CONFIG_PM
+	.suspend = snd_audiodrive_pnp_suspend,
+	.resume = snd_audiodrive_pnp_resume,
+#endif
+};
+
+static int __devinit snd_audiodrive_pnpc_detect(struct pnp_card_link *pcard,
 					       const struct pnp_card_device_id *pid)
 {
 	static int dev;
@@ -2320,7 +2411,7 @@
 	if (! card)
 		return -ENOMEM;
 
-	if ((res = snd_audiodrive_pnp(dev, card->private_data, pcard, pid)) < 0) {
+	if ((res = snd_audiodrive_pnpc(dev, card->private_data, pcard, pid)) < 0) {
 		snd_card_free(card);
 		return res;
 	}
@@ -2336,19 +2427,19 @@
 	return 0;
 }
 
-static void __devexit snd_audiodrive_pnp_remove(struct pnp_card_link * pcard)
+static void __devexit snd_audiodrive_pnpc_remove(struct pnp_card_link * pcard)
 {
 	snd_card_free(pnp_get_card_drvdata(pcard));
 	pnp_set_card_drvdata(pcard, NULL);
 }
 
 #ifdef CONFIG_PM
-static int snd_audiodrive_pnp_suspend(struct pnp_card_link *pcard, pm_message_t state)
+static int snd_audiodrive_pnpc_suspend(struct pnp_card_link *pcard, pm_message_t state)
 {
 	return snd_es18xx_suspend(pnp_get_card_drvdata(pcard), state);
 }
 
-static int snd_audiodrive_pnp_resume(struct pnp_card_link *pcard)
+static int snd_audiodrive_pnpc_resume(struct pnp_card_link *pcard)
 {
 	return snd_es18xx_resume(pnp_get_card_drvdata(pcard));
 }
@@ -2359,11 +2450,11 @@
 	.flags = PNP_DRIVER_RES_DISABLE,
 	.name = "es18xx",
 	.id_table = snd_audiodrive_pnpids,
-	.probe = snd_audiodrive_pnp_detect,
-	.remove = __devexit_p(snd_audiodrive_pnp_remove),
+	.probe = snd_audiodrive_pnpc_detect,
+	.remove = __devexit_p(snd_audiodrive_pnpc_remove),
 #ifdef CONFIG_PM
-	.suspend	= snd_audiodrive_pnp_suspend,
-	.resume		= snd_audiodrive_pnp_resume,
+	.suspend	= snd_audiodrive_pnpc_suspend,
+	.resume		= snd_audiodrive_pnpc_resume,
 #endif
 };
 #endif /* CONFIG_PNP */
@@ -2373,8 +2464,10 @@
 	int i;
 
 #ifdef CONFIG_PNP
-	if (pnp_registered)
+	if (pnpc_registered)
 		pnp_unregister_card_driver(&es18xx_pnpc_driver);
+	if (pnp_registered)
+		pnp_unregister_driver(&es18xx_pnp_driver);
 #endif
 	for (i = 0; i < ARRAY_SIZE(platform_devices); ++i)
 		platform_device_unregister(platform_devices[i]);
@@ -2405,11 +2498,13 @@
 	}
 
 #ifdef CONFIG_PNP
-	err = pnp_register_card_driver(&es18xx_pnpc_driver);
-	if (!err) {
+	err = pnp_register_driver(&es18xx_pnp_driver);
+	if (!err)
 		pnp_registered = 1;
-		cards += es18xx_pnp_devices;
-	}
+	err = pnp_register_card_driver(&es18xx_pnpc_driver);
+	if (!err)
+		pnpc_registered = 1;
+	cards += es18xx_pnp_devices;
 #endif
 
 	if(!cards) {
diff --git a/sound/isa/gus/gus_mem_proc.c b/sound/isa/gus/gus_mem_proc.c
index 4080255..80f0a83 100644
--- a/sound/isa/gus/gus_mem_proc.c
+++ b/sound/isa/gus/gus_mem_proc.c
@@ -61,13 +61,13 @@
 	struct gus_proc_private *priv = entry->private_data;
 
 	switch (orig) {
-	case 0:	/* SEEK_SET */
+	case SEEK_SET:
 		file->f_pos = offset;
 		break;
-	case 1:	/* SEEK_CUR */
+	case SEEK_CUR:
 		file->f_pos += offset;
 		break;
-	case 2: /* SEEK_END, offset is negative */
+	case SEEK_END: /* offset is negative */
 		file->f_pos = priv->size + offset;
 		break;
 	default:
diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c
index 4031b61..da92bf6 100644
--- a/sound/isa/opl3sa2.c
+++ b/sound/isa/opl3sa2.c
@@ -33,6 +33,7 @@
 #include <sound/mpu401.h>
 #include <sound/opl3.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 
 #include <asm/io.h>
 
@@ -337,6 +338,14 @@
   .info = snd_opl3sa2_info_single, \
   .get = snd_opl3sa2_get_single, .put = snd_opl3sa2_put_single, \
   .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24) }
+#define OPL3SA2_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, /* TLV_READ flag goes with .tlv below */ \
+  .name = xname, .index = xindex, \
+  .info = snd_opl3sa2_info_single, \
+  .get = snd_opl3sa2_get_single, .put = snd_opl3sa2_put_single, \
+  .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = (xtlv) } }	/* xtlv: TLV array, e.g. one made with DECLARE_TLV_DB_SCALE() */
 
 static int snd_opl3sa2_info_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
@@ -395,6 +404,14 @@
   .info = snd_opl3sa2_info_double, \
   .get = snd_opl3sa2_get_double, .put = snd_opl3sa2_put_double, \
   .private_value = left_reg | (right_reg << 8) | (shift_left << 16) | (shift_right << 19) | (mask << 24) | (invert << 22) }
+#define OPL3SA2_DOUBLE_TLV(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, /* TLV_READ flag goes with .tlv below */ \
+  .name = xname, .index = xindex, \
+  .info = snd_opl3sa2_info_double, \
+  .get = snd_opl3sa2_get_double, .put = snd_opl3sa2_put_double, \
+  .private_value = left_reg | (right_reg << 8) | (shift_left << 16) | (shift_right << 19) | (mask << 24) | (invert << 22), \
+  .tlv = { .p = (xtlv) } }	/* xtlv: TLV array, e.g. one made with DECLARE_TLV_DB_SCALE() */
 
 static int snd_opl3sa2_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
@@ -469,11 +486,16 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_master, -3000, 200, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0);
+
 static struct snd_kcontrol_new snd_opl3sa2_controls[] = {
 OPL3SA2_DOUBLE("Master Playback Switch", 0, 0x07, 0x08, 7, 7, 1, 1),
-OPL3SA2_DOUBLE("Master Playback Volume", 0, 0x07, 0x08, 0, 0, 15, 1),
+OPL3SA2_DOUBLE_TLV("Master Playback Volume", 0, 0x07, 0x08, 0, 0, 15, 1,
+		   db_scale_master),
 OPL3SA2_SINGLE("Mic Playback Switch", 0, 0x09, 7, 1, 1),
-OPL3SA2_SINGLE("Mic Playback Volume", 0, 0x09, 0, 31, 1)
+OPL3SA2_SINGLE_TLV("Mic Playback Volume", 0, 0x09, 0, 31, 1,
+		   db_scale_5bit_12db_max),
 };
 
 static struct snd_kcontrol_new snd_opl3sa2_tone_controls[] = {
diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c
index 4359903..9ae659f 100644
--- a/sound/oss/dmasound/dmasound_awacs.c
+++ b/sound/oss/dmasound/dmasound_awacs.c
@@ -347,8 +347,8 @@
 setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int* gpio_pol)
 {
 	struct device_node *np;
-	u32* pp;
-	
+	const u32* pp;
+
 	np = find_devices("gpio");
 	if (!np)
 		return -ENODEV;
@@ -356,7 +356,8 @@
 	np = np->child;
 	while(np != 0) {
 		if (name) {
-			char *property = get_property(np,"audio-gpio",NULL);
+			const char *property =
+				get_property(np,"audio-gpio",NULL);
 			if (property != 0 && strcmp(property,name) == 0)
 				break;
 		} else if (compatible && device_is_compatible(np, compatible))
@@ -365,11 +366,11 @@
 	}
 	if (!np)
 		return -ENODEV;
-	pp = (u32 *)get_property(np, "AAPL,address", NULL);
+	pp = get_property(np, "AAPL,address", NULL);
 	if (!pp)
 		return -ENODEV;
 	*gpio_addr = (*pp) & 0x0000ffff;
-	pp = (u32 *)get_property(np, "audio-gpio-active-state", NULL);
+	pp = get_property(np, "audio-gpio-active-state", NULL);
 	if (pp)
 		*gpio_pol = *pp;
 	else
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index e49c0fe..8a6b180 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -475,6 +475,7 @@
 	depends on SND_FM801_TEA575X_BOOL
 	default SND_FM801
 	select VIDEO_V4L1
+	select VIDEO_DEV
 
 config SND_HDA_INTEL
 	tristate "Intel HD Audio"
@@ -743,4 +744,17 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-ymfpci.
 
+config SND_AC97_POWER_SAVE
+	bool "AC97 Power-Saving Mode"
+	depends on SND_AC97_CODEC && EXPERIMENTAL
+	default n
+	help
+	  Say Y here to enable the aggressive power-saving support of
+	  AC97 codecs.  In this mode, the power-mode is dynamically
+	  controlled at each open/close.
+
+	  The mode is activated by passing power_save=1 option to
+	  snd-ac97-codec driver.  You can toggle it dynamically over
+	  sysfs, too.
+
 endmenu
diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
index 51e83d7..a79e918 100644
--- a/sound/pci/ac97/ac97_codec.c
+++ b/sound/pci/ac97/ac97_codec.c
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
+#include <sound/tlv.h>
 #include <sound/ac97_codec.h>
 #include <sound/asoundef.h>
 #include <sound/initval.h>
@@ -47,6 +48,11 @@
 module_param(enable_loopback, bool, 0444);
 MODULE_PARM_DESC(enable_loopback, "Enable AC97 ADC/DAC Loopback Control");
 
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+static int power_save;
+module_param(power_save, bool, 0644);
+MODULE_PARM_DESC(power_save, "Enable AC97 power-saving control");
+#endif
 /*
 
  */
@@ -151,7 +157,7 @@
 { 0x4e534300, 0xffffffff, "LM4540,43,45,46,48",	NULL,		NULL }, // only guess --jk
 { 0x4e534331, 0xffffffff, "LM4549",		NULL,		NULL },
 { 0x4e534350, 0xffffffff, "LM4550",		patch_lm4550,  	NULL }, // volume wrap fix 
-{ 0x50534304, 0xffffffff, "UCB1400",		NULL,		NULL },
+{ 0x50534304, 0xffffffff, "UCB1400",		patch_ucb1400,	NULL },
 { 0x53494c20, 0xffffffe0, "Si3036,8",		mpatch_si3036,	mpatch_si3036, AC97_MODEM_PATCH },
 { 0x54524102, 0xffffffff, "TR28022",		NULL,		NULL },
 { 0x54524106, 0xffffffff, "TR28026",		NULL,		NULL },
@@ -187,6 +193,8 @@
 };
 
 
+static void update_power_regs(struct snd_ac97 *ac97);
+
 /*
  *  I/O routines
  */
@@ -554,6 +562,18 @@
 	}
 	err = snd_ac97_update_bits(ac97, reg, val_mask, val);
 	snd_ac97_page_restore(ac97, page_save);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	/* check analog mixer power-down */
+	if ((val_mask & 0x8000) &&
+	    (kcontrol->private_value & (1<<30))) {
+		if (val & 0x8000)
+			ac97->power_up &= ~(1 << (reg>>1));
+		else
+			ac97->power_up |= 1 << (reg>>1);
+		if (power_save)
+			update_power_regs(ac97);
+	}
+#endif
 	return err;
 }
 
@@ -962,6 +982,10 @@
 static int snd_ac97_free(struct snd_ac97 *ac97)
 {
 	if (ac97) {
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+		if (ac97->power_workq)
+			destroy_workqueue(ac97->power_workq);
+#endif
 		snd_ac97_proc_done(ac97);
 		if (ac97->bus)
 			ac97->bus->codec[ac97->num] = NULL;
@@ -1117,7 +1141,9 @@
 /*
  * create mute switch(es) for normal stereo controls
  */
-static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg, int check_stereo, struct snd_ac97 *ac97)
+static int snd_ac97_cmute_new_stereo(struct snd_card *card, char *name, int reg,
+				     int check_stereo, int check_amix,
+				     struct snd_ac97 *ac97)
 {
 	struct snd_kcontrol *kctl;
 	int err;
@@ -1137,10 +1163,14 @@
 	}
 	if (mute_mask == 0x8080) {
 		struct snd_kcontrol_new tmp = AC97_DOUBLE(name, reg, 15, 7, 1, 1);
+		if (check_amix)
+			tmp.private_value |= (1 << 30);
 		tmp.index = ac97->num;
 		kctl = snd_ctl_new1(&tmp, ac97);
 	} else {
 		struct snd_kcontrol_new tmp = AC97_SINGLE(name, reg, 15, 1, 1);
+		if (check_amix)
+			tmp.private_value |= (1 << 30);
 		tmp.index = ac97->num;
 		kctl = snd_ctl_new1(&tmp, ac97);
 	}
@@ -1153,6 +1183,32 @@
 }
 
 /*
+ * set dB information
+ */
+static DECLARE_TLV_DB_SCALE(db_scale_4bit, -4500, 300, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit, -4650, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_6bit, -9450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_12db_max, -3450, 150, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_rec_gain, 0, 150, 0);
+
+/* Pick the dB scale table matching a control's maximum raw value; NULL when none matches. */
+static unsigned int *find_db_scale(unsigned int maxval)
+{
+	if (maxval == 0x0f)
+		return db_scale_4bit;
+	if (maxval == 0x1f)
+		return db_scale_5bit;
+	return maxval == 0x3f ? db_scale_6bit : NULL;
+}
+
+static void set_tlv_db_scale(struct snd_kcontrol *kctl, unsigned int *tlv)
+{
+	/* kctl may be NULL (failed snd_ctl_new1()); leave that to snd_ctl_add() */
+	if (kctl && (kctl->tlv.p = tlv) != NULL)
+		kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+}
+
+/*
  * create a volume for normal stereo/mono controls
  */
 static int snd_ac97_cvol_new(struct snd_card *card, char *name, int reg, unsigned int lo_max,
@@ -1174,6 +1230,10 @@
 		tmp.index = ac97->num;
 		kctl = snd_ctl_new1(&tmp, ac97);
 	}
+	if (reg >= AC97_PHONE && reg <= AC97_PCM)
+		set_tlv_db_scale(kctl, db_scale_5bit_12db_max);
+	else
+		set_tlv_db_scale(kctl, find_db_scale(lo_max));
 	err = snd_ctl_add(card, kctl);
 	if (err < 0)
 		return err;
@@ -1186,7 +1246,9 @@
 /*
  * create a mute-switch and a volume for normal stereo/mono controls
  */
-static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx, int reg, int check_stereo, struct snd_ac97 *ac97)
+static int snd_ac97_cmix_new_stereo(struct snd_card *card, const char *pfx,
+				    int reg, int check_stereo, int check_amix,
+				    struct snd_ac97 *ac97)
 {
 	int err;
 	char name[44];
@@ -1197,7 +1259,9 @@
 
 	if (snd_ac97_try_bit(ac97, reg, 15)) {
 		sprintf(name, "%s Switch", pfx);
-		if ((err = snd_ac97_cmute_new_stereo(card, name, reg, check_stereo, ac97)) < 0)
+		if ((err = snd_ac97_cmute_new_stereo(card, name, reg,
+						     check_stereo, check_amix,
+						     ac97)) < 0)
 			return err;
 	}
 	check_volume_resolution(ac97, reg, &lo_max, &hi_max);
@@ -1209,8 +1273,10 @@
 	return 0;
 }
 
-#define snd_ac97_cmix_new(card, pfx, reg, ac97)	snd_ac97_cmix_new_stereo(card, pfx, reg, 0, ac97)
-#define snd_ac97_cmute_new(card, name, reg, ac97)	snd_ac97_cmute_new_stereo(card, name, reg, 0, ac97)
+#define snd_ac97_cmix_new(card, pfx, reg, acheck, ac97) \
+	snd_ac97_cmix_new_stereo(card, pfx, reg, 0, acheck, ac97)	/* check_stereo = 0, acheck -> check_amix */
+#define snd_ac97_cmute_new(card, name, reg, acheck, ac97) \
+	snd_ac97_cmute_new_stereo(card, name, reg, 0, acheck, ac97)	/* check_stereo = 0, acheck -> check_amix */
 
 static unsigned int snd_ac97_determine_spdif_rates(struct snd_ac97 *ac97);
 
@@ -1226,9 +1292,11 @@
 	/* AD claims to remove this control from AD1887, although spec v2.2 does not allow this */
 	if (snd_ac97_try_volume_mix(ac97, AC97_MASTER)) {
 		if (ac97->flags & AC97_HAS_NO_MASTER_VOL)
-			err = snd_ac97_cmute_new(card, "Master Playback Switch", AC97_MASTER, ac97);
+			err = snd_ac97_cmute_new(card, "Master Playback Switch",
+						 AC97_MASTER, 0, ac97);
 		else
-			err = snd_ac97_cmix_new(card, "Master Playback", AC97_MASTER, ac97);
+			err = snd_ac97_cmix_new(card, "Master Playback",
+						AC97_MASTER, 0, ac97);
 		if (err < 0)
 			return err;
 	}
@@ -1245,6 +1313,7 @@
 		snd_ac97_change_volume_params2(ac97, AC97_CENTER_LFE_MASTER, 0, &max);
 		kctl->private_value &= ~(0xff << 16);
 		kctl->private_value |= (int)max << 16;
+		set_tlv_db_scale(kctl, find_db_scale(max));
 		snd_ac97_write_cache(ac97, AC97_CENTER_LFE_MASTER, ac97->regs[AC97_CENTER_LFE_MASTER] | max);
 	}
 
@@ -1258,6 +1327,7 @@
 		snd_ac97_change_volume_params2(ac97, AC97_CENTER_LFE_MASTER, 8, &max);
 		kctl->private_value &= ~(0xff << 16);
 		kctl->private_value |= (int)max << 16;
+		set_tlv_db_scale(kctl, find_db_scale(max));
 		snd_ac97_write_cache(ac97, AC97_CENTER_LFE_MASTER, ac97->regs[AC97_CENTER_LFE_MASTER] | max << 8);
 	}
 
@@ -1265,19 +1335,23 @@
 	if ((snd_ac97_try_volume_mix(ac97, AC97_SURROUND_MASTER)) 
 		&& !(ac97->flags & AC97_AD_MULTI)) {
 		/* Surround Master (0x38) is with stereo mutes */
-		if ((err = snd_ac97_cmix_new_stereo(card, "Surround Playback", AC97_SURROUND_MASTER, 1, ac97)) < 0)
+		if ((err = snd_ac97_cmix_new_stereo(card, "Surround Playback",
+						    AC97_SURROUND_MASTER, 1, 0,
+						    ac97)) < 0)
 			return err;
 	}
 
 	/* build headphone controls */
 	if (snd_ac97_try_volume_mix(ac97, AC97_HEADPHONE)) {
-		if ((err = snd_ac97_cmix_new(card, "Headphone Playback", AC97_HEADPHONE, ac97)) < 0)
+		if ((err = snd_ac97_cmix_new(card, "Headphone Playback",
+					     AC97_HEADPHONE, 0, ac97)) < 0)
 			return err;
 	}
 	
 	/* build master mono controls */
 	if (snd_ac97_try_volume_mix(ac97, AC97_MASTER_MONO)) {
-		if ((err = snd_ac97_cmix_new(card, "Master Mono Playback", AC97_MASTER_MONO, ac97)) < 0)
+		if ((err = snd_ac97_cmix_new(card, "Master Mono Playback",
+					     AC97_MASTER_MONO, 0, ac97)) < 0)
 			return err;
 	}
 	
@@ -1301,8 +1375,9 @@
 		((ac97->flags & AC97_HAS_PC_BEEP) ||
 	    snd_ac97_try_volume_mix(ac97, AC97_PC_BEEP))) {
 		for (idx = 0; idx < 2; idx++)
-			if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_pc_beep[idx], ac97))) < 0)
+			if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_pc_beep[idx], ac97))) < 0)
 				return err;
+		set_tlv_db_scale(kctl, db_scale_4bit);
 		snd_ac97_write_cache(ac97, AC97_PC_BEEP,
 				     snd_ac97_read(ac97, AC97_PC_BEEP) | 0x801e);
 	}
@@ -1310,7 +1385,8 @@
 	/* build Phone controls */
 	if (!(ac97->flags & AC97_HAS_NO_PHONE)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_PHONE)) {
-			if ((err = snd_ac97_cmix_new(card, "Phone Playback", AC97_PHONE, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "Phone Playback",
+						     AC97_PHONE, 1, ac97)) < 0)
 				return err;
 		}
 	}
@@ -1318,7 +1394,8 @@
 	/* build MIC controls */
 	if (!(ac97->flags & AC97_HAS_NO_MIC)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_MIC)) {
-			if ((err = snd_ac97_cmix_new(card, "Mic Playback", AC97_MIC, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "Mic Playback",
+						     AC97_MIC, 1, ac97)) < 0)
 				return err;
 			if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_mic_boost, ac97))) < 0)
 				return err;
@@ -1327,14 +1404,16 @@
 
 	/* build Line controls */
 	if (snd_ac97_try_volume_mix(ac97, AC97_LINE)) {
-		if ((err = snd_ac97_cmix_new(card, "Line Playback", AC97_LINE, ac97)) < 0)
+		if ((err = snd_ac97_cmix_new(card, "Line Playback",
+					     AC97_LINE, 1, ac97)) < 0)
 			return err;
 	}
 	
 	/* build CD controls */
 	if (!(ac97->flags & AC97_HAS_NO_CD)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_CD)) {
-			if ((err = snd_ac97_cmix_new(card, "CD Playback", AC97_CD, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "CD Playback",
+						     AC97_CD, 1, ac97)) < 0)
 				return err;
 		}
 	}
@@ -1342,7 +1421,8 @@
 	/* build Video controls */
 	if (!(ac97->flags & AC97_HAS_NO_VIDEO)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_VIDEO)) {
-			if ((err = snd_ac97_cmix_new(card, "Video Playback", AC97_VIDEO, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "Video Playback",
+						     AC97_VIDEO, 1, ac97)) < 0)
 				return err;
 		}
 	}
@@ -1350,7 +1430,8 @@
 	/* build Aux controls */
 	if (!(ac97->flags & AC97_HAS_NO_AUX)) {
 		if (snd_ac97_try_volume_mix(ac97, AC97_AUX)) {
-			if ((err = snd_ac97_cmix_new(card, "Aux Playback", AC97_AUX, ac97)) < 0)
+			if ((err = snd_ac97_cmix_new(card, "Aux Playback",
+						     AC97_AUX, 1, ac97)) < 0)
 				return err;
 		}
 	}
@@ -1363,31 +1444,38 @@
 		else
 			init_val = 0x9f1f;
 		for (idx = 0; idx < 2; idx++)
-			if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_pcm[idx], ac97))) < 0)
+			if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_pcm[idx], ac97))) < 0)
 				return err;
+		set_tlv_db_scale(kctl, db_scale_5bit);
 		ac97->spec.ad18xx.pcmreg[0] = init_val;
 		if (ac97->scaps & AC97_SCAP_SURROUND_DAC) {
 			for (idx = 0; idx < 2; idx++)
-				if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_surround[idx], ac97))) < 0)
+				if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_surround[idx], ac97))) < 0)
 					return err;
+			set_tlv_db_scale(kctl, db_scale_5bit);
 			ac97->spec.ad18xx.pcmreg[1] = init_val;
 		}
 		if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC) {
 			for (idx = 0; idx < 2; idx++)
-				if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_center[idx], ac97))) < 0)
+				if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_center[idx], ac97))) < 0)
 					return err;
+			set_tlv_db_scale(kctl, db_scale_5bit);
 			for (idx = 0; idx < 2; idx++)
-				if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_ad18xx_lfe[idx], ac97))) < 0)
+				if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_ad18xx_lfe[idx], ac97))) < 0)
 					return err;
+			set_tlv_db_scale(kctl, db_scale_5bit);
 			ac97->spec.ad18xx.pcmreg[2] = init_val;
 		}
 		snd_ac97_write_cache(ac97, AC97_PCM, init_val);
 	} else {
 		if (!(ac97->flags & AC97_HAS_NO_STD_PCM)) {
 			if (ac97->flags & AC97_HAS_NO_PCM_VOL)
-				err = snd_ac97_cmute_new(card, "PCM Playback Switch", AC97_PCM, ac97);
+				err = snd_ac97_cmute_new(card,
+							 "PCM Playback Switch",
+							 AC97_PCM, 0, ac97);
 			else
-				err = snd_ac97_cmix_new(card, "PCM Playback", AC97_PCM, ac97);
+				err = snd_ac97_cmix_new(card, "PCM Playback",
+							AC97_PCM, 0, ac97);
 			if (err < 0)
 				return err;
 		}
@@ -1398,19 +1486,23 @@
 		if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_control_capture_src, ac97))) < 0)
 			return err;
 		if (snd_ac97_try_bit(ac97, AC97_REC_GAIN, 15)) {
-			if ((err = snd_ac97_cmute_new(card, "Capture Switch", AC97_REC_GAIN, ac97)) < 0)
+			err = snd_ac97_cmute_new(card, "Capture Switch",
+						 AC97_REC_GAIN, 0, ac97);
+			if (err < 0)
 				return err;
 		}
-		if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_control_capture_vol, ac97))) < 0)
+		if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_control_capture_vol, ac97))) < 0)
 			return err;
+		set_tlv_db_scale(kctl, db_scale_rec_gain);
 		snd_ac97_write_cache(ac97, AC97_REC_SEL, 0x0000);
 		snd_ac97_write_cache(ac97, AC97_REC_GAIN, 0x0000);
 	}
 	/* build MIC Capture controls */
 	if (snd_ac97_try_volume_mix(ac97, AC97_REC_GAIN_MIC)) {
 		for (idx = 0; idx < 2; idx++)
-			if ((err = snd_ctl_add(card, snd_ac97_cnew(&snd_ac97_controls_mic_capture[idx], ac97))) < 0)
+			if ((err = snd_ctl_add(card, kctl = snd_ac97_cnew(&snd_ac97_controls_mic_capture[idx], ac97))) < 0)
 				return err;
+		set_tlv_db_scale(kctl, db_scale_rec_gain);
 		snd_ac97_write_cache(ac97, AC97_REC_GAIN_MIC, 0x0000);
 	}
 
@@ -1481,6 +1573,12 @@
 	}
 
 	/* build S/PDIF controls */
+
+	/* Hack for ASUS P5P800-VM, which does not indicate S/PDIF capability */
+	if (ac97->subsystem_vendor == 0x1043 &&
+	    ac97->subsystem_device == 0x810f)
+		ac97->ext_id |= AC97_EI_SPDIF;
+
 	if ((ac97->ext_id & AC97_EI_SPDIF) && !(ac97->scaps & AC97_SCAP_NO_SPDIF)) {
 		if (ac97->build_ops->build_spdif) {
 			if ((err = ac97->build_ops->build_spdif(ac97)) < 0)
@@ -1817,18 +1915,25 @@
 	return 0;
 }
 
-/* unregister ac97 codec */
-static int snd_ac97_dev_unregister(struct snd_device *device)
+/* disconnect ac97 codec */
+static int snd_ac97_dev_disconnect(struct snd_device *device)
 {
 	struct snd_ac97 *ac97 = device->device_data;
 	if (ac97->dev.bus)
 		device_unregister(&ac97->dev);
-	return snd_ac97_free(ac97);
+	return 0;
 }
 
 /* build_ops to do nothing */
 static struct snd_ac97_build_ops null_build_ops;
 
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+static void do_update_power(void *data)	/* handler given to INIT_WORK(); data is the struct snd_ac97 passed there */
+{
+	update_power_regs(data);
+}
+#endif
+
 /**
  * snd_ac97_mixer - create an Codec97 component
  * @bus: the AC97 bus which codec is attached to
@@ -1860,7 +1965,7 @@
 	static struct snd_device_ops ops = {
 		.dev_free =	snd_ac97_dev_free,
 		.dev_register =	snd_ac97_dev_register,
-		.dev_unregister =	snd_ac97_dev_unregister,
+		.dev_disconnect =	snd_ac97_dev_disconnect,
 	};
 
 	snd_assert(rac97 != NULL, return -EINVAL);
@@ -1883,6 +1988,10 @@
 	bus->codec[ac97->num] = ac97;
 	mutex_init(&ac97->reg_mutex);
 	mutex_init(&ac97->page_mutex);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	ac97->power_workq = create_workqueue("ac97");
+	INIT_WORK(&ac97->power_work, do_update_power, ac97);
+#endif
 
 #ifdef CONFIG_PCI
 	if (ac97->pci) {
@@ -2117,15 +2226,8 @@
 			return -ENOMEM;
 		}
 	}
-	/* make sure the proper powerdown bits are cleared */
-	if (ac97->scaps && ac97_is_audio(ac97)) {
-		reg = snd_ac97_read(ac97, AC97_EXTENDED_STATUS);
-		if (ac97->scaps & AC97_SCAP_SURROUND_DAC) 
-			reg &= ~AC97_EA_PRJ;
-		if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC) 
-			reg &= ~(AC97_EA_PRI | AC97_EA_PRK);
-		snd_ac97_write_cache(ac97, AC97_EXTENDED_STATUS, reg);
-	}
+	if (ac97_is_audio(ac97))
+		update_power_regs(ac97);
 	snd_ac97_proc_init(ac97);
 	if ((err = snd_device_new(card, SNDRV_DEV_CODEC, ac97, &ops)) < 0) {
 		snd_ac97_free(ac97);
@@ -2153,22 +2255,155 @@
 		snd_ac97_write(ac97, AC97_HEADPHONE, 0x9f9f);
 	}
 
-	power = ac97->regs[AC97_POWERDOWN] | 0x8000;	/* EAPD */
-	power |= 0x4000;	/* Headphone amplifier powerdown */
-	power |= 0x0300;	/* ADC & DAC powerdown */
+	/* surround, CLFE, mic powerdown */
+	power = ac97->regs[AC97_EXTENDED_STATUS];
+	if (ac97->scaps & AC97_SCAP_SURROUND_DAC)
+		power |= AC97_EA_PRJ;
+	if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC)
+		power |= AC97_EA_PRI | AC97_EA_PRK;
+	power |= AC97_EA_PRL;
+	snd_ac97_write(ac97, AC97_EXTENDED_STATUS, power);
+
+	/* powerdown external amplifier; EAPD bit untouched if AC97_SCAP_EAPD_LED */
+	power = ac97->regs[AC97_POWERDOWN];	/* never reuse the EXTENDED_STATUS value */
+	if (ac97->scaps & AC97_SCAP_INV_EAPD)
+		power &= ~AC97_PD_EAPD;
+	else if (! (ac97->scaps & AC97_SCAP_EAPD_LED))
+		power |= AC97_PD_EAPD;
+	power |= AC97_PD_PR6 | AC97_PD_PR0 | AC97_PD_PR1; /* HP amp, ADC & DAC powerdown */
 	snd_ac97_write(ac97, AC97_POWERDOWN, power);
 	udelay(100);
-	power |= 0x0400;	/* Analog Mixer powerdown (Vref on) */
+	power |= AC97_PD_PR2 | AC97_PD_PR3;	/* Analog Mixer powerdown */
 	snd_ac97_write(ac97, AC97_POWERDOWN, power);
-	udelay(100);
-#if 0
-	/* FIXME: this causes click noises on some boards at resume */
-	power |= 0x3800;	/* AC-link powerdown, internal Clk disable */
-	snd_ac97_write(ac97, AC97_POWERDOWN, power);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	if (power_save) {
+		udelay(100);
+		/* AC-link powerdown, internal Clk disable */
+		/* FIXME: this may cause click noises on some boards */
+		power |= AC97_PD_PR4 | AC97_PD_PR5;
+		snd_ac97_write(ac97, AC97_POWERDOWN, power);
+	}
 #endif
 }
 
 
+struct ac97_power_reg {
+	unsigned short reg;
+	unsigned short power_reg;
+	unsigned short mask;
+};
+
+enum { PWIDX_ADC, PWIDX_FRONT, PWIDX_CLFE, PWIDX_SURR, PWIDX_MIC, PWIDX_SIZE };
+
+static struct ac97_power_reg power_regs[PWIDX_SIZE] = {
+	[PWIDX_ADC] = { AC97_PCM_LR_ADC_RATE, AC97_POWERDOWN, AC97_PD_PR0},
+	[PWIDX_FRONT] = { AC97_PCM_FRONT_DAC_RATE, AC97_POWERDOWN, AC97_PD_PR1},
+	[PWIDX_CLFE] = { AC97_PCM_LFE_DAC_RATE, AC97_EXTENDED_STATUS,
+			 AC97_EA_PRI | AC97_EA_PRK},
+	[PWIDX_SURR] = { AC97_PCM_SURR_DAC_RATE, AC97_EXTENDED_STATUS,
+			 AC97_EA_PRJ},
+	[PWIDX_MIC] = { AC97_PCM_MIC_ADC_RATE, AC97_EXTENDED_STATUS,
+			AC97_EA_PRL},
+};
+
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+/**
+ * snd_ac97_update_power - update the powerdown register
+ * @ac97: the codec instance (NULL is accepted and ignored)
+ * @reg: the rate register, e.g. AC97_PCM_FRONT_DAC_RATE (0 = no part to mark)
+ * @powerup: non-zero to power up the part, zero to power it down
+ *
+ * Update the AC97 powerdown register bits of the given part.  Returns 0.
+ */
+int snd_ac97_update_power(struct snd_ac97 *ac97, int reg, int powerup)
+{
+	int i;
+
+	if (! ac97)
+		return 0;
+
+	if (reg) {
+		/* SPDIF requires DAC power, too */
+		if (reg == AC97_SPDIF)
+			reg = AC97_PCM_FRONT_DAC_RATE;
+		for (i = 0; i < PWIDX_SIZE; i++) {
+			if (power_regs[i].reg == reg) {
+				if (powerup)
+					ac97->power_up |= (1 << i);
+				else
+					ac97->power_up &= ~(1 << i);
+				break;
+			}
+		}
+	}
+
+	if (! power_save)
+		return 0;
+
+	if (! powerup && ac97->power_workq)
+		/* adjust power-down bits after a two-second delay
+		 * (avoids loud click noises with the many (OSS) apps
+		 *  that open/close frequently)
+		 */
+		queue_delayed_work(ac97->power_workq, &ac97->power_work, HZ*2);
+	else
+		update_power_regs(ac97);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(snd_ac97_update_power);
+#endif /* CONFIG_SND_AC97_POWER_SAVE */
+
+static void update_power_regs(struct snd_ac97 *ac97)
+{
+	unsigned int power_up, bits;
+	int i;
+
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	if (power_save)
+		power_up = ac97->power_up;
+	else {
+#endif
+		power_up = (1 << PWIDX_FRONT) | (1 << PWIDX_ADC);
+		power_up |= (1 << PWIDX_MIC);
+		if (ac97->scaps & AC97_SCAP_SURROUND_DAC)
+			power_up |= (1 << PWIDX_SURR);
+		if (ac97->scaps & AC97_SCAP_CENTER_LFE_DAC)
+			power_up |= (1 << PWIDX_CLFE);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	}
+#endif
+	if (power_up) {
+		if (ac97->regs[AC97_POWERDOWN] & AC97_PD_PR2) {
+			/* needs power-up analog mix and vref */
+			snd_ac97_update_bits(ac97, AC97_POWERDOWN,
+					     AC97_PD_PR3, 0);
+			msleep(1);
+			snd_ac97_update_bits(ac97, AC97_POWERDOWN,
+					     AC97_PD_PR2, 0);
+		}
+	}
+	for (i = 0; i < PWIDX_SIZE; i++) {
+		if (power_up & (1 << i))
+			bits = 0;
+		else
+			bits = power_regs[i].mask;
+		snd_ac97_update_bits(ac97, power_regs[i].power_reg,
+				     power_regs[i].mask, bits);
+	}
+	if (! power_up) {
+		if (! (ac97->regs[AC97_POWERDOWN] & AC97_PD_PR2)) {
+			/* power down analog mix and vref */
+			snd_ac97_update_bits(ac97, AC97_POWERDOWN,
+					     AC97_PD_PR2, AC97_PD_PR2);
+			snd_ac97_update_bits(ac97, AC97_POWERDOWN,
+					     AC97_PD_PR3, AC97_PD_PR3);
+		}
+	}
+}
+
+
 #ifdef CONFIG_PM
 /**
  * snd_ac97_suspend - General suspend function for AC97 codec
@@ -2484,6 +2719,7 @@
 	msw->put = master_mute_sw_put;
 	snd_ac97_remove_ctl(ac97, "External Amplifier", NULL);
 	snd_ac97_update_bits(ac97, AC97_POWERDOWN, 0x8000, 0x8000); /* mute LED on */
+	ac97->scaps |= AC97_SCAP_EAPD_LED;
 	return 0;
 }
 
diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 094cfc1..dc28b11 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -32,6 +32,7 @@
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/ac97_codec.h>
 #include "ac97_patch.h"
 #include "ac97_id.h"
@@ -51,6 +52,20 @@
 	return 0;
 }
 
+/* replace the TLV of mixer control @name with @tlv; no-op if the control is missing or has no TLV */
+static void reset_tlv(struct snd_ac97 *ac97, const char *name,
+		      unsigned int *tlv)
+{
+	struct snd_ctl_elem_id sid;
+	struct snd_kcontrol *kctl;
+	memset(&sid, 0, sizeof(sid));
+	strcpy(sid.name, name);
+	sid.iface = SNDRV_CTL_ELEM_IFACE_MIXER;
+	kctl = snd_ctl_find_id(ac97->bus->card, &sid);
+	if (kctl && kctl->tlv.p)
+		kctl->tlv.p = tlv;
+}
+
 /* set to the page, update bits and restore the page */
 static int ac97_update_bits_page(struct snd_ac97 *ac97, unsigned short reg, unsigned short mask, unsigned short value, unsigned short page)
 {
@@ -466,7 +481,7 @@
 	ac97->build_ops = &patch_wolfson_wm9705_ops;
 #ifdef CONFIG_TOUCHSCREEN_WM9705
 	/* WM9705 touchscreen uses AUX and VIDEO for touch */
-	ac97->flags |=3D AC97_HAS_NO_VIDEO | AC97_HAS_NO_AUX;
+	ac97->flags |= AC97_HAS_NO_VIDEO | AC97_HAS_NO_AUX;
 #endif
 	return 0;
 }
@@ -1380,6 +1395,17 @@
 
 #endif
 
+static const struct snd_ac97_res_table ad1819_restbl[] = {
+	{ AC97_PHONE, 0x9f1f },
+	{ AC97_MIC, 0x9f1f },
+	{ AC97_LINE, 0x9f1f },
+	{ AC97_CD, 0x9f1f },
+	{ AC97_VIDEO, 0x9f1f },
+	{ AC97_AUX, 0x9f1f },
+	{ AC97_PCM, 0x9f1f },
+	{ } /* terminator */
+};
+
 int patch_ad1819(struct snd_ac97 * ac97)
 {
 	unsigned short scfg;
@@ -1387,6 +1413,7 @@
 	// patch for Analog Devices
 	scfg = snd_ac97_read(ac97, AC97_AD_SERIAL_CFG);
 	snd_ac97_write_cache(ac97, AC97_AD_SERIAL_CFG, scfg | 0x7000); /* select all codecs */
+	ac97->res_table = ad1819_restbl;
 	return 0;
 }
 
@@ -1522,12 +1549,16 @@
 	AC97_SINGLE("Line Jack Sense", AC97_AD_JACK_SPDIF, 8, 1, 1), /* inverted */
 };
 
+static DECLARE_TLV_DB_SCALE(db_scale_6bit_6db_max, -8850, 150, 0);
+
 static int patch_ad1885_specific(struct snd_ac97 * ac97)
 {
 	int err;
 
 	if ((err = patch_build_controls(ac97, snd_ac97_controls_ad1885, ARRAY_SIZE(snd_ac97_controls_ad1885))) < 0)
 		return err;
+	reset_tlv(ac97, "Headphone Playback Volume",
+		  db_scale_6bit_6db_max);
 	return 0;
 }
 
@@ -1551,12 +1582,27 @@
 	return 0;
 }
 
+static int patch_ad1886_specific(struct snd_ac97 * ac97)
+{
+	reset_tlv(ac97, "Headphone Playback Volume",
+		  db_scale_6bit_6db_max);
+	return 0;
+}
+
+static struct snd_ac97_build_ops patch_ad1886_build_ops = {
+	.build_specific = &patch_ad1886_specific,
+#ifdef CONFIG_PM
+	.resume = ad18xx_resume
+#endif
+};
+
 int patch_ad1886(struct snd_ac97 * ac97)
 {
 	patch_ad1881(ac97);
 	/* Presario700 workaround */
 	/* for Jack Sense/SPDIF Register misetting causing */
 	snd_ac97_write_cache(ac97, AC97_AD_JACK_SPDIF, 0x0010);
+	ac97->build_ops = &patch_ad1886_build_ops;
 	return 0;
 }
 
@@ -2015,6 +2061,8 @@
 	/* AC97_SINGLE("IEC958 Input Monitor", AC97_ALC650_MULTICH, 13, 1, 0), */
 };
 
+static DECLARE_TLV_DB_SCALE(db_scale_5bit_3db_max, -4350, 150, 0);
+
 static int patch_alc650_specific(struct snd_ac97 * ac97)
 {
 	int err;
@@ -2025,6 +2073,9 @@
 		if ((err = patch_build_controls(ac97, snd_ac97_spdif_controls_alc650, ARRAY_SIZE(snd_ac97_spdif_controls_alc650))) < 0)
 			return err;
 	}
+	if (ac97->id != AC97_ID_ALC650F)
+		reset_tlv(ac97, "Master Playback Volume",
+			  db_scale_5bit_3db_max);
 	return 0;
 }
 
@@ -2208,7 +2259,8 @@
 		val &= ~(1 << 1); /* Pin 47 is spdif input pin */
 	else { /* ALC655 */
 		if (ac97->subsystem_vendor == 0x1462 &&
-		    ac97->subsystem_device == 0x0131) /* MSI S270 laptop */
+		    (ac97->subsystem_device == 0x0131 || /* MSI S270 laptop */
+		     ac97->subsystem_device == 0x0161)) /* LG K1 Express */
 			val &= ~(1 << 1); /* Pin 47 is EAPD (for internal speaker) */
 		else
 			val |= (1 << 1); /* Pin 47 is spdif input pin */
@@ -2759,6 +2811,10 @@
  */
 int patch_vt1617a(struct snd_ac97 * ac97)
 {
+	/* bring analog power consumption to normal, like WinXP driver
+	 * for EPIA SP
+	 */
+	snd_ac97_write_cache(ac97, 0x5c, 0x20);
 	ac97->ext_id |= AC97_EI_SPDIF;	/* force the detection of spdif */
 	ac97->rates[AC97_RATES_SPDIF] = SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000;
 	return 0;
@@ -2872,3 +2928,41 @@
 	ac97->res_table = lm4550_restbl;
 	return 0;
 }
+
+/* 
+ *  UCB1400 codec (http://www.semiconductors.philips.com/acrobat_download/datasheets/UCB1400-02.pdf)
+ */
+static const struct snd_kcontrol_new snd_ac97_controls_ucb1400[] = {
+/* enable/disable headphone driver which allows direct connection to
+   stereo headphone without the use of external DC blocking
+   capacitors */
+AC97_SINGLE("Headphone Driver", 0x6a, 6, 1, 0),
+/* Filter used to compensate the DC offset is added in the ADC to remove idle
+   tones from the audio band. */
+AC97_SINGLE("DC Filter", 0x6a, 4, 1, 0),
+/* Control smart-low-power mode feature. Allows automatic power down
+   of unused blocks in the ADC analog front end and the PLL. */
+AC97_SINGLE("Smart Low Power Mode", 0x6c, 4, 3, 0),
+};
+
+static int patch_ucb1400_specific(struct snd_ac97 * ac97)
+{
+	int idx, err;
+	for (idx = 0; idx < ARRAY_SIZE(snd_ac97_controls_ucb1400); idx++)
+		if ((err = snd_ctl_add(ac97->bus->card, snd_ctl_new1(&snd_ac97_controls_ucb1400[idx], ac97))) < 0)
+			return err;
+	return 0;
+}
+
+static struct snd_ac97_build_ops patch_ucb1400_ops = {
+	.build_specific	= patch_ucb1400_specific,
+};
+
+int patch_ucb1400(struct snd_ac97 * ac97)
+{
+	ac97->build_ops = &patch_ucb1400_ops;
+	/* enable headphone driver, DC filter and smart low power mode by default (cached writes) */
+	snd_ac97_write_cache(ac97, 0x6a, 0x0050);
+	snd_ac97_write_cache(ac97, 0x6c, 0x0030);
+	return 0;
+}
diff --git a/sound/pci/ac97/ac97_patch.h b/sound/pci/ac97/ac97_patch.h
index adcaa04..7419792 100644
--- a/sound/pci/ac97/ac97_patch.h
+++ b/sound/pci/ac97/ac97_patch.h
@@ -58,5 +58,6 @@
 int patch_vt1616(struct snd_ac97 * ac97);
 int patch_vt1617a(struct snd_ac97 * ac97);
 int patch_it2646(struct snd_ac97 * ac97);
+int patch_ucb1400(struct snd_ac97 * ac97);
 int mpatch_si3036(struct snd_ac97 * ac97);
 int patch_lm4550(struct snd_ac97 * ac97);
diff --git a/sound/pci/ac97/ac97_pcm.c b/sound/pci/ac97/ac97_pcm.c
index f684aa2..3758d07 100644
--- a/sound/pci/ac97/ac97_pcm.c
+++ b/sound/pci/ac97/ac97_pcm.c
@@ -269,6 +269,7 @@
 			return -EINVAL;
 	}
 
+	snd_ac97_update_power(ac97, reg, 1);
 	switch (reg) {
 	case AC97_PCM_MIC_ADC_RATE:
 		if ((ac97->regs[AC97_EXTENDED_STATUS] & AC97_EA_VRM) == 0)	/* MIC VRA */
@@ -606,6 +607,7 @@
 			goto error;
 		}
 	}
+	pcm->cur_dbl = r;
 	spin_unlock_irq(&pcm->bus->bus_lock);
 	for (i = 3; i < 12; i++) {
 		if (!(slots & (1 << i)))
@@ -651,6 +653,21 @@
 	unsigned short slots = pcm->aslots;
 	int i, cidx;
 
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	int r = pcm->cur_dbl;
+	for (i = 3; i < 12; i++) {
+		if (!(slots & (1 << i)))
+			continue;
+		for (cidx = 0; cidx < 4; cidx++) {
+			if (pcm->r[r].rslots[cidx] & (1 << i)) {
+				int reg = get_slot_reg(pcm, cidx, i, r);
+				snd_ac97_update_power(pcm->r[r].codec[cidx],
+						      reg, 0);
+			}
+		}
+	}
+#endif
+
 	bus = pcm->bus;
 	spin_lock_irq(&pcm->bus->bus_lock);
 	for (i = 3; i < 12; i++) {
@@ -660,6 +677,7 @@
 			bus->used_slots[pcm->stream][cidx] &= ~(1 << i);
 	}
 	pcm->aslots = 0;
+	pcm->cur_dbl = 0;
 	spin_unlock_irq(&pcm->bus->bus_lock);
 	return 0;
 }
diff --git a/sound/pci/ac97/ac97_proc.c b/sound/pci/ac97/ac97_proc.c
index 2118df5..a3fdd7d 100644
--- a/sound/pci/ac97/ac97_proc.c
+++ b/sound/pci/ac97/ac97_proc.c
@@ -457,14 +457,10 @@
 
 void snd_ac97_proc_done(struct snd_ac97 * ac97)
 {
-	if (ac97->proc_regs) {
-		snd_info_unregister(ac97->proc_regs);
-		ac97->proc_regs = NULL;
-	}
-	if (ac97->proc) {
-		snd_info_unregister(ac97->proc);
-		ac97->proc = NULL;
-	}
+	snd_info_free_entry(ac97->proc_regs);
+	ac97->proc_regs = NULL;
+	snd_info_free_entry(ac97->proc);
+	ac97->proc = NULL;
 }
 
 void snd_ac97_bus_proc_init(struct snd_ac97_bus * bus)
@@ -485,8 +481,6 @@
 
 void snd_ac97_bus_proc_done(struct snd_ac97_bus * bus)
 {
-	if (bus->proc) {
-		snd_info_unregister(bus->proc);
-		bus->proc = NULL;
-	}
+	snd_info_free_entry(bus->proc);
+	bus->proc = NULL;
 }
diff --git a/sound/pci/ac97/ak4531_codec.c b/sound/pci/ac97/ak4531_codec.c
index 94c26ec..c153cb7 100644
--- a/sound/pci/ac97/ak4531_codec.c
+++ b/sound/pci/ac97/ak4531_codec.c
@@ -27,6 +27,7 @@
 
 #include <sound/core.h>
 #include <sound/ak4531_codec.h>
+#include <sound/tlv.h>
 
 MODULE_AUTHOR("Jaroslav Kysela <perex@suse.cz>");
 MODULE_DESCRIPTION("Universal routines for AK4531 codec");
@@ -63,6 +64,14 @@
   .info = snd_ak4531_info_single, \
   .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \
   .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22) }
+#define AK4531_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv)    \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .index = xindex, \
+  .info = snd_ak4531_info_single, \
+  .get = snd_ak4531_get_single, .put = snd_ak4531_put_single, \
+  .private_value = reg | (shift << 16) | (mask << 24) | (invert << 22), \
+  .tlv = { .p = (xtlv) } }
 
 static int snd_ak4531_info_single(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
@@ -122,6 +131,14 @@
   .info = snd_ak4531_info_double, \
   .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \
   .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22) }
+#define AK4531_DOUBLE_TLV(xname, xindex, left_reg, right_reg, left_shift, right_shift, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .index = xindex, \
+  .info = snd_ak4531_info_double, \
+  .get = snd_ak4531_get_double, .put = snd_ak4531_put_double, \
+  .private_value = left_reg | (right_reg << 8) | (left_shift << 16) | (right_shift << 19) | (mask << 24) | (invert << 22), \
+  .tlv = { .p = (xtlv) } }
 
 static int snd_ak4531_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
@@ -250,50 +267,62 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_master, -6200, 200, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_mono, -2800, 400, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_input, -5000, 200, 0);
+
 static struct snd_kcontrol_new snd_ak4531_controls[] = {
 
-AK4531_DOUBLE("Master Playback Switch", 0, AK4531_LMASTER, AK4531_RMASTER, 7, 7, 1, 1),
+AK4531_DOUBLE_TLV("Master Playback Switch", 0,
+		  AK4531_LMASTER, AK4531_RMASTER, 7, 7, 1, 1,
+		  db_scale_master),
 AK4531_DOUBLE("Master Playback Volume", 0, AK4531_LMASTER, AK4531_RMASTER, 0, 0, 0x1f, 1),
 
-AK4531_SINGLE("Master Mono Playback Switch", 0, AK4531_MONO_OUT, 7, 1, 1),
+AK4531_SINGLE_TLV("Master Mono Playback Switch", 0, AK4531_MONO_OUT, 7, 1, 1,
+		  db_scale_mono),
 AK4531_SINGLE("Master Mono Playback Volume", 0, AK4531_MONO_OUT, 0, 0x07, 1),
 
 AK4531_DOUBLE("PCM Switch", 0, AK4531_LVOICE, AK4531_RVOICE, 7, 7, 1, 1),
-AK4531_DOUBLE("PCM Volume", 0, AK4531_LVOICE, AK4531_RVOICE, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("PCM Volume", 0, AK4531_LVOICE, AK4531_RVOICE, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("PCM Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 3, 2, 1, 0),
 AK4531_DOUBLE("PCM Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 2, 2, 1, 0),
 
 AK4531_DOUBLE("PCM Switch", 1, AK4531_LFM, AK4531_RFM, 7, 7, 1, 1),
-AK4531_DOUBLE("PCM Volume", 1, AK4531_LFM, AK4531_RFM, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("PCM Volume", 1, AK4531_LFM, AK4531_RFM, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("PCM Playback Switch", 1, AK4531_OUT_SW1, AK4531_OUT_SW1, 6, 5, 1, 0),
 AK4531_INPUT_SW("PCM Capture Route", 1, AK4531_LIN_SW1, AK4531_RIN_SW1, 6, 5),
 
 AK4531_DOUBLE("CD Switch", 0, AK4531_LCD, AK4531_RCD, 7, 7, 1, 1),
-AK4531_DOUBLE("CD Volume", 0, AK4531_LCD, AK4531_RCD, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("CD Volume", 0, AK4531_LCD, AK4531_RCD, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("CD Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 2, 1, 1, 0),
 AK4531_INPUT_SW("CD Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 2, 1),
 
 AK4531_DOUBLE("Line Switch", 0, AK4531_LLINE, AK4531_RLINE, 7, 7, 1, 1),
-AK4531_DOUBLE("Line Volume", 0, AK4531_LLINE, AK4531_RLINE, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("Line Volume", 0, AK4531_LLINE, AK4531_RLINE, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("Line Playback Switch", 0, AK4531_OUT_SW1, AK4531_OUT_SW1, 4, 3, 1, 0),
 AK4531_INPUT_SW("Line Capture Route", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 4, 3),
 
 AK4531_DOUBLE("Aux Switch", 0, AK4531_LAUXA, AK4531_RAUXA, 7, 7, 1, 1),
-AK4531_DOUBLE("Aux Volume", 0, AK4531_LAUXA, AK4531_RAUXA, 0, 0, 0x1f, 1),
+AK4531_DOUBLE_TLV("Aux Volume", 0, AK4531_LAUXA, AK4531_RAUXA, 0, 0, 0x1f, 1,
+		  db_scale_input),
 AK4531_DOUBLE("Aux Playback Switch", 0, AK4531_OUT_SW2, AK4531_OUT_SW2, 5, 4, 1, 0),
 AK4531_INPUT_SW("Aux Capture Route", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 4, 3),
 
 AK4531_SINGLE("Mono Switch", 0, AK4531_MONO1, 7, 1, 1),
-AK4531_SINGLE("Mono Volume", 0, AK4531_MONO1, 0, 0x1f, 1),
+AK4531_SINGLE_TLV("Mono Volume", 0, AK4531_MONO1, 0, 0x1f, 1, db_scale_input),
 AK4531_SINGLE("Mono Playback Switch", 0, AK4531_OUT_SW2, 0, 1, 0),
 AK4531_DOUBLE("Mono Capture Switch", 0, AK4531_LIN_SW2, AK4531_RIN_SW2, 0, 0, 1, 0),
 
 AK4531_SINGLE("Mono Switch", 1, AK4531_MONO2, 7, 1, 1),
-AK4531_SINGLE("Mono Volume", 1, AK4531_MONO2, 0, 0x1f, 1),
+AK4531_SINGLE_TLV("Mono Volume", 1, AK4531_MONO2, 0, 0x1f, 1, db_scale_input),
 AK4531_SINGLE("Mono Playback Switch", 1, AK4531_OUT_SW2, 1, 1, 0),
 AK4531_DOUBLE("Mono Capture Switch", 1, AK4531_LIN_SW2, AK4531_RIN_SW2, 1, 1, 1, 0),
 
-AK4531_SINGLE("Mic Volume", 0, AK4531_MIC, 0, 0x1f, 1),
+AK4531_SINGLE_TLV("Mic Volume", 0, AK4531_MIC, 0, 0x1f, 1, db_scale_input),
 AK4531_SINGLE("Mic Switch", 0, AK4531_MIC, 7, 1, 1),
 AK4531_SINGLE("Mic Playback Switch", 0, AK4531_OUT_SW1, 0, 1, 0),
 AK4531_DOUBLE("Mic Capture Switch", 0, AK4531_LIN_SW1, AK4531_RIN_SW1, 0, 0, 1, 0),
diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c
index 146eed7..9855f52 100644
--- a/sound/pci/ca0106/ca0106_mixer.c
+++ b/sound/pci/ca0106/ca0106_mixer.c
@@ -70,9 +70,13 @@
 #include <sound/pcm.h>
 #include <sound/ac97_codec.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 
 #include "ca0106.h"
 
+static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale1, -5175, 25, 1);
+static DECLARE_TLV_DB_SCALE(snd_ca0106_db_scale2, -10350, 50, 1);
+
 static int snd_ca0106_shared_spdif_info(struct snd_kcontrol *kcontrol,
 					struct snd_ctl_elem_info *uinfo)
 {
@@ -469,18 +473,24 @@
 #define CA_VOLUME(xname,chid,reg) \
 {								\
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname,	\
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |		\
+	          SNDRV_CTL_ELEM_ACCESS_TLV_READ,		\
 	.info =	 snd_ca0106_volume_info,			\
 	.get =   snd_ca0106_volume_get,				\
 	.put =   snd_ca0106_volume_put,				\
+	.tlv = { .p = snd_ca0106_db_scale1 },			\
 	.private_value = ((chid) << 8) | (reg)			\
 }
 
 #define I2C_VOLUME(xname,chid) \
 {								\
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname,	\
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |		\
+	          SNDRV_CTL_ELEM_ACCESS_TLV_READ,		\
 	.info =  snd_ca0106_i2c_volume_info,			\
 	.get =   snd_ca0106_i2c_volume_get,			\
 	.put =   snd_ca0106_i2c_volume_put,			\
+	.tlv = { .p = snd_ca0106_db_scale2 },			\
 	.private_value = chid					\
 }
 
diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c
index 9631456..1990430 100644
--- a/sound/pci/cs4281.c
+++ b/sound/pci/cs4281.c
@@ -33,6 +33,7 @@
 #include <sound/pcm.h>
 #include <sound/rawmidi.h>
 #include <sound/ac97_codec.h>
+#include <sound/tlv.h>
 #include <sound/opl3.h>
 #include <sound/initval.h>
 
@@ -1054,6 +1055,8 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_dsp, -4650, 150, 0);
+
 static struct snd_kcontrol_new snd_cs4281_fm_vol = 
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1062,6 +1065,7 @@
 	.get = snd_cs4281_get_volume,
 	.put = snd_cs4281_put_volume, 
 	.private_value = ((BA0_FMLVC << 16) | BA0_FMRVC),
+	.tlv = { .p = db_scale_dsp },
 };
 
 static struct snd_kcontrol_new snd_cs4281_pcm_vol = 
@@ -1072,6 +1076,7 @@
 	.get = snd_cs4281_get_volume,
 	.put = snd_cs4281_put_volume, 
 	.private_value = ((BA0_PPLVC << 16) | BA0_PPRVC),
+	.tlv = { .p = db_scale_dsp },
 };
 
 static void snd_cs4281_mixer_free_ac97_bus(struct snd_ac97_bus *bus)
diff --git a/sound/pci/cs46xx/dsp_spos.c b/sound/pci/cs46xx/dsp_spos.c
index 5c9711c..89c4027 100644
--- a/sound/pci/cs46xx/dsp_spos.c
+++ b/sound/pci/cs46xx/dsp_spos.c
@@ -868,35 +868,23 @@
 	struct dsp_spos_instance * ins = chip->dsp_spos_instance;
 	int i;
 
-	if (ins->proc_sym_info_entry) {
-		snd_info_unregister(ins->proc_sym_info_entry);
-		ins->proc_sym_info_entry = NULL;
-	}
-  
-	if (ins->proc_modules_info_entry) {
-		snd_info_unregister(ins->proc_modules_info_entry);
-		ins->proc_modules_info_entry = NULL;
-	}
- 
-	if (ins->proc_parameter_dump_info_entry) {
-		snd_info_unregister(ins->proc_parameter_dump_info_entry);
-		ins->proc_parameter_dump_info_entry = NULL;
-	}
-  
-	if (ins->proc_sample_dump_info_entry) {
-		snd_info_unregister(ins->proc_sample_dump_info_entry);
-		ins->proc_sample_dump_info_entry = NULL;
-	}
-  
-	if (ins->proc_scb_info_entry) {
-		snd_info_unregister(ins->proc_scb_info_entry);
-		ins->proc_scb_info_entry = NULL;
-	}
-  
-	if (ins->proc_task_info_entry) {
-		snd_info_unregister(ins->proc_task_info_entry);
-		ins->proc_task_info_entry = NULL;
-	}
+	snd_info_free_entry(ins->proc_sym_info_entry);
+	ins->proc_sym_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_modules_info_entry);
+	ins->proc_modules_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_parameter_dump_info_entry);
+	ins->proc_parameter_dump_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_sample_dump_info_entry);
+	ins->proc_sample_dump_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_scb_info_entry);
+	ins->proc_scb_info_entry = NULL;
+
+	snd_info_free_entry(ins->proc_task_info_entry);
+	ins->proc_task_info_entry = NULL;
 
 	mutex_lock(&chip->spos_mutex);
 	for (i = 0; i < ins->nscb; ++i) {
@@ -905,10 +893,8 @@
 	}
 	mutex_unlock(&chip->spos_mutex);
 
-	if (ins->proc_dsp_dir) {
-		snd_info_unregister (ins->proc_dsp_dir);
-		ins->proc_dsp_dir = NULL;
-	}
+	snd_info_free_entry(ins->proc_dsp_dir);
+	ins->proc_dsp_dir = NULL;
 
 	return 0;
 }
diff --git a/sound/pci/cs46xx/dsp_spos_scb_lib.c b/sound/pci/cs46xx/dsp_spos_scb_lib.c
index 232b337..343f51d 100644
--- a/sound/pci/cs46xx/dsp_spos_scb_lib.c
+++ b/sound/pci/cs46xx/dsp_spos_scb_lib.c
@@ -233,7 +233,7 @@
 
 		snd_printdd("cs46xx_dsp_proc_free_scb_desc: freeing %s\n",scb->scb_name);
 
-		snd_info_unregister(scb->proc_info);
+		snd_info_free_entry(scb->proc_info);
 		scb->proc_info = NULL;
 
 		snd_assert (scb_info != NULL, return);
diff --git a/sound/pci/cs5535audio/Makefile b/sound/pci/cs5535audio/Makefile
index 2911a8a..ad947b4 100644
--- a/sound/pci/cs5535audio/Makefile
+++ b/sound/pci/cs5535audio/Makefile
@@ -4,7 +4,7 @@
 
 snd-cs5535audio-objs := cs5535audio.o cs5535audio_pcm.o
 
-ifdef CONFIG_PM
+ifeq ($(CONFIG_PM),y)
 snd-cs5535audio-objs += cs5535audio_pm.o
 endif
 
diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c
index 289bcd9..493ec08 100644
--- a/sound/pci/emu10k1/emu10k1.c
+++ b/sound/pci/emu10k1/emu10k1.c
@@ -232,7 +232,7 @@
 	return 0;
 }
 
-int snd_emu10k1_resume(struct pci_dev *pci)
+static int snd_emu10k1_resume(struct pci_dev *pci)
 {
 	struct snd_card *card = pci_get_drvdata(pci);
 	struct snd_emu10k1 *emu = card->private_data;
diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c
index 79f24cd..be65d4d 100644
--- a/sound/pci/emu10k1/emu10k1_main.c
+++ b/sound/pci/emu10k1/emu10k1_main.c
@@ -927,6 +927,7 @@
 	 .ca0151_chip = 1,
 	 .spk71 = 1,
 	 .spdif_bug = 1,
+	 .adc_1361t = 1,  /* 24 bit capture instead of 16bit */
 	 .ac97_chip = 1} ,
 	{.vendor = 0x1102, .device = 0x0004, .subsystem = 0x10051102,
 	 .driver = "Audigy2", .name = "Audigy 2 EX [1005]", 
diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c
index bda8bdf..da1610a 100644
--- a/sound/pci/emu10k1/emu10k1x.c
+++ b/sound/pci/emu10k1/emu10k1x.c
@@ -1626,12 +1626,7 @@
 // initialization of the module
 static int __init alsa_card_emu10k1x_init(void)
 {
-	int err;
-
-	if ((err = pci_register_driver(&driver)) > 0)
-		return err;
-
-	return 0;
+	return pci_register_driver(&driver);
 }
 
 // clean up the module
diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
index dfba002..13cd6ce 100644
--- a/sound/pci/emu10k1/emufx.c
+++ b/sound/pci/emu10k1/emufx.c
@@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 
 #include <sound/core.h>
+#include <sound/tlv.h>
 #include <sound/emu10k1.h>
 
 #if 0		/* for testing purposes - digital out -> capture */
@@ -266,6 +267,7 @@
 	{ 0x37c4448b, 0xa45ef51d, 0x262f3267, 0x081e36dc, 0xfd8f5d14 }
 };
 
+/* dB gain = (float) 20 * log10( float(db_table_value) / 0x80000000 ) */
 static const u32 db_table[101] = {
 	0x00000000, 0x01571f82, 0x01674b41, 0x01783a1b, 0x0189f540,
 	0x019c8651, 0x01aff763, 0x01c45306, 0x01d9a446, 0x01eff6b8,
@@ -290,6 +292,9 @@
 	0x7fffffff,
 };
 
+/* EMU10k1/EMU10k2 DSP control db gain */
+static DECLARE_TLV_DB_SCALE(snd_emu10k1_db_scale1, -4000, 40, 1);
+
 static const u32 onoff_table[2] = {
 	0x00000000, 0x00000001
 };
@@ -755,6 +760,11 @@
 		knew.device = gctl->id.device;
 		knew.subdevice = gctl->id.subdevice;
 		knew.info = snd_emu10k1_gpr_ctl_info;
+		if (gctl->tlv.p) {
+			knew.tlv.p = gctl->tlv.p;
+			knew.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+				SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+		} 
 		knew.get = snd_emu10k1_gpr_ctl_get;
 		knew.put = snd_emu10k1_gpr_ctl_put;
 		memset(nctl, 0, sizeof(*nctl));
@@ -1013,6 +1023,7 @@
 	ctl->gpr[0] = gpr + 0; ctl->value[0] = defval;
 	ctl->min = 0;
 	ctl->max = 100;
+	ctl->tlv.p = snd_emu10k1_db_scale1;
 	ctl->translation = EMU10K1_GPR_TRANSLATION_TABLE100;	
 }
 
@@ -1027,6 +1038,7 @@
 	ctl->gpr[1] = gpr + 1; ctl->value[1] = defval;
 	ctl->min = 0;
 	ctl->max = 100;
+	ctl->tlv.p = snd_emu10k1_db_scale1;
 	ctl->translation = EMU10K1_GPR_TRANSLATION_TABLE100;
 }
 
diff --git a/sound/pci/emu10k1/p16v.c b/sound/pci/emu10k1/p16v.c
index 9905651..4e0f954 100644
--- a/sound/pci/emu10k1/p16v.c
+++ b/sound/pci/emu10k1/p16v.c
@@ -100,6 +100,7 @@
 #include <sound/pcm.h>
 #include <sound/ac97_codec.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 #include <sound/emu10k1.h>
 #include "p16v.h"
 
@@ -784,12 +785,16 @@
 	}
         return change;
 }
+static DECLARE_TLV_DB_SCALE(snd_p16v_db_scale1, -5175, 25, 1);
 
 #define P16V_VOL(xname,xreg,xhl) { \
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
+        .access = SNDRV_CTL_ELEM_ACCESS_READWRITE |             \
+                  SNDRV_CTL_ELEM_ACCESS_TLV_READ,               \
 	.info = snd_p16v_volume_info, \
 	.get = snd_p16v_volume_get, \
 	.put = snd_p16v_volume_put, \
+	.tlv = { .p = snd_p16v_db_scale1 },	\
 	.private_value = ((xreg) | ((xhl) << 8)) \
 }
 
diff --git a/sound/pci/es1938.c b/sound/pci/es1938.c
index cc0f34f..3ce5a4e 100644
--- a/sound/pci/es1938.c
+++ b/sound/pci/es1938.c
@@ -62,6 +62,7 @@
 #include <sound/opl3.h>
 #include <sound/mpu401.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 
 #include <asm/io.h>
 
@@ -1164,6 +1165,14 @@
 		return snd_es1938_read(chip, reg);
 }
 
+#define ES1938_SINGLE_TLV(xname, xindex, reg, shift, mask, invert, xtlv)    \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ,\
+  .name = xname, .index = xindex, \
+  .info = snd_es1938_info_single, \
+  .get = snd_es1938_get_single, .put = snd_es1938_put_single, \
+  .private_value = reg | (shift << 8) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = xtlv } }
 #define ES1938_SINGLE(xname, xindex, reg, shift, mask, invert) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
   .info = snd_es1938_info_single, \
@@ -1217,6 +1226,14 @@
 	return snd_es1938_reg_bits(chip, reg, mask, val) != val;
 }
 
+#define ES1938_DOUBLE_TLV(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ,\
+  .name = xname, .index = xindex, \
+  .info = snd_es1938_info_double, \
+  .get = snd_es1938_get_double, .put = snd_es1938_put_double, \
+  .private_value = left_reg | (right_reg << 8) | (shift_left << 16) | (shift_right << 19) | (mask << 24) | (invert << 22), \
+  .tlv = { .p = xtlv } }
 #define ES1938_DOUBLE(xname, xindex, left_reg, right_reg, shift_left, shift_right, mask, invert) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
   .info = snd_es1938_info_double, \
@@ -1297,8 +1314,41 @@
 	return change;
 }
 
+static unsigned int db_scale_master[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 54, TLV_DB_SCALE_ITEM(-3600, 50, 1),
+	54, 63, TLV_DB_SCALE_ITEM(-900, 100, 0),
+};
+
+static unsigned int db_scale_audio1[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 8, TLV_DB_SCALE_ITEM(-3300, 300, 1),
+	8, 15, TLV_DB_SCALE_ITEM(-900, 150, 0),
+};
+
+static unsigned int db_scale_audio2[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 8, TLV_DB_SCALE_ITEM(-3450, 300, 1),
+	8, 15, TLV_DB_SCALE_ITEM(-1050, 150, 0),
+};
+
+static unsigned int db_scale_mic[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 8, TLV_DB_SCALE_ITEM(-2400, 300, 1),
+	8, 15, TLV_DB_SCALE_ITEM(0, 150, 0),
+};
+
+static unsigned int db_scale_line[] = {
+	TLV_DB_RANGE_HEAD(2),
+	0, 8, TLV_DB_SCALE_ITEM(-3150, 300, 1),
+	8, 15, TLV_DB_SCALE_ITEM(-750, 150, 0),
+};
+
+static DECLARE_TLV_DB_SCALE(db_scale_capture, 0, 150, 0);
+
 static struct snd_kcontrol_new snd_es1938_controls[] = {
-ES1938_DOUBLE("Master Playback Volume", 0, 0x60, 0x62, 0, 0, 63, 0),
+ES1938_DOUBLE_TLV("Master Playback Volume", 0, 0x60, 0x62, 0, 0, 63, 0,
+		  db_scale_master),
 ES1938_DOUBLE("Master Playback Switch", 0, 0x60, 0x62, 6, 6, 1, 1),
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1309,19 +1359,27 @@
 },
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READ |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name = "Hardware Master Playback Switch",
-	.access = SNDRV_CTL_ELEM_ACCESS_READ,
 	.info = snd_es1938_info_hw_switch,
 	.get = snd_es1938_get_hw_switch,
+	.tlv = { .p = db_scale_master },
 },
 ES1938_SINGLE("Hardware Volume Split", 0, 0x64, 7, 1, 0),
-ES1938_DOUBLE("Line Playback Volume", 0, 0x3e, 0x3e, 4, 0, 15, 0),
+ES1938_DOUBLE_TLV("Line Playback Volume", 0, 0x3e, 0x3e, 4, 0, 15, 0,
+		  db_scale_line),
 ES1938_DOUBLE("CD Playback Volume", 0, 0x38, 0x38, 4, 0, 15, 0),
-ES1938_DOUBLE("FM Playback Volume", 0, 0x36, 0x36, 4, 0, 15, 0),
-ES1938_DOUBLE("Mono Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0),
-ES1938_DOUBLE("Mic Playback Volume", 0, 0x1a, 0x1a, 4, 0, 15, 0),
-ES1938_DOUBLE("Aux Playback Volume", 0, 0x3a, 0x3a, 4, 0, 15, 0),
-ES1938_DOUBLE("Capture Volume", 0, 0xb4, 0xb4, 4, 0, 15, 0),
+ES1938_DOUBLE_TLV("FM Playback Volume", 0, 0x36, 0x36, 4, 0, 15, 0,
+		  db_scale_mic),
+ES1938_DOUBLE_TLV("Mono Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("Mic Playback Volume", 0, 0x1a, 0x1a, 4, 0, 15, 0,
+		  db_scale_mic),
+ES1938_DOUBLE_TLV("Aux Playback Volume", 0, 0x3a, 0x3a, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("Capture Volume", 0, 0xb4, 0xb4, 4, 0, 15, 0,
+		  db_scale_capture),
 ES1938_SINGLE("PC Speaker Volume", 0, 0x3c, 0, 7, 0),
 ES1938_SINGLE("Record Monitor", 0, 0xa8, 3, 1, 0),
 ES1938_SINGLE("Capture Switch", 0, 0x1c, 4, 1, 1),
@@ -1332,16 +1390,26 @@
 	.get = snd_es1938_get_mux,
 	.put = snd_es1938_put_mux,
 },
-ES1938_DOUBLE("Mono Input Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0),
-ES1938_DOUBLE("PCM Capture Volume", 0, 0x69, 0x69, 4, 0, 15, 0),
-ES1938_DOUBLE("Mic Capture Volume", 0, 0x68, 0x68, 4, 0, 15, 0),
-ES1938_DOUBLE("Line Capture Volume", 0, 0x6e, 0x6e, 4, 0, 15, 0),
-ES1938_DOUBLE("FM Capture Volume", 0, 0x6b, 0x6b, 4, 0, 15, 0),
-ES1938_DOUBLE("Mono Capture Volume", 0, 0x6f, 0x6f, 4, 0, 15, 0),
-ES1938_DOUBLE("CD Capture Volume", 0, 0x6a, 0x6a, 4, 0, 15, 0),
-ES1938_DOUBLE("Aux Capture Volume", 0, 0x6c, 0x6c, 4, 0, 15, 0),
-ES1938_DOUBLE("PCM Playback Volume", 0, 0x7c, 0x7c, 4, 0, 15, 0),
-ES1938_DOUBLE("PCM Playback Volume", 1, 0x14, 0x14, 4, 0, 15, 0),
+ES1938_DOUBLE_TLV("Mono Input Playback Volume", 0, 0x6d, 0x6d, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("PCM Capture Volume", 0, 0x69, 0x69, 4, 0, 15, 0,
+		  db_scale_audio2),
+ES1938_DOUBLE_TLV("Mic Capture Volume", 0, 0x68, 0x68, 4, 0, 15, 0,
+		  db_scale_mic),
+ES1938_DOUBLE_TLV("Line Capture Volume", 0, 0x6e, 0x6e, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("FM Capture Volume", 0, 0x6b, 0x6b, 4, 0, 15, 0,
+		  db_scale_mic),
+ES1938_DOUBLE_TLV("Mono Capture Volume", 0, 0x6f, 0x6f, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("CD Capture Volume", 0, 0x6a, 0x6a, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("Aux Capture Volume", 0, 0x6c, 0x6c, 4, 0, 15, 0,
+		  db_scale_line),
+ES1938_DOUBLE_TLV("PCM Playback Volume", 0, 0x7c, 0x7c, 4, 0, 15, 0,
+		  db_scale_audio2),
+ES1938_DOUBLE_TLV("PCM Playback Volume", 1, 0x14, 0x14, 4, 0, 15, 0,
+		  db_scale_audio1),
 ES1938_SINGLE("3D Control - Level", 0, 0x52, 0, 63, 0),
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c
index 3c5ab7c..f3c4038 100644
--- a/sound/pci/es1968.c
+++ b/sound/pci/es1968.c
@@ -1905,7 +1905,7 @@
 	/* Figure out which volume control button was pushed,
 	   based on differences from the default register
 	   values. */
-	x = inb(chip->io_port + 0x1c);
+	x = inb(chip->io_port + 0x1c) & 0xee;
 	/* Reset the volume control registers. */
 	outb(0x88, chip->io_port + 0x1c);
 	outb(0x88, chip->io_port + 0x1d);
@@ -1921,7 +1921,8 @@
 	/* FIXME: we can't call snd_ac97_* functions since here is in tasklet. */
 	spin_lock_irqsave(&chip->ac97_lock, flags);
 	val = chip->ac97->regs[AC97_MASTER];
-	if (x & 1) {
+	switch (x) {
+	case 0x88:
 		/* mute */
 		val ^= 0x8000;
 		chip->ac97->regs[AC97_MASTER] = val;
@@ -1929,26 +1930,31 @@
 		outb(AC97_MASTER, chip->io_port + ESM_AC97_INDEX);
 		snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
 			       &chip->master_switch->id);
-	} else {
-		val &= 0x7fff;
-		if (((x>>1) & 7) > 4) {
-			/* volume up */
-			if ((val & 0xff) > 0)
-				val--;
-			if ((val & 0xff00) > 0)
-				val -= 0x0100;
-		} else {
-			/* volume down */
-			if ((val & 0xff) < 0x1f)
-				val++;
-			if ((val & 0xff00) < 0x1f00)
-				val += 0x0100;
-		}
+		break;
+	case 0xaa:
+		/* volume up */
+		if ((val & 0x7f) > 0)
+			val--;
+		if ((val & 0x7f00) > 0)
+			val -= 0x0100;
 		chip->ac97->regs[AC97_MASTER] = val;
 		outw(val, chip->io_port + ESM_AC97_DATA);
 		outb(AC97_MASTER, chip->io_port + ESM_AC97_INDEX);
 		snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
 			       &chip->master_volume->id);
+		break;
+	case 0x66:
+		/* volume down */
+		if ((val & 0x7f) < 0x1f)
+			val++;
+		if ((val & 0x7f00) < 0x1f00)
+			val += 0x0100;
+		chip->ac97->regs[AC97_MASTER] = val;
+		outw(val, chip->io_port + ESM_AC97_DATA);
+		outb(AC97_MASTER, chip->io_port + ESM_AC97_INDEX);
+		snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
+			       &chip->master_volume->id);
+		break;
 	}
 	spin_unlock_irqrestore(&chip->ac97_lock, flags);
 }
diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c
index 13868c9..bdfda19 100644
--- a/sound/pci/fm801.c
+++ b/sound/pci/fm801.c
@@ -2,6 +2,7 @@
  *  The driver for the ForteMedia FM801 based soundcards
  *  Copyright (c) by Jaroslav Kysela <perex@suse.cz>
  *
+ *  Support FM only card by Andy Shevchenko <andy@smile.org.ua>
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -28,6 +29,7 @@
 #include <linux/moduleparam.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
+#include <sound/tlv.h>
 #include <sound/ac97_codec.h>
 #include <sound/mpu401.h>
 #include <sound/opl3.h>
@@ -54,6 +56,7 @@
  *    1 = MediaForte 256-PCS
  *    2 = MediaForte 256-PCPR
  *    3 = MediaForte 64-PCR
+ *   16 = tuner-only setup (an additional flag bit), e.g. SF-64-PCR FM card
  *  High 16-bits are video (radio) device number + 1
  */
 static int tea575x_tuner[SNDRV_CARDS];
@@ -158,6 +161,7 @@
 	unsigned int multichannel: 1,	/* multichannel support */
 		     secondary: 1;	/* secondary codec */
 	unsigned char secondary_addr;	/* address of the secondary codec */
+	unsigned int tea575x_tuner;	/* tuner flags */
 
 	unsigned short ply_ctrl; /* playback control */
 	unsigned short cap_ctrl; /* capture control */
@@ -318,10 +322,8 @@
   2, 4, 6
 };
 
-#define CHANNELS sizeof(channels) / sizeof(channels[0])
-
 static struct snd_pcm_hw_constraint_list hw_constraints_channels = {
-	.count = CHANNELS,
+	.count = ARRAY_SIZE(channels),
 	.list = channels,
 	.mask = 0,
 };
@@ -1052,6 +1054,13 @@
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .info = snd_fm801_info_double, \
   .get = snd_fm801_get_double, .put = snd_fm801_put_double, \
   .private_value = reg | (shift_left << 8) | (shift_right << 12) | (mask << 16) | (invert << 24) }
+#define FM801_DOUBLE_TLV(xname, reg, shift_left, shift_right, mask, invert, xtlv) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
+  .name = xname, .info = snd_fm801_info_double, \
+  .get = snd_fm801_get_double, .put = snd_fm801_put_double, \
+  .private_value = reg | (shift_left << 8) | (shift_right << 12) | (mask << 16) | (invert << 24), \
+  .tlv = { .p = (xtlv) } }
 
 static int snd_fm801_info_double(struct snd_kcontrol *kcontrol,
 				 struct snd_ctl_elem_info *uinfo)
@@ -1148,14 +1157,19 @@
 	return snd_fm801_update_bits(chip, FM801_REC_SRC, 7, val);
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_dsp, -3450, 150, 0);
+
 #define FM801_CONTROLS ARRAY_SIZE(snd_fm801_controls)
 
 static struct snd_kcontrol_new snd_fm801_controls[] __devinitdata = {
-FM801_DOUBLE("Wave Playback Volume", FM801_PCM_VOL, 0, 8, 31, 1),
+FM801_DOUBLE_TLV("Wave Playback Volume", FM801_PCM_VOL, 0, 8, 31, 1,
+		 db_scale_dsp),
 FM801_SINGLE("Wave Playback Switch", FM801_PCM_VOL, 15, 1, 1),
-FM801_DOUBLE("I2S Playback Volume", FM801_I2S_VOL, 0, 8, 31, 1),
+FM801_DOUBLE_TLV("I2S Playback Volume", FM801_I2S_VOL, 0, 8, 31, 1,
+		 db_scale_dsp),
 FM801_SINGLE("I2S Playback Switch", FM801_I2S_VOL, 15, 1, 1),
-FM801_DOUBLE("FM Playback Volume", FM801_FM_VOL, 0, 8, 31, 1),
+FM801_DOUBLE_TLV("FM Playback Volume", FM801_FM_VOL, 0, 8, 31, 1,
+		 db_scale_dsp),
 FM801_SINGLE("FM Playback Switch", FM801_FM_VOL, 15, 1, 1),
 {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1253,6 +1267,9 @@
 	int id;
 	unsigned short cmdw;
 
+	if (chip->tea575x_tuner & 0x0010)
+		goto __ac97_ok;
+
 	/* codec cold reset + AC'97 warm reset */
 	outw((1<<5) | (1<<6), FM801_REG(chip, CODEC_CTRL));
 	inw(FM801_REG(chip, CODEC_CTRL)); /* flush posting data */
@@ -1290,6 +1307,8 @@
 		wait_for_codec(chip, 0, AC97_VENDOR_ID1, msecs_to_jiffies(750));
 	}
 
+      __ac97_ok:
+
 	/* init volume */
 	outw(0x0808, FM801_REG(chip, PCM_VOL));
 	outw(0x9f1f, FM801_REG(chip, FM_VOL));
@@ -1298,9 +1317,12 @@
 	/* I2S control - I2S mode */
 	outw(0x0003, FM801_REG(chip, I2S_MODE));
 
-	/* interrupt setup - unmask MPU, PLAYBACK & CAPTURE */
+	/* interrupt setup */
 	cmdw = inw(FM801_REG(chip, IRQ_MASK));
-	cmdw &= ~0x0083;
+	if (chip->irq < 0)
+		cmdw |= 0x00c3;		/* mask everything, no PCM nor MPU */
+	else
+		cmdw &= ~0x0083;	/* unmask MPU, PLAYBACK & CAPTURE */
 	outw(cmdw, FM801_REG(chip, IRQ_MASK));
 
 	/* interrupt clear */
@@ -1365,20 +1387,23 @@
 	chip->card = card;
 	chip->pci = pci;
 	chip->irq = -1;
+	chip->tea575x_tuner = tea575x_tuner;
 	if ((err = pci_request_regions(pci, "FM801")) < 0) {
 		kfree(chip);
 		pci_disable_device(pci);
 		return err;
 	}
 	chip->port = pci_resource_start(pci, 0);
-	if (request_irq(pci->irq, snd_fm801_interrupt, IRQF_DISABLED|IRQF_SHARED,
-			"FM801", chip)) {
-		snd_printk(KERN_ERR "unable to grab IRQ %d\n", chip->irq);
-		snd_fm801_free(chip);
-		return -EBUSY;
+	if ((tea575x_tuner & 0x0010) == 0) {
+		if (request_irq(pci->irq, snd_fm801_interrupt, IRQF_DISABLED|IRQF_SHARED,
+				"FM801", chip)) {
+			snd_printk(KERN_ERR "unable to grab IRQ %d\n", chip->irq);
+			snd_fm801_free(chip);
+			return -EBUSY;
+		}
+		chip->irq = pci->irq;
+		pci_set_master(pci);
 	}
-	chip->irq = pci->irq;
-	pci_set_master(pci);
 
 	pci_read_config_byte(pci, PCI_REVISION_ID, &rev);
 	if (rev >= 0xb1)	/* FM801-AU */
@@ -1394,12 +1419,12 @@
 	snd_card_set_dev(card, &pci->dev);
 
 #ifdef TEA575X_RADIO
-	if (tea575x_tuner > 0 && (tea575x_tuner & 0xffff) < 4) {
+	if ((tea575x_tuner & 0x000f) > 0 && (tea575x_tuner & 0x000f) < 4) {
 		chip->tea.dev_nr = tea575x_tuner >> 16;
 		chip->tea.card = card;
 		chip->tea.freq_fixup = 10700;
 		chip->tea.private_data = chip;
-		chip->tea.ops = &snd_fm801_tea_ops[(tea575x_tuner & 0xffff) - 1];
+		chip->tea.ops = &snd_fm801_tea_ops[(tea575x_tuner & 0x000f) - 1];
 		snd_tea575x_init(&chip->tea);
 	}
 #endif
@@ -1439,6 +1464,9 @@
 	sprintf(card->longname, "%s at 0x%lx, irq %i",
 		card->shortname, chip->port, chip->irq);
 
+	if (tea575x_tuner[dev] & 0x0010)
+		goto __fm801_tuner_only;
+
 	if ((err = snd_fm801_pcm(chip, 0, NULL)) < 0) {
 		snd_card_free(card);
 		return err;
@@ -1465,6 +1493,7 @@
 		return err;
 	}
 
+      __fm801_tuner_only:
 	if ((err = snd_card_register(card)) < 0) {
 		snd_card_free(card);
 		return err;
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 23201f3..9c3d7ac 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -29,6 +29,7 @@
 #include <sound/core.h>
 #include "hda_codec.h"
 #include <sound/asoundef.h>
+#include <sound/tlv.h>
 #include <sound/initval.h>
 #include "hda_local.h"
 
@@ -50,8 +51,10 @@
 /* codec vendor labels */
 static struct hda_vendor_id hda_vendor_ids[] = {
 	{ 0x10ec, "Realtek" },
+	{ 0x1057, "Motorola" },
 	{ 0x11d4, "Analog Devices" },
 	{ 0x13f6, "C-Media" },
+	{ 0x14f1, "Conexant" },
 	{ 0x434d, "C-Media" },
 	{ 0x8384, "SigmaTel" },
 	{} /* terminator */
@@ -841,6 +844,31 @@
 	return change;
 }
 
+int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag,
+			  unsigned int size, unsigned int __user *_tlv)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	hda_nid_t nid = get_amp_nid(kcontrol);
+	int dir = get_amp_direction(kcontrol);
+	u32 caps, val1, val2;
+
+	if (size < 4 * sizeof(unsigned int))
+		return -ENOMEM;
+	caps = query_amp_caps(codec, nid, dir);
+	val2 = (((caps & AC_AMPCAP_STEP_SIZE) >> AC_AMPCAP_STEP_SIZE_SHIFT) + 1) * 25;
+	val1 = -((caps & AC_AMPCAP_OFFSET) >> AC_AMPCAP_OFFSET_SHIFT);
+	val1 = ((int)val1) * ((int)val2);
+	if (put_user(SNDRV_CTL_TLVT_DB_SCALE, _tlv))
+		return -EFAULT;
+	if (put_user(2 * sizeof(unsigned int), _tlv + 1))
+		return -EFAULT;
+	if (put_user(val1, _tlv + 2))
+		return -EFAULT;
+	if (put_user(val2, _tlv + 3))
+		return -EFAULT;
+	return 0;
+}
+
 /* switch */
 int snd_hda_mixer_amp_switch_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
@@ -1477,10 +1505,10 @@
 				formats |= SNDRV_PCM_FMTBIT_S32_LE;
 				if (val & AC_SUPPCM_BITS_32)
 					bps = 32;
-				else if (val & AC_SUPPCM_BITS_20)
-					bps = 20;
 				else if (val & AC_SUPPCM_BITS_24)
 					bps = 24;
+				else if (val & AC_SUPPCM_BITS_20)
+					bps = 20;
 			}
 		}
 		else if (streams == AC_SUPFMT_FLOAT32) { /* should be exclusive */
@@ -1916,7 +1944,7 @@
 
 	/* front */
 	snd_hda_codec_setup_stream(codec, nids[HDA_FRONT], stream_tag, 0, format);
-	if (mout->hp_nid)
+	if (mout->hp_nid && mout->hp_nid != nids[HDA_FRONT])
 		/* headphone out will just decode front left/right (stereo) */
 		snd_hda_codec_setup_stream(codec, mout->hp_nid, stream_tag, 0, format);
 	/* extra outputs copied from front */
@@ -1984,7 +2012,7 @@
  * in the order of front, rear, CLFE, side, ...
  *
  * If more extra outputs (speaker and headphone) are found, the pins are
- * assisnged to hp_pin and speaker_pins[], respectively.  If no line-out jack
+ * assigned to hp_pins[] and speaker_pins[], respectively.  If no line-out jack
  * is detected, one of speaker of HP pins is assigned as the primary
  * output, i.e. to line_out_pins[0].  So, line_outs is always positive
  * if any analog output exists.
@@ -2046,14 +2074,26 @@
 			cfg->speaker_outs++;
 			break;
 		case AC_JACK_HP_OUT:
-			cfg->hp_pin = nid;
+			if (cfg->hp_outs >= ARRAY_SIZE(cfg->hp_pins))
+				continue;
+			cfg->hp_pins[cfg->hp_outs] = nid;
+			cfg->hp_outs++;
 			break;
-		case AC_JACK_MIC_IN:
-			if (loc == AC_JACK_LOC_FRONT)
-				cfg->input_pins[AUTO_PIN_FRONT_MIC] = nid;
-			else
-				cfg->input_pins[AUTO_PIN_MIC] = nid;
+		case AC_JACK_MIC_IN: {
+			int preferred, alt;
+			if (loc == AC_JACK_LOC_FRONT) {
+				preferred = AUTO_PIN_FRONT_MIC;
+				alt = AUTO_PIN_MIC;
+			} else {
+				preferred = AUTO_PIN_MIC;
+				alt = AUTO_PIN_FRONT_MIC;
+			}
+			if (!cfg->input_pins[preferred])
+				cfg->input_pins[preferred] = nid;
+			else if (!cfg->input_pins[alt])
+				cfg->input_pins[alt] = nid;
 			break;
+		}
 		case AC_JACK_LINE_IN:
 			if (loc == AC_JACK_LOC_FRONT)
 				cfg->input_pins[AUTO_PIN_FRONT_LINE] = nid;
@@ -2119,8 +2159,10 @@
 		   cfg->speaker_outs, cfg->speaker_pins[0],
 		   cfg->speaker_pins[1], cfg->speaker_pins[2],
 		   cfg->speaker_pins[3], cfg->speaker_pins[4]);
-	snd_printd("   hp=0x%x, dig_out=0x%x, din_in=0x%x\n",
-		   cfg->hp_pin, cfg->dig_out_pin, cfg->dig_in_pin);
+	snd_printd("   hp_outs=%d (0x%x/0x%x/0x%x/0x%x/0x%x)\n",
+		   cfg->hp_outs, cfg->hp_pins[0],
+		   cfg->hp_pins[1], cfg->hp_pins[2],
+		   cfg->hp_pins[3], cfg->hp_pins[4]);
 	snd_printd("   inputs: mic=0x%x, fmic=0x%x, line=0x%x, fline=0x%x,"
 		   " cd=0x%x, aux=0x%x\n",
 		   cfg->input_pins[AUTO_PIN_MIC],
@@ -2141,10 +2183,12 @@
 			       sizeof(cfg->speaker_pins));
 			cfg->speaker_outs = 0;
 			memset(cfg->speaker_pins, 0, sizeof(cfg->speaker_pins));
-		} else if (cfg->hp_pin) {
-			cfg->line_outs = 1;
-			cfg->line_out_pins[0] = cfg->hp_pin;
-			cfg->hp_pin = 0;
+		} else if (cfg->hp_outs) {
+			cfg->line_outs = cfg->hp_outs;
+			memcpy(cfg->line_out_pins, cfg->hp_pins,
+			       sizeof(cfg->hp_pins));
+			cfg->hp_outs = 0;
+			memset(cfg->hp_pins, 0, sizeof(cfg->hp_pins));
 		}
 	}
 
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index 40520e9..c12bc4e 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -479,7 +479,7 @@
 struct hda_amp_info {
 	u32 key;		/* hash key */
 	u32 amp_caps;		/* amp capabilities */
-	u16 vol[2];		/* current volume & mute*/
+	u16 vol[2];		/* current volume & mute */
 	u16 status;		/* update flag */
 	u16 next;		/* next link */
 };
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 85ad164a..97e9af1 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -46,11 +46,18 @@
 };
 
 /* patch-specific record */
+
+#define MAX_PCM_VOLS	2
+struct pcm_vol {
+	struct hda_gnode *node;	/* Node for PCM volume */
+	unsigned int index;	/* connection of PCM volume */
+};
+
 struct hda_gspec {
 	struct hda_gnode *dac_node[2];	/* DAC node */
 	struct hda_gnode *out_pin_node[2];	/* Output pin (Line-Out) node */
-	struct hda_gnode *pcm_vol_node[2];	/* Node for PCM volume */
-	unsigned int pcm_vol_index[2];	/* connection of PCM volume */
+	struct pcm_vol pcm_vol[MAX_PCM_VOLS];	/* PCM volumes */
+	unsigned int pcm_vol_nodes;	/* number of PCM volumes */
 
 	struct hda_gnode *adc_node;	/* ADC node */
 	struct hda_gnode *cap_vol_node;	/* Node for capture volume */
@@ -285,9 +292,11 @@
 			return node == spec->dac_node[dac_idx];
 		}
 		spec->dac_node[dac_idx] = node;
-		if (node->wid_caps & AC_WCAP_OUT_AMP) {
-			spec->pcm_vol_node[dac_idx] = node;
-			spec->pcm_vol_index[dac_idx] = 0;
+		if ((node->wid_caps & AC_WCAP_OUT_AMP) &&
+		    spec->pcm_vol_nodes < MAX_PCM_VOLS) {
+			spec->pcm_vol[spec->pcm_vol_nodes].node = node;
+			spec->pcm_vol[spec->pcm_vol_nodes].index = 0;
+			spec->pcm_vol_nodes++;
 		}
 		return 1; /* found */
 	}
@@ -307,13 +316,16 @@
 				select_input_connection(codec, node, i);
 			unmute_input(codec, node, i);
 			unmute_output(codec, node);
-			if (! spec->pcm_vol_node[dac_idx]) {
-				if (node->wid_caps & AC_WCAP_IN_AMP) {
-					spec->pcm_vol_node[dac_idx] = node;
-					spec->pcm_vol_index[dac_idx] = i;
-				} else if (node->wid_caps & AC_WCAP_OUT_AMP) {
-					spec->pcm_vol_node[dac_idx] = node;
-					spec->pcm_vol_index[dac_idx] = 0;
+			if (spec->dac_node[dac_idx] &&
+			    spec->pcm_vol_nodes < MAX_PCM_VOLS &&
+			    !(spec->dac_node[dac_idx]->wid_caps &
+			      AC_WCAP_OUT_AMP)) {
+				if ((node->wid_caps & AC_WCAP_IN_AMP) ||
+				    (node->wid_caps & AC_WCAP_OUT_AMP)) {
+					int n = spec->pcm_vol_nodes;
+					spec->pcm_vol[n].node = node;
+					spec->pcm_vol[n].index = i;
+					spec->pcm_vol_nodes++;
 				}
 			}
 			return 1;
@@ -370,7 +382,9 @@
 			/* set PIN-Out enable */
 			snd_hda_codec_write(codec, node->nid, 0,
 					    AC_VERB_SET_PIN_WIDGET_CONTROL,
-					    AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN);
+					    AC_PINCTL_OUT_EN |
+					    ((node->pin_caps & AC_PINCAP_HP_DRV) ?
+					     AC_PINCTL_HP_EN : 0));
 			return node;
 		}
 	}
@@ -461,14 +475,19 @@
 			return "Front Line";
 		return "Line";
 	case AC_JACK_CD:
+#if 0
 		if (pinctl)
 			*pinctl |= AC_PINCTL_VREF_GRD;
+#endif
 		return "CD";
 	case AC_JACK_AUX:
 		if ((location & 0x0f) == AC_JACK_LOC_FRONT)
 			return "Front Aux";
 		return "Aux";
 	case AC_JACK_MIC_IN:
+		if (pinctl && (node->pin_caps &
+		    (AC_PINCAP_VREF_80 << AC_PINCAP_VREF_SHIFT)))
+			*pinctl |= AC_PINCTL_VREF_80;	/* pinctl is optional */
 		if ((location & 0x0f) == AC_JACK_LOC_FRONT)
 			return "Front Mic";
 		return "Mic";
@@ -556,6 +575,29 @@
 	return 1; /* found */
 }
 
+/* add a capture source element */
+static void add_cap_src(struct hda_gspec *spec, int idx)
+{
+	struct hda_input_mux_item *csrc;
+	char *buf;
+	int num, ocap;
+
+	num = spec->input_mux.num_items;
+	csrc = &spec->input_mux.items[num];
+	buf = spec->cap_labels[num];
+	for (ocap = 0; ocap < num; ocap++) {
+		if (! strcmp(buf, spec->cap_labels[ocap])) {
+			/* same label already exists,
+			 * put the index number to be unique
+			 */
+			sprintf(buf, "%s %d", spec->cap_labels[ocap], num);
+			break;
+		}
+	}
+	csrc->index = idx;
+	spec->input_mux.num_items++;
+}
+
 /*
  * parse input
  */
@@ -576,28 +618,26 @@
 	 * if it reaches to a proper input PIN, add the path as the
 	 * input path.
 	 */
+	/* first, check the direct connections to PIN widgets */
 	for (i = 0; i < adc_node->nconns; i++) {
 		node = hda_get_node(spec, adc_node->conn_list[i]);
-		if (! node)
-			continue;
-		err = parse_adc_sub_nodes(codec, spec, node);
-		if (err < 0)
-			return err;
-		else if (err > 0) {
-			struct hda_input_mux_item *csrc = &spec->input_mux.items[spec->input_mux.num_items];
-			char *buf = spec->cap_labels[spec->input_mux.num_items];
-			int ocap;
-			for (ocap = 0; ocap < spec->input_mux.num_items; ocap++) {
-				if (! strcmp(buf, spec->cap_labels[ocap])) {
-					/* same label already exists,
-					 * put the index number to be unique
-					 */
-					sprintf(buf, "%s %d", spec->cap_labels[ocap],
-						spec->input_mux.num_items);
-				}
-			}
-			csrc->index = i;
-			spec->input_mux.num_items++;
+		if (node && node->type == AC_WID_PIN) {
+			err = parse_adc_sub_nodes(codec, spec, node);
+			if (err < 0)
+				return err;
+			else if (err > 0)
+				add_cap_src(spec, i);
+		}
+	}
+	/* ... then check the rest, i.e. the more complicated connections */
+	for (i = 0; i < adc_node->nconns; i++) {
+		node = hda_get_node(spec, adc_node->conn_list[i]);
+		if (node && node->type != AC_WID_PIN) {
+			err = parse_adc_sub_nodes(codec, spec, node);
+			if (err < 0)
+				return err;
+			else if (err > 0)
+				add_cap_src(spec, i);
 		}
 	}
 
@@ -647,9 +687,6 @@
 /*
  * create mixer controls if possible
  */
-#define DIR_OUT		0x1
-#define DIR_IN		0x2
-
 static int create_mixer(struct hda_codec *codec, struct hda_gnode *node,
 			unsigned int index, const char *type, const char *dir_sfx)
 {
@@ -722,18 +759,37 @@
 /*
  * build output mixer controls
  */
+static int create_output_mixers(struct hda_codec *codec, const char **names)
+{
+	struct hda_gspec *spec = codec->spec;
+	int i, err;
+
+	for (i = 0; i < spec->pcm_vol_nodes; i++) {
+		err = create_mixer(codec, spec->pcm_vol[i].node,
+				   spec->pcm_vol[i].index,
+				   names[i], "Playback");
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
 static int build_output_controls(struct hda_codec *codec)
 {
 	struct hda_gspec *spec = codec->spec;
-	static const char *types[2] = { "Master", "Headphone" };
-	int i, err;
+	static const char *types_speaker[] = { "Speaker", "Headphone" };
+	static const char *types_line[] = { "Front", "Headphone" };
 
-	for (i = 0; i < 2 && spec->pcm_vol_node[i]; i++) {
-		err = create_mixer(codec, spec->pcm_vol_node[i],
-				   spec->pcm_vol_index[i],
-				   types[i], "Playback");
-		if (err < 0)
-			return err;
+	switch (spec->pcm_vol_nodes) {
+	case 1:
+		return create_mixer(codec, spec->pcm_vol[0].node,
+				    spec->pcm_vol[0].index,
+				    "Master", "Playback");
+	case 2:
+		if (defcfg_type(spec->out_pin_node[0]) == AC_JACK_SPEAKER)
+			return create_output_mixers(codec, types_speaker);
+		else
+			return create_output_mixers(codec, types_line);
 	}
 	return 0;
 }
@@ -743,28 +799,57 @@
 {
 	struct hda_gspec *spec = codec->spec;
 	struct hda_gnode *adc_node = spec->adc_node;
-	int err;
+	int i, err;
+	static struct snd_kcontrol_new cap_sel = {
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Capture Source",
+		.info = capture_source_info,
+		.get = capture_source_get,
+		.put = capture_source_put,
+	};
 
-	if (! adc_node)
+	if (! adc_node || ! spec->input_mux.num_items)
 		return 0; /* not found */
 
+	spec->cur_cap_src = 0;
+	select_input_connection(codec, adc_node,
+				spec->input_mux.items[0].index);
+
 	/* create capture volume and switch controls if the ADC has an amp */
-	err = create_mixer(codec, adc_node, 0, NULL, "Capture");
+	/* do we have only a single item? */
+	if (spec->input_mux.num_items == 1) {
+		err = create_mixer(codec, adc_node,
+				   spec->input_mux.items[0].index,
+				   NULL, "Capture");
+		if (err < 0)
+			return err;
+		return 0;
+	}
 
 	/* create input MUX if multiple sources are available */
-	if (spec->input_mux.num_items > 1) {
-		static struct snd_kcontrol_new cap_sel = {
-			.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-			.name = "Capture Source",
-			.info = capture_source_info,
-			.get = capture_source_get,
-			.put = capture_source_put,
-		};
-		if ((err = snd_ctl_add(codec->bus->card, snd_ctl_new1(&cap_sel, codec))) < 0)
+	if ((err = snd_ctl_add(codec->bus->card,
+			       snd_ctl_new1(&cap_sel, codec))) < 0)
+		return err;
+
+	/* no volume control? */
+	if (! (adc_node->wid_caps & AC_WCAP_IN_AMP) ||
+	    ! (adc_node->amp_in_caps & AC_AMPCAP_NUM_STEPS))
+		return 0;
+
+	for (i = 0; i < spec->input_mux.num_items; i++) {
+		struct snd_kcontrol_new knew;
+		char name[32];
+		sprintf(name, "%s Capture Volume",
+			spec->input_mux.items[i].label);
+		knew = (struct snd_kcontrol_new)
+			HDA_CODEC_VOLUME(name, adc_node->nid,
+					 spec->input_mux.items[i].index,
+					 HDA_INPUT);
+		if ((err = snd_ctl_add(codec->bus->card,
+				       snd_ctl_new1(&knew, codec))) < 0)
 			return err;
-		spec->cur_cap_src = 0;
-		select_input_connection(codec, adc_node, spec->input_mux.items[0].index);
 	}
+
 	return 0;
 }
 
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 79d63c9..e9d4cb4 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -55,6 +55,7 @@
 static int position_fix;
 static int probe_mask = -1;
 static int single_cmd;
+static int disable_msi;
 
 module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for Intel HD audio interface.");
@@ -68,6 +69,8 @@
 MODULE_PARM_DESC(probe_mask, "Bitmask to probe codecs (default = -1).");
 module_param(single_cmd, bool, 0444);
 MODULE_PARM_DESC(single_cmd, "Use single command to communicate with codecs (for debugging only).");
+module_param(disable_msi, int, 0);
+MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
 
 
 /* just for backward compatibility */
@@ -252,7 +255,7 @@
 struct azx_dev {
 	u32 *bdl;			/* virtual address of the BDL */
 	dma_addr_t bdl_addr;		/* physical address of the BDL */
-	volatile u32 *posbuf;			/* position buffer pointer */
+	u32 *posbuf;			/* position buffer pointer */
 
 	unsigned int bufsize;		/* size of the play buffer in bytes */
 	unsigned int fragsize;		/* size of each period in bytes */
@@ -271,8 +274,8 @@
 	/* for sanity check of position buffer */
 	unsigned int period_intr;
 
-	unsigned int opened: 1;
-	unsigned int running: 1;
+	unsigned int opened :1;
+	unsigned int running :1;
 };
 
 /* CORB/RIRB */
@@ -330,8 +333,9 @@
 
 	/* flags */
 	int position_fix;
-	unsigned int initialized: 1;
-	unsigned int single_cmd: 1;
+	unsigned int initialized :1;
+	unsigned int single_cmd :1;
+	unsigned int polling_mode :1;
 };
 
 /* driver types */
@@ -516,23 +520,36 @@
 static unsigned int azx_rirb_get_response(struct hda_codec *codec)
 {
 	struct azx *chip = codec->bus->private_data;
-	int timeout = 50;
+	unsigned long timeout;
 
-	while (chip->rirb.cmds) {
-		if (! --timeout) {
-			snd_printk(KERN_ERR
-				   "hda_intel: azx_get_response timeout, "
-				   "switching to single_cmd mode...\n");
-			chip->rirb.rp = azx_readb(chip, RIRBWP);
-			chip->rirb.cmds = 0;
-			/* switch to single_cmd mode */
-			chip->single_cmd = 1;
-			azx_free_cmd_io(chip);
-			return -1;
+ again:
+	timeout = jiffies + msecs_to_jiffies(1000);
+	do {
+		if (chip->polling_mode) {
+			spin_lock_irq(&chip->reg_lock);
+			azx_update_rirb(chip);
+			spin_unlock_irq(&chip->reg_lock);
 		}
-		msleep(1);
+		if (! chip->rirb.cmds)
+			return chip->rirb.res; /* the last value */
+		schedule_timeout_interruptible(1);
+	} while (time_after_eq(timeout, jiffies));
+
+	if (!chip->polling_mode) {
+		snd_printk(KERN_WARNING "hda_intel: azx_get_response timeout, "
+			   "switching to polling mode...\n");
+		chip->polling_mode = 1;
+		goto again;
 	}
-	return chip->rirb.res; /* the last value */
+
+	snd_printk(KERN_ERR "hda_intel: azx_get_response timeout, "
+		   "switching to single_cmd mode...\n");
+	chip->rirb.rp = azx_readb(chip, RIRBWP);
+	chip->rirb.cmds = 0;
+	/* switch to single_cmd mode */
+	chip->single_cmd = 1;
+	azx_free_cmd_io(chip);
+	return -1;
 }
 
 /*
@@ -642,14 +659,14 @@
 	azx_writeb(chip, GCTL, azx_readb(chip, GCTL) | ICH6_GCTL_RESET);
 
 	count = 50;
-	while (! azx_readb(chip, GCTL) && --count)
+	while (!azx_readb(chip, GCTL) && --count)
 		msleep(1);
 
-	/* Brent Chartrand said to wait >= 540us for codecs to intialize */
+	/* Brent Chartrand said to wait >= 540us for codecs to initialize */
 	msleep(1);
 
 	/* check to see if controller is ready */
-	if (! azx_readb(chip, GCTL)) {
+	if (!azx_readb(chip, GCTL)) {
 		snd_printd("azx_reset: controller not ready!\n");
 		return -EBUSY;
 	}
@@ -658,7 +675,7 @@
 	azx_writel(chip, GCTL, azx_readl(chip, GCTL) | ICH6_GCTL_UREN);
 
 	/* detect codecs */
-	if (! chip->codec_mask) {
+	if (!chip->codec_mask) {
 		chip->codec_mask = azx_readw(chip, STATESTS);
 		snd_printdd("codec_mask = 0x%x\n", chip->codec_mask);
 	}
@@ -766,7 +783,7 @@
 	azx_int_enable(chip);
 
 	/* initialize the codec command I/O */
-	if (! chip->single_cmd)
+	if (!chip->single_cmd)
 		azx_init_cmd_io(chip);
 
 	/* program the position buffer */
@@ -794,7 +811,7 @@
 /*
  * interrupt handler
  */
-static irqreturn_t azx_interrupt(int irq, void* dev_id, struct pt_regs *regs)
+static irqreturn_t azx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	struct azx *chip = dev_id;
 	struct azx_dev *azx_dev;
@@ -999,8 +1016,9 @@
 	.info =			(SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED |
 				 SNDRV_PCM_INFO_BLOCK_TRANSFER |
 				 SNDRV_PCM_INFO_MMAP_VALID |
-				 SNDRV_PCM_INFO_PAUSE /*|*/
-				 /*SNDRV_PCM_INFO_RESUME*/),
+				 /* No full-resume yet implemented */
+				 /* SNDRV_PCM_INFO_RESUME |*/
+				 SNDRV_PCM_INFO_PAUSE),
 	.formats =		SNDRV_PCM_FMTBIT_S16_LE,
 	.rates =		SNDRV_PCM_RATE_48000,
 	.rate_min =		48000,
@@ -1178,7 +1196,7 @@
 	if (chip->position_fix == POS_FIX_POSBUF ||
 	    chip->position_fix == POS_FIX_AUTO) {
 		/* use the position buffer */
-		pos = *azx_dev->posbuf;
+		pos = le32_to_cpu(*azx_dev->posbuf);
 		if (chip->position_fix == POS_FIX_AUTO &&
 		    azx_dev->period_intr == 1 && ! pos) {
 			printk(KERN_WARNING
@@ -1222,7 +1240,12 @@
 	struct snd_pcm *pcm;
 	struct azx_pcm *apcm;
 
-	snd_assert(cpcm->stream[0].substreams || cpcm->stream[1].substreams, return -EINVAL);
+	/* if no substreams are defined for both playback and capture,
+	 * it's just a placeholder.  ignore it.
+	 */
+	if (!cpcm->stream[0].substreams && !cpcm->stream[1].substreams)
+		return 0;
+
 	snd_assert(cpcm->name, return -EINVAL);
 
 	err = snd_pcm_new(chip->card, cpcm->name, pcm_dev,
@@ -1248,7 +1271,8 @@
 					      snd_dma_pci_data(chip->pci),
 					      1024 * 64, 1024 * 128);
 	chip->pcm[pcm_dev] = pcm;
-	chip->pcm_devs = pcm_dev + 1;
+	if (chip->pcm_devs < pcm_dev + 1)
+		chip->pcm_devs = pcm_dev + 1;
 
 	return 0;
 }
@@ -1326,7 +1350,7 @@
 		struct azx_dev *azx_dev = &chip->azx_dev[i];
 		azx_dev->bdl = (u32 *)(chip->bdl.area + off);
 		azx_dev->bdl_addr = chip->bdl.addr + off;
-		azx_dev->posbuf = (volatile u32 *)(chip->posbuf.area + i * 8);
+		azx_dev->posbuf = (u32 *)(chip->posbuf.area + i * 8);
 		/* offset: SDI0=0x80, SDI1=0xa0, ... SDO3=0x160 */
 		azx_dev->sd_addr = chip->remap_addr + (0x20 * i + 0x80);
 		/* int mask: SDI0=0x01, SDI1=0x02, ... SDO3=0x80 */
@@ -1355,6 +1379,10 @@
 		snd_pcm_suspend_all(chip->pcm[i]);
 	snd_hda_suspend(chip->bus, state);
 	azx_free_cmd_io(chip);
+	if (chip->irq >= 0)
+		free_irq(chip->irq, chip);
+	if (!disable_msi)
+		pci_disable_msi(chip->pci);
 	pci_disable_device(pci);
 	pci_save_state(pci);
 	return 0;
@@ -1367,6 +1395,12 @@
 
 	pci_restore_state(pci);
 	pci_enable_device(pci);
+	if (!disable_msi)
+		pci_enable_msi(pci);
+	/* FIXME: need proper error handling */
+	request_irq(pci->irq, azx_interrupt, IRQF_DISABLED|IRQF_SHARED,
+		    "HDA Intel", chip);
+	chip->irq = pci->irq;
 	pci_set_master(pci);
 	azx_init_chip(chip);
 	snd_hda_resume(chip->bus);
@@ -1398,12 +1432,14 @@
 		azx_writel(chip, DPLBASE, 0);
 		azx_writel(chip, DPUBASE, 0);
 
-		/* wait a little for interrupts to finish */
-		msleep(1);
+		synchronize_irq(chip->irq);
 	}
 
-	if (chip->irq >= 0)
+	if (chip->irq >= 0) {
 		free_irq(chip->irq, (void*)chip);
+		if (!disable_msi)
+			pci_disable_msi(chip->pci);
+	}
 	if (chip->remap_addr)
 		iounmap(chip->remap_addr);
 
@@ -1434,19 +1470,19 @@
 				struct azx **rchip)
 {
 	struct azx *chip;
-	int err = 0;
+	int err;
 	static struct snd_device_ops ops = {
 		.dev_free = azx_dev_free,
 	};
 
 	*rchip = NULL;
 	
-	if ((err = pci_enable_device(pci)) < 0)
+	err = pci_enable_device(pci);
+	if (err < 0)
 		return err;
 
 	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
-	
-	if (NULL == chip) {
+	if (!chip) {
 		snd_printk(KERN_ERR SFX "cannot allocate chip\n");
 		pci_disable_device(pci);
 		return -ENOMEM;
@@ -1472,13 +1508,14 @@
 	}
 #endif
 
-	if ((err = pci_request_regions(pci, "ICH HD audio")) < 0) {
+	err = pci_request_regions(pci, "ICH HD audio");
+	if (err < 0) {
 		kfree(chip);
 		pci_disable_device(pci);
 		return err;
 	}
 
-	chip->addr = pci_resource_start(pci,0);
+	chip->addr = pci_resource_start(pci, 0);
 	chip->remap_addr = ioremap_nocache(chip->addr, pci_resource_len(pci,0));
 	if (chip->remap_addr == NULL) {
 		snd_printk(KERN_ERR SFX "ioremap error\n");
@@ -1486,6 +1523,9 @@
 		goto errout;
 	}
 
+	if (!disable_msi)
+		pci_enable_msi(pci);
+
 	if (request_irq(pci->irq, azx_interrupt, IRQF_DISABLED|IRQF_SHARED,
 			"HDA Intel", (void*)chip)) {
 		snd_printk(KERN_ERR SFX "unable to grab IRQ %d\n", pci->irq);
@@ -1519,7 +1559,7 @@
 	}
 	chip->num_streams = chip->playback_streams + chip->capture_streams;
 	chip->azx_dev = kcalloc(chip->num_streams, sizeof(*chip->azx_dev), GFP_KERNEL);
-	if (! chip->azx_dev) {
+	if (!chip->azx_dev) {
 		snd_printk(KERN_ERR "cannot malloc azx_dev\n");
 		goto errout;
 	}
@@ -1550,7 +1590,7 @@
 	chip->initialized = 1;
 
 	/* codec detection */
-	if (! chip->codec_mask) {
+	if (!chip->codec_mask) {
 		snd_printk(KERN_ERR SFX "no codecs found!\n");
 		err = -ENODEV;
 		goto errout;
@@ -1577,16 +1617,16 @@
 {
 	struct snd_card *card;
 	struct azx *chip;
-	int err = 0;
+	int err;
 
 	card = snd_card_new(index, id, THIS_MODULE, 0);
-	if (NULL == card) {
+	if (!card) {
 		snd_printk(KERN_ERR SFX "Error creating card!\n");
 		return -ENOMEM;
 	}
 
-	if ((err = azx_create(card, pci, pci_id->driver_data,
-			      &chip)) < 0) {
+	err = azx_create(card, pci, pci_id->driver_data, &chip);
+	if (err < 0) {
 		snd_card_free(card);
 		return err;
 	}
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 14e8aa2..f9416c3 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -30,9 +30,13 @@
 /* mono volume with index (index=0,1,...) (channel=1,2) */
 #define HDA_CODEC_VOLUME_MONO_IDX(xname, xcidx, nid, channel, xindex, direction) \
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xcidx,  \
+	  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | \
+	  	    SNDRV_CTL_ELEM_ACCESS_TLV_READ | \
+	  	    SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK, \
 	  .info = snd_hda_mixer_amp_volume_info, \
 	  .get = snd_hda_mixer_amp_volume_get, \
 	  .put = snd_hda_mixer_amp_volume_put, \
+	  .tlv = { .c = snd_hda_mixer_amp_tlv },		\
 	  .private_value = HDA_COMPOSE_AMP_VAL(nid, channel, xindex, direction) }
 /* stereo volume with index */
 #define HDA_CODEC_VOLUME_IDX(xname, xcidx, nid, xindex, direction) \
@@ -63,6 +67,7 @@
 int snd_hda_mixer_amp_volume_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo);
 int snd_hda_mixer_amp_volume_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol);
 int snd_hda_mixer_amp_volume_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol);
+int snd_hda_mixer_amp_tlv(struct snd_kcontrol *kcontrol, int op_flag, unsigned int size, unsigned int __user *tlv);
 int snd_hda_mixer_amp_switch_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo);
 int snd_hda_mixer_amp_switch_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol);
 int snd_hda_mixer_amp_switch_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol);
@@ -224,7 +229,8 @@
 	hda_nid_t line_out_pins[5]; /* sorted in the order of Front/Surr/CLFE/Side */
 	int speaker_outs;
 	hda_nid_t speaker_pins[5];
-	hda_nid_t hp_pin;
+	int hp_outs;
+	hda_nid_t hp_pins[5];
 	hda_nid_t input_pins[AUTO_PIN_LAST];
 	hda_nid_t dig_out_pin;
 	hda_nid_t dig_in_pin;
diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c
index c2f0fe8..d737f17 100644
--- a/sound/pci/hda/hda_proc.c
+++ b/sound/pci/hda/hda_proc.c
@@ -52,10 +52,9 @@
 			   struct hda_codec *codec, hda_nid_t nid, int dir)
 {
 	unsigned int caps;
-	if (dir == HDA_OUTPUT)
-		caps = snd_hda_param_read(codec, nid, AC_PAR_AMP_OUT_CAP);
-	else
-		caps = snd_hda_param_read(codec, nid, AC_PAR_AMP_IN_CAP);
+	caps = snd_hda_param_read(codec, nid,
+				  dir == HDA_OUTPUT ?
+				    AC_PAR_AMP_OUT_CAP : AC_PAR_AMP_IN_CAP);
 	if (caps == -1 || caps == 0) {
 		snd_iprintf(buffer, "N/A\n");
 		return;
@@ -74,10 +73,7 @@
 	unsigned int val;
 	int i;
 
-	if (dir == HDA_OUTPUT)
-		dir = AC_AMP_GET_OUTPUT;
-	else
-		dir = AC_AMP_GET_INPUT;
+	dir = dir == HDA_OUTPUT ? AC_AMP_GET_OUTPUT : AC_AMP_GET_INPUT;
 	for (i = 0; i < indices; i++) {
 		snd_iprintf(buffer, " [");
 		if (stereo) {
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 6823f2b..511df07 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -488,9 +488,13 @@
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 		.name = "PCM Playback Volume",
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			  SNDRV_CTL_ELEM_ACCESS_TLV_READ |
+			  SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK,
 		.info = ad1986a_pcm_amp_vol_info,
 		.get = ad1986a_pcm_amp_vol_get,
 		.put = ad1986a_pcm_amp_vol_put,
+		.tlv = { .c = snd_hda_mixer_amp_tlv },
 		.private_value = HDA_COMPOSE_AMP_VAL(AD1986A_FRONT_DAC, 3, 0, HDA_OUTPUT)
 	},
 	{
@@ -637,6 +641,7 @@
 		.info = snd_hda_mixer_amp_volume_info,
 		.get = snd_hda_mixer_amp_volume_get,
 		.put = ad1986a_laptop_master_vol_put,
+		.tlv = { .c = snd_hda_mixer_amp_tlv },
 		.private_value = HDA_COMPOSE_AMP_VAL(0x1a, 3, 0, HDA_OUTPUT),
 	},
 	{
@@ -791,6 +796,8 @@
 	  .config = AD1986A_3STACK }, /* ASUS A8N-VM CSM */
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x81b3,
 	  .config = AD1986A_3STACK }, /* ASUS P5RD2-VM / P5GPL-X SE */
+	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x81cb,
+	  .config = AD1986A_3STACK }, /* ASUS M2NPV-VM */
 	{ .modelname = "laptop",	.config = AD1986A_LAPTOP },
 	{ .pci_subvendor = 0x144d, .pci_subdevice = 0xc01e,
 	  .config = AD1986A_LAPTOP }, /* FSC V2060 */
@@ -803,6 +810,8 @@
 	  .config = AD1986A_LAPTOP_EAPD }, /* Samsung X60 Chane */
 	{ .pci_subvendor = 0x144d, .pci_subdevice = 0xc024,
 	  .config = AD1986A_LAPTOP_EAPD }, /* Samsung R65-T2300 Charis */
+	{ .pci_subvendor = 0x144d, .pci_subdevice = 0xc026,
+	  .config = AD1986A_LAPTOP_EAPD }, /* Samsung X10-T2300 Culesa */
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1153,
 	  .config = AD1986A_LAPTOP_EAPD }, /* ASUS M9 */
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1213,
@@ -1626,10 +1635,12 @@
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct ad198x_spec *spec = codec->spec;
-	if (spec->need_dac_fix)
+	int err = snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
+				      spec->num_channel_mode,
+				      &spec->multiout.max_channels);
+	if (! err && spec->need_dac_fix)
 		spec->multiout.num_dacs = spec->multiout.max_channels / 2;
-	return snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
-				   spec->num_channel_mode, &spec->multiout.max_channels);
+	return err;
 }
 
 /* 6-stack mode */
@@ -2460,7 +2471,7 @@
 	pin = spec->autocfg.speaker_pins[0];
 	if (pin) /* connect to front */
 		ad1988_auto_set_output_and_unmute(codec, pin, PIN_OUT, 0);
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		ad1988_auto_set_output_and_unmute(codec, pin, PIN_HP, 0);
 }
@@ -2512,7 +2523,7 @@
 	    (err = ad1988_auto_create_extra_out(codec,
 						spec->autocfg.speaker_pins[0],
 						"Speaker")) < 0 ||
-	    (err = ad1988_auto_create_extra_out(codec, spec->autocfg.hp_pin,
+	    (err = ad1988_auto_create_extra_out(codec, spec->autocfg.hp_pins[0],
 						"Headphone")) < 0 ||
 	    (err = ad1988_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
 		return err;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 18d1052..d08d2e3 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -79,6 +79,7 @@
 	ALC262_BASIC,
 	ALC262_FUJITSU,
 	ALC262_HP_BPC,
+	ALC262_BENQ_ED8,
 	ALC262_AUTO,
 	ALC262_MODEL_LAST /* last tag */
 };
@@ -89,6 +90,7 @@
 	ALC660_3ST,
 	ALC861_3ST_DIG,
 	ALC861_6ST_DIG,
+	ALC861_UNIWILL_M31,
 	ALC861_AUTO,
 	ALC861_MODEL_LAST,
 };
@@ -97,6 +99,7 @@
 enum {
 	ALC882_3ST_DIG,
 	ALC882_6ST_DIG,
+	ALC882_ARIMA,
 	ALC882_AUTO,
 	ALC882_MODEL_LAST,
 };
@@ -108,6 +111,7 @@
 	ALC883_3ST_6ch,
 	ALC883_6ST_DIG,
 	ALC888_DEMO_BOARD,
+	ALC883_ACER,
 	ALC883_AUTO,
 	ALC883_MODEL_LAST,
 };
@@ -153,6 +157,7 @@
 	/* channel model */
 	const struct hda_channel_mode *channel_mode;
 	int num_channel_mode;
+	int need_dac_fix;
 
 	/* PCM information */
 	struct hda_pcm pcm_rec[3];	/* used in alc_build_pcms() */
@@ -190,6 +195,7 @@
 	hda_nid_t dig_in_nid;
 	unsigned int num_channel_mode;
 	const struct hda_channel_mode *channel_mode;
+	int need_dac_fix;
 	unsigned int num_mux_defs;
 	const struct hda_input_mux *input_mux;
 	void (*unsol_event)(struct hda_codec *, unsigned int);
@@ -262,9 +268,12 @@
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
 	struct alc_spec *spec = codec->spec;
-	return snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
-				   spec->num_channel_mode,
-				   &spec->multiout.max_channels);
+	int err = snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
+				      spec->num_channel_mode,
+				      &spec->multiout.max_channels);
+	if (! err && spec->need_dac_fix)
+		spec->multiout.num_dacs = spec->multiout.max_channels / 2;
+	return err;
 }
 
 /*
@@ -544,6 +553,7 @@
 	
 	spec->channel_mode = preset->channel_mode;
 	spec->num_channel_mode = preset->num_channel_mode;
+	spec->need_dac_fix = preset->need_dac_fix;
 
 	spec->multiout.max_channels = spec->channel_mode[0].channels;
 
@@ -1348,6 +1358,10 @@
 };
 
 static struct hda_verb alc880_pin_tcl_S700_init_verbs[] = {
+	/* change to EAPD mode */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x07},
+	{0x20, AC_VERB_SET_PROC_COEF,  0x3060},
+
 	/* Headphone output */
 	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
 	/* Front output*/
@@ -1782,25 +1796,9 @@
 		}
 	}
 
-	/* If the use of more than one ADC is requested for the current
-	 * model, configure a second analog capture-only PCM.
-	 */
-	if (spec->num_adc_nids > 1) {
-		codec->num_pcms++;
-		info++;
-		info->name = spec->stream_name_analog;
-		/* No playback stream for second PCM */
-		info->stream[SNDRV_PCM_STREAM_PLAYBACK] = alc_pcm_null_playback;
-		info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = 0;
-		if (spec->stream_analog_capture) {
-			snd_assert(spec->adc_nids, return -EINVAL);
-			info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_analog_capture);
-			info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adc_nids[1];
-		}
-	}
-
+	/* SPDIF for stream index #1 */
 	if (spec->multiout.dig_out_nid || spec->dig_in_nid) {
-		codec->num_pcms++;
+		codec->num_pcms = 2;
 		info++;
 		info->name = spec->stream_name_digital;
 		if (spec->multiout.dig_out_nid &&
@@ -1815,6 +1813,24 @@
 		}
 	}
 
+	/* If the use of more than one ADC is requested for the current
+	 * model, configure a second analog capture-only PCM.
+	 */
+	/* Additional Analog capture for index #2 */
+	if (spec->num_adc_nids > 1 && spec->stream_analog_capture &&
+	    spec->adc_nids) {
+		codec->num_pcms = 3;
+		info++;
+		info->name = spec->stream_name_analog;
+		/* No playback stream for second PCM */
+		info->stream[SNDRV_PCM_STREAM_PLAYBACK] = alc_pcm_null_playback;
+		info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = 0;
+		if (spec->stream_analog_capture) {
+			info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_analog_capture);
+			info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adc_nids[1];
+		}
+	}
+
 	return 0;
 }
 
@@ -2130,7 +2146,10 @@
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe20f, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe210, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe211, .config = ALC880_3ST },
+	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe212, .config = ALC880_3ST },
+	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe213, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe214, .config = ALC880_3ST },
+	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe234, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe302, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe303, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe304, .config = ALC880_3ST },
@@ -2145,6 +2164,7 @@
 	{ .pci_subvendor = 0x107b, .pci_subdevice = 0x4040, .config = ALC880_3ST },
 	{ .pci_subvendor = 0x107b, .pci_subdevice = 0x4041, .config = ALC880_3ST },
 	/* TCL S700 */
+	{ .modelname = "tcl", .config = ALC880_TCL_S700 },
 	{ .pci_subvendor = 0x19db, .pci_subdevice = 0x4188, .config = ALC880_TCL_S700 },
 
 	/* Back 3 jack, front 2 jack (Internal add Aux-In) */
@@ -2156,8 +2176,13 @@
 	{ .modelname = "3stack-digout", .config = ALC880_3ST_DIG },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe308, .config = ALC880_3ST_DIG },
 	{ .pci_subvendor = 0x1025, .pci_subdevice = 0x0070, .config = ALC880_3ST_DIG },
-	/* Clevo m520G NB */
-	{ .pci_subvendor = 0x1558, .pci_subdevice = 0x0520, .config = ALC880_CLEVO },
+
+	/* Clevo laptops */
+	{ .modelname = "clevo", .config = ALC880_CLEVO },
+	{ .pci_subvendor = 0x1558, .pci_subdevice = 0x0520,
+	  .config = ALC880_CLEVO }, /* Clevo m520G NB */
+	{ .pci_subvendor = 0x1558, .pci_subdevice = 0x0660,
+	  .config = ALC880_CLEVO }, /* Clevo m665n */
 
 	/* Back 3 jack plus 1 SPDIF out jack, front 2 jack (Internal add Aux-In)*/
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xe305, .config = ALC880_3ST_DIG },
@@ -2222,12 +2247,16 @@
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1113, .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1173, .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1993, .config = ALC880_ASUS },
+	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x10c2, .config = ALC880_ASUS_DIG }, /* Asus W6A */
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x10c3, .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1133, .config = ALC880_ASUS },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1123, .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x1143, .config = ALC880_ASUS },
+	{ .modelname = "asus-w1v", .config = ALC880_ASUS_W1V },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x10b3, .config = ALC880_ASUS_W1V },
+	{ .modelname = "asus-dig", .config = ALC880_ASUS_DIG },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x8181, .config = ALC880_ASUS_DIG }, /* ASUS P4GPL-X */
+	{ .modelname = "asus-dig2", .config = ALC880_ASUS_DIG2 },
 	{ .pci_subvendor = 0x1558, .pci_subdevice = 0x5401, .config = ALC880_ASUS_DIG2 },
 
 	{ .modelname = "uniwill", .config = ALC880_UNIWILL_DIG },
@@ -2243,6 +2272,7 @@
 
 	{ .modelname = "lg-lw", .config = ALC880_LG_LW },
 	{ .pci_subvendor = 0x1854, .pci_subdevice = 0x0018, .config = ALC880_LG_LW },
+	{ .pci_subvendor = 0x1854, .pci_subdevice = 0x0077, .config = ALC880_LG_LW },
 
 #ifdef CONFIG_SND_DEBUG
 	{ .modelname = "test", .config = ALC880_TEST },
@@ -2263,6 +2293,7 @@
 		.dac_nids = alc880_dac_nids,
 		.num_channel_mode = ARRAY_SIZE(alc880_threestack_modes),
 		.channel_mode = alc880_threestack_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_3ST_DIG] = {
@@ -2273,6 +2304,7 @@
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_threestack_modes),
 		.channel_mode = alc880_threestack_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_TCL_S700] = {
@@ -2365,6 +2397,7 @@
 		.dac_nids = alc880_asus_dac_nids,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_ASUS_DIG] = {
@@ -2376,6 +2409,7 @@
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_ASUS_DIG2] = {
@@ -2387,6 +2421,7 @@
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_ASUS_W1V] = {
@@ -2398,6 +2433,7 @@
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_UNIWILL_DIG] = {
@@ -2408,6 +2444,7 @@
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
 		.channel_mode = alc880_asus_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_CLEVO] = {
@@ -2419,6 +2456,7 @@
 		.hp_nid = 0x03,
 		.num_channel_mode = ARRAY_SIZE(alc880_threestack_modes),
 		.channel_mode = alc880_threestack_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_capture_source,
 	},
 	[ALC880_LG] = {
@@ -2430,6 +2468,7 @@
 		.dig_out_nid = ALC880_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc880_lg_ch_modes),
 		.channel_mode = alc880_lg_ch_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc880_lg_capture_source,
 		.unsol_event = alc880_lg_unsol_event,
 		.init_hook = alc880_lg_automute,
@@ -2714,7 +2753,7 @@
 	pin = spec->autocfg.speaker_pins[0];
 	if (pin) /* connect to front */
 		alc880_auto_set_output_and_unmute(codec, pin, PIN_OUT, 0);
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		alc880_auto_set_output_and_unmute(codec, pin, PIN_HP, 0);
 }
@@ -2755,7 +2794,7 @@
 	    (err = alc880_auto_create_extra_out(spec,
 						spec->autocfg.speaker_pins[0],
 						"Speaker")) < 0 ||
-	    (err = alc880_auto_create_extra_out(spec, spec->autocfg.hp_pin,
+	    (err = alc880_auto_create_extra_out(spec, spec->autocfg.hp_pins[0],
 						"Headphone")) < 0 ||
 	    (err = alc880_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
 		return err;
@@ -3697,7 +3736,7 @@
 			return err;
 	}
 
-	nid = cfg->hp_pin;
+	nid = cfg->hp_pins[0];
 	if (nid) {
 		err = alc260_add_playback_controls(spec, nid, "Headphone");
 		if (err < 0)
@@ -3767,7 +3806,7 @@
 	if (nid)
 		alc260_auto_set_output_and_unmute(codec, nid, PIN_OUT, 0);
 
-	nid = spec->autocfg.hp_pin;
+	nid = spec->autocfg.hp_pins[0];
 	if (nid)
 		alc260_auto_set_output_and_unmute(codec, nid, PIN_OUT, 0);
 }	
@@ -3900,7 +3939,8 @@
 	{ .pci_subvendor = 0x152d, .pci_subdevice = 0x0729,
 	  .config = ALC260_BASIC }, /* CTL Travel Master U553W */
 	{ .modelname = "hp", .config = ALC260_HP },
-	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP },
+	{ .modelname = "hp-3013", .config = ALC260_HP_3013 },
+	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP_3013 },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3011, .config = ALC260_HP },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3012, .config = ALC260_HP_3013 },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3013, .config = ALC260_HP_3013 },
@@ -4266,6 +4306,13 @@
 	{ }
 };
 
+static struct hda_verb alc882_eapd_verbs[] = {
+	/* change to EAPD mode */
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x07},
+	{0x20, AC_VERB_SET_PROC_COEF, 0x3060},
+	{ } 
+};
+
 /*
  * generic initialization of ADC, input mixers and output mixers
  */
@@ -4397,6 +4444,9 @@
 	  .config = ALC882_6ST_DIG }, /* Foxconn */
 	{ .pci_subvendor = 0x1019, .pci_subdevice = 0x6668,
 	  .config = ALC882_6ST_DIG }, /* ECS to Intel*/
+	{ .modelname = "arima", .config = ALC882_ARIMA },
+	{ .pci_subvendor = 0x161f, .pci_subdevice = 0x2054,
+	  .config = ALC882_ARIMA }, /* Arima W820Di1 */
 	{ .modelname = "auto", .config = ALC882_AUTO },
 	{}
 };
@@ -4411,6 +4461,7 @@
 		.dig_in_nid = ALC882_DIGIN_NID,
 		.num_channel_mode = ARRAY_SIZE(alc882_ch_modes),
 		.channel_mode = alc882_ch_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc882_capture_source,
 	},
 	[ALC882_6ST_DIG] = {
@@ -4424,6 +4475,15 @@
 		.channel_mode = alc882_sixstack_modes,
 		.input_mux = &alc882_capture_source,
 	},
+	[ALC882_ARIMA] = {
+		.mixers = { alc882_base_mixer, alc882_chmode_mixer },
+		.init_verbs = { alc882_init_verbs, alc882_eapd_verbs },
+		.num_dacs = ARRAY_SIZE(alc882_dac_nids),
+		.dac_nids = alc882_dac_nids,
+		.num_channel_mode = ARRAY_SIZE(alc882_sixstack_modes),
+		.channel_mode = alc882_sixstack_modes,
+		.input_mux = &alc882_capture_source,
+	},
 };
 
 
@@ -4466,7 +4526,7 @@
 	struct alc_spec *spec = codec->spec;
 	hda_nid_t pin;
 
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		alc882_auto_set_output_and_unmute(codec, pin, PIN_HP, 0); /* use dac 0 */
 }
@@ -4999,16 +5059,23 @@
  */
 static struct hda_board_config alc883_cfg_tbl[] = {
 	{ .modelname = "3stack-dig", .config = ALC883_3ST_2ch_DIG },
+	{ .modelname = "3stack-6ch-dig", .config = ALC883_3ST_6ch_DIG },
+	{ .pci_subvendor = 0x1019, .pci_subdevice = 0x6668,
+	  .config = ALC883_3ST_6ch_DIG }, /* ECS to Intel*/
+	{ .modelname = "3stack-6ch", .config = ALC883_3ST_6ch },
+	{ .pci_subvendor = 0x108e, .pci_subdevice = 0x534d,
+	  .config = ALC883_3ST_6ch },
+        { .pci_subvendor = 0x8086, .pci_subdevice = 0xd601,
+          .config = ALC883_3ST_6ch }, /* D102GGC */
 	{ .modelname = "6stack-dig", .config = ALC883_6ST_DIG },
-	{ .modelname = "6stack-dig-demo", .config = ALC888_DEMO_BOARD },
 	{ .pci_subvendor = 0x1462, .pci_subdevice = 0x6668,
 	  .config = ALC883_6ST_DIG }, /* MSI  */
 	{ .pci_subvendor = 0x105b, .pci_subdevice = 0x6668,
 	  .config = ALC883_6ST_DIG }, /* Foxconn */
-	{ .pci_subvendor = 0x1019, .pci_subdevice = 0x6668,
-	  .config = ALC883_3ST_6ch_DIG }, /* ECS to Intel*/
-	{ .pci_subvendor = 0x108e, .pci_subdevice = 0x534d,
-	  .config = ALC883_3ST_6ch },
+	{ .modelname = "6stack-dig-demo", .config = ALC888_DEMO_BOARD },
+	{ .modelname = "acer", .config = ALC883_ACER },
+	{ .pci_subvendor = 0x1025, .pci_subdevice = 0/*0x0102*/,
+	  .config = ALC883_ACER },
 	{ .modelname = "auto", .config = ALC883_AUTO },
 	{}
 };
@@ -5038,6 +5105,7 @@
 		.dig_in_nid = ALC883_DIGIN_NID,
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_6ch_modes),
 		.channel_mode = alc883_3ST_6ch_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc883_capture_source,
 	},	
 	[ALC883_3ST_6ch] = {
@@ -5049,6 +5117,7 @@
 		.adc_nids = alc883_adc_nids,
 		.num_channel_mode = ARRAY_SIZE(alc883_3ST_6ch_modes),
 		.channel_mode = alc883_3ST_6ch_modes,
+		.need_dac_fix = 1,
 		.input_mux = &alc883_capture_source,
 	},	
 	[ALC883_6ST_DIG] = {
@@ -5077,6 +5146,23 @@
 		.channel_mode = alc883_sixstack_modes,
 		.input_mux = &alc883_capture_source,
 	},
+	[ALC883_ACER] = {
+		.mixers = { alc883_base_mixer,
+			    alc883_chmode_mixer },
+		/* On TravelMate laptops, GPIO 0 enables the internal speaker
+		 * and the headphone jack.  Turn this on and rely on the
+		 * standard mute methods whenever the user wants to turn
+		 * these outputs off.
+		 */
+		.init_verbs = { alc883_init_verbs, alc880_gpio1_init_verbs },
+		.num_dacs = ARRAY_SIZE(alc883_dac_nids),
+		.dac_nids = alc883_dac_nids,
+		.num_adc_nids = ARRAY_SIZE(alc883_adc_nids),
+		.adc_nids = alc883_adc_nids,
+		.num_channel_mode = ARRAY_SIZE(alc883_3ST_2ch_modes),
+		.channel_mode = alc883_3ST_2ch_modes,
+		.input_mux = &alc883_capture_source,
+	},
 };
 
 
@@ -5121,7 +5207,7 @@
 	struct alc_spec *spec = codec->spec;
 	hda_nid_t pin;
 
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		/* use dac 0 */
 		alc883_auto_set_output_and_unmute(codec, pin, PIN_HP, 0);
@@ -5217,8 +5303,10 @@
 	spec->stream_digital_playback = &alc883_pcm_digital_playback;
 	spec->stream_digital_capture = &alc883_pcm_digital_capture;
 
-	spec->adc_nids = alc883_adc_nids;
-	spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids);
+	if (! spec->adc_nids && spec->input_mux) {
+		spec->adc_nids = alc883_adc_nids;
+		spec->num_adc_nids = ARRAY_SIZE(alc883_adc_nids);
+	}
 
 	codec->patch_ops = alc_patch_ops;
 	if (board_config == ALC883_AUTO)
@@ -5481,6 +5569,7 @@
 		.info = snd_hda_mixer_amp_volume_info,
 		.get = snd_hda_mixer_amp_volume_get,
 		.put = alc262_fujitsu_master_vol_put,
+		.tlv = { .c = snd_hda_mixer_amp_tlv },
 		.private_value = HDA_COMPOSE_AMP_VAL(0x0c, 3, 0, HDA_OUTPUT),
 	},
 	{
@@ -5499,6 +5588,13 @@
 	{ } /* end */
 };
 
+/* additional init verbs for Benq laptops */
+static struct hda_verb alc262_EAPD_verbs[] = {
+	{0x20, AC_VERB_SET_COEF_INDEX, 0x07},
+	{0x20, AC_VERB_SET_PROC_COEF,  0x3070},
+	{}
+};
+
 /* add playback controls from the parsed DAC table */
 static int alc262_auto_create_multi_out_ctls(struct alc_spec *spec, const struct auto_pin_cfg *cfg)
 {
@@ -5534,7 +5630,7 @@
 				return err;
 		}
 	}
-	nid = cfg->hp_pin;
+	nid = cfg->hp_pins[0];
 	if (nid) {
 		/* spec->multiout.hp_nid = 2; */
 		if (nid == 0x16) {
@@ -5769,6 +5865,7 @@
 	{ .modelname = "fujitsu", .config = ALC262_FUJITSU },
 	{ .pci_subvendor = 0x10cf, .pci_subdevice = 0x1397,
 	  .config = ALC262_FUJITSU },
+	{ .modelname = "hp-bpc", .config = ALC262_HP_BPC },
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x208c,
 	  .config = ALC262_HP_BPC }, /* xw4400 */
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x3014,
@@ -5777,6 +5874,9 @@
 	  .config = ALC262_HP_BPC }, /* xw8400 */
 	{ .pci_subvendor = 0x103c, .pci_subdevice = 0x12fe,
 	  .config = ALC262_HP_BPC }, /* xw9400 */
+	{ .modelname = "benq", .config = ALC262_BENQ_ED8 },
+	{ .pci_subvendor = 0x17ff, .pci_subdevice = 0x0560,
+	  .config = ALC262_BENQ_ED8 },
 	{ .modelname = "auto", .config = ALC262_AUTO },
 	{}
 };
@@ -5814,6 +5914,16 @@
 		.channel_mode = alc262_modes,
 		.input_mux = &alc262_HP_capture_source,
 	},	
+	[ALC262_BENQ_ED8] = {
+		.mixers = { alc262_base_mixer },
+		.init_verbs = { alc262_init_verbs, alc262_EAPD_verbs },
+		.num_dacs = ARRAY_SIZE(alc262_dac_nids),
+		.dac_nids = alc262_dac_nids,
+		.hp_nid = 0x03,
+		.num_channel_mode = ARRAY_SIZE(alc262_modes),
+		.channel_mode = alc262_modes,
+		.input_mux = &alc262_capture_source,
+	},		
 };
 
 static int patch_alc262(struct hda_codec *codec)
@@ -5942,6 +6052,23 @@
 	{ 2, alc861_threestack_ch2_init },
 	{ 6, alc861_threestack_ch6_init },
 };
+/* Set mic1 as input and unmute the mixer */
+static struct hda_verb alc861_uniwill_m31_ch2_init[] = {
+	{ 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+	{ 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8)) }, /*mic*/
+	{ } /* end */
+};
+/* Set mic1 as output and mute mixer */
+static struct hda_verb alc861_uniwill_m31_ch4_init[] = {
+	{ 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+	{ 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8)) }, /*mic*/
+	{ } /* end */
+};
+
+static struct hda_channel_mode alc861_uniwill_m31_modes[2] = {
+	{ 2, alc861_uniwill_m31_ch2_init },
+	{ 4, alc861_uniwill_m31_ch4_init },
+};
 
 /* patch-ALC861 */
 
@@ -6020,6 +6147,47 @@
 	},
 	{ } /* end */
 };			
+static struct snd_kcontrol_new alc861_uniwill_m31_mixer[] = {
+        /* output mixer control */
+	HDA_CODEC_MUTE("Front Playback Switch", 0x03, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Surround Playback Switch", 0x06, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x05, 1, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x05, 2, 0x0, HDA_OUTPUT),
+	/*HDA_CODEC_MUTE("Side Playback Switch", 0x04, 0x0, HDA_OUTPUT), */
+
+	/* Input mixer control */
+	/* HDA_CODEC_VOLUME("Input Playback Volume", 0x15, 0x0, HDA_OUTPUT),
+	   HDA_CODEC_MUTE("Input Playback Switch", 0x15, 0x0, HDA_OUTPUT), */
+	HDA_CODEC_VOLUME("CD Playback Volume", 0x15, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("CD Playback Switch", 0x15, 0x0, HDA_INPUT),
+	HDA_CODEC_VOLUME("Line Playback Volume", 0x15, 0x02, HDA_INPUT),
+	HDA_CODEC_MUTE("Line Playback Switch", 0x15, 0x02, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x15, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x15, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Front Mic Playback Switch", 0x10, 0x01, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Headphone Playback Switch", 0x1a, 0x03, HDA_INPUT),
+ 
+	/* Capture mixer control */
+	HDA_CODEC_VOLUME("Capture Volume", 0x08, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Capture Switch", 0x08, 0x0, HDA_INPUT),
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Capture Source",
+		.count = 1,
+		.info = alc_mux_enum_info,
+		.get = alc_mux_enum_get,
+		.put = alc_mux_enum_put,
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Channel Mode",
+		.info = alc_ch_mode_info,
+		.get = alc_ch_mode_get,
+		.put = alc_ch_mode_put,
+                .private_value = ARRAY_SIZE(alc861_uniwill_m31_modes),
+	},
+	{ } /* end */
+};			
 	
 /*
  * generic initialization of ADC, input mixers and output mixers
@@ -6148,6 +6316,67 @@
         {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
 	{ }
 };
+
+static struct hda_verb alc861_uniwill_m31_init_verbs[] = {
+	/*
+	 * Unmute ADC0 and set the default input to mic-in
+	 */
+	/* port-A for surround (rear panel) */
+	{ 0x0e, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+	/* port-B for mic-in (rear panel) with vref */
+	{ 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+	/* port-C for line-in (rear panel) */
+	{ 0x0c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+	/* port-D for Front */
+	{ 0x0b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+	{ 0x0b, AC_VERB_SET_CONNECT_SEL, 0x00 },
+	/* port-E for HP out (front panel) */
+	{ 0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 }, // this has to be set to VREF80
+	/* route front PCM to HP */
+	{ 0x0f, AC_VERB_SET_CONNECT_SEL, 0x01 },
+	/* port-F for mic-in (front panel) with vref */
+	{ 0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+	/* port-G for CLFE (rear panel) */
+	{ 0x1f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+	/* port-H for side (rear panel) */
+	{ 0x20, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+	/* CD-in */
+	{ 0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+	/* route front mic to ADC1*/
+	{0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+	{0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	/* Unmute DAC0~3 & spdif out*/
+	{0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x06, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	{0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	
+	/* Unmute Mixer 14 (mic) 1c (Line in)*/
+	{0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+        {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+        {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	
+	/* Unmute Stereo Mixer 15 */
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb00c          }, //Output 0~12 step
+
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+	{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+	{0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)}, // hp used DAC 3 (Front)
+        {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+	{ }
+};
+
 /*
  * generic initialization of ADC, input mixers and output mixers
  */
@@ -6401,7 +6630,7 @@
 	struct alc_spec *spec = codec->spec;
 	hda_nid_t pin;
 
-	pin = spec->autocfg.hp_pin;
+	pin = spec->autocfg.hp_pins[0];
 	if (pin) /* connect to front */
 		alc861_auto_set_output_and_unmute(codec, pin, PIN_HP, spec->multiout.dac_nids[0]);
 }
@@ -6436,7 +6665,7 @@
 
 	if ((err = alc861_auto_fill_dac_nids(spec, &spec->autocfg)) < 0 ||
 	    (err = alc861_auto_create_multi_out_ctls(spec, &spec->autocfg)) < 0 ||
-	    (err = alc861_auto_create_hp_ctls(spec, spec->autocfg.hp_pin)) < 0 ||
+	    (err = alc861_auto_create_hp_ctls(spec, spec->autocfg.hp_pins[0])) < 0 ||
 	    (err = alc861_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
 		return err;
 
@@ -6477,10 +6706,14 @@
 	{ .modelname = "3stack", .config = ALC861_3ST },
 	{ .pci_subvendor = 0x8086, .pci_subdevice = 0xd600,
 	  .config = ALC861_3ST },
+	{ .modelname = "3stack-660", .config = ALC660_3ST },
 	{ .pci_subvendor = 0x1043, .pci_subdevice = 0x81e7,
 	  .config = ALC660_3ST },
 	{ .modelname = "3stack-dig", .config = ALC861_3ST_DIG },
 	{ .modelname = "6stack-dig", .config = ALC861_6ST_DIG },
+	{ .modelname = "uniwill-m31", .config = ALC861_UNIWILL_M31},
+	{ .pci_subvendor = 0x1584, .pci_subdevice = 0x9072,
+	  .config = ALC861_UNIWILL_M31 },
 	{ .modelname = "auto", .config = ALC861_AUTO },
 	{}
 };
@@ -6493,6 +6726,7 @@
 		.dac_nids = alc861_dac_nids,
 		.num_channel_mode = ARRAY_SIZE(alc861_threestack_modes),
 		.channel_mode = alc861_threestack_modes,
+		.need_dac_fix = 1,
 		.num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
 		.adc_nids = alc861_adc_nids,
 		.input_mux = &alc861_capture_source,
@@ -6505,6 +6739,7 @@
 		.dig_out_nid = ALC861_DIGOUT_NID,
 		.num_channel_mode = ARRAY_SIZE(alc861_threestack_modes),
 		.channel_mode = alc861_threestack_modes,
+		.need_dac_fix = 1,
 		.num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
 		.adc_nids = alc861_adc_nids,
 		.input_mux = &alc861_capture_source,
@@ -6528,10 +6763,25 @@
 		.dac_nids = alc660_dac_nids,
 		.num_channel_mode = ARRAY_SIZE(alc861_threestack_modes),
 		.channel_mode = alc861_threestack_modes,
+		.need_dac_fix = 1,
 		.num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
 		.adc_nids = alc861_adc_nids,
 		.input_mux = &alc861_capture_source,
 	},
+	[ALC861_UNIWILL_M31] = {
+		.mixers = { alc861_uniwill_m31_mixer },
+		.init_verbs = { alc861_uniwill_m31_init_verbs },
+		.num_dacs = ARRAY_SIZE(alc861_dac_nids),
+		.dac_nids = alc861_dac_nids,
+		.dig_out_nid = ALC861_DIGOUT_NID,
+		.num_channel_mode = ARRAY_SIZE(alc861_uniwill_m31_modes),
+		.channel_mode = alc861_uniwill_m31_modes,
+		.need_dac_fix = 1,
+		.num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
+		.adc_nids = alc861_adc_nids,
+		.input_mux = &alc861_capture_source,
+	},
+
 };	
 
 
diff --git a/sound/pci/hda/patch_si3054.c b/sound/pci/hda/patch_si3054.c
index 250242c..76ec3d7 100644
--- a/sound/pci/hda/patch_si3054.c
+++ b/sound/pci/hda/patch_si3054.c
@@ -298,6 +298,7 @@
  	{ .id = 0x163c3055, .name = "Si3054", .patch = patch_si3054 },
  	{ .id = 0x163c3155, .name = "Si3054", .patch = patch_si3054 },
  	{ .id = 0x11c13026, .name = "Si3054", .patch = patch_si3054 },
+ 	{ .id = 0x10573057, .name = "Si3054", .patch = patch_si3054 },
 	{}
 };
 
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index ea99083..731b7b9 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -36,15 +36,15 @@
 
 #define NUM_CONTROL_ALLOC	32
 #define STAC_HP_EVENT		0x37
-#define STAC_UNSOL_ENABLE 	(AC_USRSP_EN | STAC_HP_EVENT)
 
 #define STAC_REF		0
 #define STAC_D945GTP3		1
 #define STAC_D945GTP5		2
 #define STAC_MACMINI		3
-#define STAC_D965_2112		4
-#define STAC_D965_284B		5
-#define STAC_922X_MODELS	6	/* number of 922x models */
+#define STAC_922X_MODELS	4	/* number of 922x models */
+#define STAC_D965_3ST		4
+#define STAC_D965_5ST		5
+#define STAC_927X_MODELS	6	/* number of 927x models */
 
 struct sigmatel_spec {
 	struct snd_kcontrol_new *mixers[4];
@@ -73,6 +73,7 @@
 	hda_nid_t *pin_nids;
 	unsigned int num_pins;
 	unsigned int *pin_configs;
+	unsigned int *bios_pin_configs;
 
 	/* codec specific stuff */
 	struct hda_verb *init;
@@ -110,24 +111,10 @@
         0x06, 0x07,
 };
 
-static hda_nid_t stac9227_adc_nids[2] = {
-        0x07, 0x08,
-};
-
-#if 0
-static hda_nid_t d965_2112_dac_nids[3] = {
-        0x02, 0x03, 0x05,
-};
-#endif
-
 static hda_nid_t stac922x_mux_nids[2] = {
         0x12, 0x13,
 };
 
-static hda_nid_t stac9227_mux_nids[2] = {
-        0x15, 0x16,
-};
-
 static hda_nid_t stac927x_adc_nids[3] = {
         0x07, 0x08, 0x09
 };
@@ -136,8 +123,17 @@
         0x15, 0x16, 0x17
 };
 
+static hda_nid_t stac9205_adc_nids[2] = {
+        0x12, 0x13
+};
+
+static hda_nid_t stac9205_mux_nids[2] = {
+        0x19, 0x1a
+};
+
 static hda_nid_t stac9200_pin_nids[8] = {
-	0x08, 0x09, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12,
+	0x08, 0x09, 0x0d, 0x0e, 
+	0x0f, 0x10, 0x11, 0x12,
 };
 
 static hda_nid_t stac922x_pin_nids[10] = {
@@ -151,6 +147,13 @@
 	0x14, 0x21, 0x22, 0x23,
 };
 
+static hda_nid_t stac9205_pin_nids[12] = {
+	0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+	0x0f, 0x14, 0x16, 0x17, 0x18,
+	0x21, 0x22,
+	
+};
+
 static int stac92xx_mux_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
 	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
@@ -190,17 +193,9 @@
 	{}
 };
 
-static struct hda_verb stac9227_core_init[] = {
+static struct hda_verb d965_core_init[] = {
 	/* set master volume and direct control */	
-	{ 0x16, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff},
-	/* unmute node 0x1b */
-	{ 0x1b, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-	{}
-};
-
-static struct hda_verb d965_2112_core_init[] = {
-	/* set master volume and direct control */	
-	{ 0x16, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff},
+	{ 0x24, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff},
 	/* unmute node 0x1b */
 	{ 0x1b, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
 	/* select node 0x03 as DAC */	
@@ -214,6 +209,12 @@
 	{}
 };
 
+static struct hda_verb stac9205_core_init[] = {
+	/* set master volume and direct control */	
+	{ 0x24, AC_VERB_SET_VOLUME_KNOB_CONTROL, 0xff},
+	{}
+};
+
 static struct snd_kcontrol_new stac9200_mixer[] = {
 	HDA_CODEC_VOLUME("Master Playback Volume", 0xb, 0, HDA_OUTPUT),
 	HDA_CODEC_MUTE("Master Playback Switch", 0xb, 0, HDA_OUTPUT),
@@ -277,6 +278,21 @@
 	{ } /* end */
 };
 
+static struct snd_kcontrol_new stac9205_mixer[] = {	/* STAC9205 capture controls */
+	{	/* capture source selector, served by the stac92xx mux enum callbacks */
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Input Source",
+		.count = 1,
+		.info = stac92xx_mux_enum_info,
+		.get = stac92xx_mux_enum_get,
+		.put = stac92xx_mux_enum_put,
+	},
+	HDA_CODEC_VOLUME("InMux Capture Volume", 0x19, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("InVol Capture Volume", 0x1b, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("ADCMux Capture Switch", 0x1d, 0x0, HDA_OUTPUT),
+	{ } /* end */
+};
+
 static int stac92xx_build_controls(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
@@ -341,38 +357,67 @@
 	0x02a19320, 0x40000100,
 };
 
-static unsigned int d965_2112_pin_configs[10] = {
-	0x0221401f, 0x40000100, 0x40000100, 0x01014011,
-	0x01a19021, 0x01813024, 0x01452130, 0x40000100,
-	0x02a19320, 0x40000100,
-};
-
 static unsigned int *stac922x_brd_tbl[STAC_922X_MODELS] = {
 	[STAC_REF] =	ref922x_pin_configs,
 	[STAC_D945GTP3] = d945gtp3_pin_configs,
 	[STAC_D945GTP5] = d945gtp5_pin_configs,
 	[STAC_MACMINI] = d945gtp5_pin_configs,
-	[STAC_D965_2112] = d965_2112_pin_configs,
 };
 
 static struct hda_board_config stac922x_cfg_tbl[] = {
+	{ .modelname = "5stack", .config = STAC_D945GTP5 },
+	{ .modelname = "3stack", .config = STAC_D945GTP3 },
 	{ .modelname = "ref",
 	  .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2668,	/* DFI LanParty */
 	  .config = STAC_REF },		/* SigmaTel reference board */
+         /* Intel 945G based systems */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x0101,
 	  .config = STAC_D945GTP3 },	/* Intel D945GTP - 3 Stack */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x0202,
-	  .config = STAC_D945GTP3 },	/* Intel D945GNT - 3 Stack, 9221 A1 */
+	  .config = STAC_D945GTP3 },	/* Intel D945GNT - 3 Stack */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
-	  .pci_subdevice = 0x0b0b,
-	  .config = STAC_D945GTP3 },	/* Intel D945PSN - 3 Stack, 9221 A1 */
+	  .pci_subdevice = 0x0606,
+	  .config = STAC_D945GTP3 },	/* Intel D945GTP - 3 Stack */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
-	  .pci_subdevice = 0x0707,
-	  .config = STAC_D945GTP5 },	/* Intel D945PSV - 5 Stack */
-       { .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0601,
+	  .config = STAC_D945GTP3 },	/* Intel D945GTP - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0111,
+	  .config = STAC_D945GTP3 },	/* Intel D945GZP - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1115,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1116,
+	  .config = STAC_D945GTP3 },	/* Intel D945GBO - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1117,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1118,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x1119,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x8826,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x5049,
+	  .config = STAC_D945GTP3 },	/* Intel D945GCZ - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x5055,
+	  .config = STAC_D945GTP3 },	/* Intel D945GCZ - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x5048,
+	  .config = STAC_D945GTP3 },	/* Intel D945GPB - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0110,
+	  .config = STAC_D945GTP3 },	/* Intel D945GLR - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x0404,
 	  .config = STAC_D945GTP5 },	/* Intel D945GTP - 5 Stack */
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
@@ -384,44 +429,214 @@
 	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x0417,
 	  .config = STAC_D945GTP5 },	/* Intel D975XBK - 5 Stack */
+	  /* Intel 945P based systems */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0b0b,
+	  .config = STAC_D945GTP3 },	/* Intel D945PSN - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0112,
+	  .config = STAC_D945GTP3 },	/* Intel D945PLN - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0d0d,
+	  .config = STAC_D945GTP3 },	/* Intel D945PLM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0909,
+	  .config = STAC_D945GTP3 },	/* Intel D945PAW - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0505,
+	  .config = STAC_D945GTP3 },	/* Intel D945PLM - 3 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x0707,
+	  .config = STAC_D945GTP5 },	/* Intel D945PSV - 5 Stack */
+	  /* other systems  */
 	{ .pci_subvendor = 0x8384,
 	  .pci_subdevice = 0x7680,
 	  .config = STAC_MACMINI },	/* Apple Mac Mini (early 2006) */
-	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
-	  .pci_subdevice = 0x2112,
-	  .config = STAC_D965_2112 },
-	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
-	  .pci_subdevice = 0x284b,
-	  .config = STAC_D965_284B },
 	{} /* terminator */
 };
 
 static unsigned int ref927x_pin_configs[14] = {
-	0x01813122, 0x01a19021, 0x01014010, 0x01016011,
-	0x01012012, 0x01011014, 0x40000100, 0x40000100, 
-	0x40000100, 0x40000100, 0x40000100, 0x01441030,
-	0x01c41030, 0x40000100,
+	0x02214020, 0x02a19080, 0x0181304e, 0x01014010,
+	0x01a19040, 0x01011012, 0x01016011, 0x0101201f, 
+	0x183301f0, 0x18a001f0, 0x18a001f0, 0x01442070,
+	0x01c42190, 0x40000100,
 };
 
-static unsigned int *stac927x_brd_tbl[] = {
-	ref927x_pin_configs,
+static unsigned int d965_3st_pin_configs[14] = {
+	0x0221401f, 0x02a19120, 0x40000100, 0x01014011,
+	0x01a19021, 0x01813024, 0x40000100, 0x40000100,
+	0x40000100, 0x40000100, 0x40000100, 0x40000100,
+	0x40000100, 0x40000100
+};
+
+static unsigned int d965_5st_pin_configs[14] = {
+	0x02214020, 0x02a19080, 0x0181304e, 0x01014010,
+	0x01a19040, 0x01011012, 0x01016011, 0x40000100,
+	0x40000100, 0x40000100, 0x40000100, 0x01442070,
+	0x40000100, 0x40000100
+};
+
+static unsigned int *stac927x_brd_tbl[STAC_927X_MODELS] = {
+	[STAC_REF] =	ref927x_pin_configs,
+	[STAC_D965_3ST] = d965_3st_pin_configs,
+	[STAC_D965_5ST] = d965_5st_pin_configs,
 };
 
 static struct hda_board_config stac927x_cfg_tbl[] = {
+	{ .modelname = "5stack", .config = STAC_D965_5ST },
+	{ .modelname = "3stack", .config = STAC_D965_3ST },
 	{ .modelname = "ref",
 	  .pci_subvendor = PCI_VENDOR_ID_INTEL,
 	  .pci_subdevice = 0x2668,	/* DFI LanParty */
 	  .config = STAC_REF },		/* SigmaTel reference board */
+	 /* Intel 946 based systems */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x3d01,
+	  .config = STAC_D965_3ST }, /* D946  configuration */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0xa301,
+	  .config = STAC_D965_3ST }, /* Intel D946GZT - 3 stack  */
+	/* 965 based 3 stack systems */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2116,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2115,
+	  .config = STAC_D965_3ST }, /* Intel DQ965WC - 3 Stack  */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2114,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2113,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2112,
+	  .config = STAC_D965_3ST }, /* Intel DG965MS - 3 Stack  */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2111,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2110,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2009,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2008,
+	  .config = STAC_D965_3ST }, /* Intel DQ965GF - 3 Stack  */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2007,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2006,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2005,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2004,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2003,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2002,
+	  .config = STAC_D965_3ST }, /* Intel D965 3Stack config */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2001,
+	  .config = STAC_D965_3ST }, /* Intel DQ965GF - 3 Stack */
+	/* 965 based 5 stack systems */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2301,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2302,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2303,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2304,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2305,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2501,
+	  .config = STAC_D965_5ST }, /* Intel DG965MQ - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2502,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2503,
+	  .config = STAC_D965_5ST }, /* Intel DG965 - 5 Stack */
+	{ .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2504,
+	  .config = STAC_D965_5ST }, /* Intel DQ965GF - 5 Stack */
 	{} /* terminator */
 };
 
+static unsigned int ref9205_pin_configs[12] = {
+	0x40000100, 0x40000100, 0x01016011, 0x01014010,
+	0x01813122, 0x01a19021, 0x40000100, 0x40000100, 
+	0x40000100, 0x40000100, 0x01441030, 0x01c41030
+};
+
+static unsigned int *stac9205_brd_tbl[] = {	/* pin-config table per board id */
+	[STAC_REF] =	ref9205_pin_configs,
+};
+
+static struct hda_board_config stac9205_cfg_tbl[] = {
+	{ .modelname = "ref",
+	  .pci_subvendor = PCI_VENDOR_ID_INTEL,
+	  .pci_subdevice = 0x2668,	/* DFI LanParty */
+	  .config = STAC_REF },		/* SigmaTel reference board */
+	/* Dell laptops have BIOS problem */
+	{ .pci_subvendor = PCI_VENDOR_ID_DELL, .pci_subdevice = 0x01b5,
+	  .config = STAC_REF },	/* Dell Inspiron 630m */
+	{ .pci_subvendor = PCI_VENDOR_ID_DELL, .pci_subdevice = 0x01c2,
+	  .config = STAC_REF },	/* Dell Latitude D620 */
+	{ .pci_subvendor = PCI_VENDOR_ID_DELL, .pci_subdevice = 0x01cb,
+	  .config = STAC_REF },	/* Dell Latitude 120L */
+	{} /* terminator */
+};
+
+/* Cache each pin's default config in spec->bios_pin_configs; 0 or -ENOMEM. */
+static int stac92xx_save_bios_config_regs(struct hda_codec *codec)
+{
+	struct sigmatel_spec *spec = codec->spec;
+	unsigned int *saved = spec->bios_pin_configs;
+	int idx;
+
+	if (saved == NULL) {
+		/* first call: allocate one zeroed slot per pin */
+		saved = kcalloc(spec->num_pins, sizeof(*saved), GFP_KERNEL);
+		if (saved == NULL)
+			return -ENOMEM;
+		spec->bios_pin_configs = saved;
+	}
+
+	/* one GET_CONFIG_DEFAULT read per entry of spec->pin_nids */
+	for (idx = 0; idx < spec->num_pins; idx++) {
+		hda_nid_t nid = spec->pin_nids[idx];
+		saved[idx] = snd_hda_codec_read(codec, nid, 0,
+						AC_VERB_GET_CONFIG_DEFAULT, 0x00);
+		snd_printdd(KERN_INFO "hda_codec: pin nid %2.2x bios pin config %8.8x\n",
+			    nid, saved[idx]);
+	}
+	return 0;
+}
+
 static void stac92xx_set_config_regs(struct hda_codec *codec)
 {
 	int i;
 	struct sigmatel_spec *spec = codec->spec;
 	unsigned int pin_cfg;
 
-	for (i=0; i < spec->num_pins; i++) {
+	if (! spec->pin_nids || ! spec->pin_configs)
+		return;
+
+	for (i = 0; i < spec->num_pins; i++) {
 		snd_hda_codec_write(codec, spec->pin_nids[i], 0,
 				    AC_VERB_SET_CONFIG_DEFAULT_BYTES_0,
 				    spec->pin_configs[i] & 0x000000ff);
@@ -795,11 +1010,29 @@
 	return 0;
 }
 
+/* Create "<pfx> Playback Volume" and "<pfx> Playback Switch" on nid's output
+ * amp for channels chs.  Names are built with snprintf so a long prefix cannot
+ * overrun the buffer.  Returns 0, or the negative error of the failed add. */
+static int create_controls(struct sigmatel_spec *spec, const char *pfx, hda_nid_t nid, int chs)
+{
+	char name[64];	/* "External Speaker Playback Volume" needs 33 bytes */
+	int err;
+
+	snprintf(name, sizeof(name), "%s Playback Volume", pfx);
+	err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, name,
+				   HDA_COMPOSE_AMP_VAL(nid, chs, 0, HDA_OUTPUT));
+	if (err < 0)
+		return err;
+	snprintf(name, sizeof(name), "%s Playback Switch", pfx);
+	err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, name,
+				   HDA_COMPOSE_AMP_VAL(nid, chs, 0, HDA_OUTPUT));
+	return err < 0 ? err : 0;
+}
+
 /* add playback controls from the parsed DAC table */
 static int stac92xx_auto_create_multi_out_ctls(struct sigmatel_spec *spec,
 					       const struct auto_pin_cfg *cfg)
 {
-	char name[32];
 	static const char *chname[4] = {
 		"Front", "Surround", NULL /*CLFE*/, "Side"
 	};
@@ -814,26 +1047,15 @@
 
 		if (i == 2) {
 			/* Center/LFE */
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, "Center Playback Volume",
-					       HDA_COMPOSE_AMP_VAL(nid, 1, 0, HDA_OUTPUT))) < 0)
+			err = create_controls(spec, "Center", nid, 1);
+			if (err < 0)
 				return err;
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, "LFE Playback Volume",
-					       HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0)
-				return err;
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, "Center Playback Switch",
-					       HDA_COMPOSE_AMP_VAL(nid, 1, 0, HDA_OUTPUT))) < 0)
-				return err;
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, "LFE Playback Switch",
-					       HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0)
+			err = create_controls(spec, "LFE", nid, 2);
+			if (err < 0)
 				return err;
 		} else {
-			sprintf(name, "%s Playback Volume", chname[i]);
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, name,
-					       HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
-				return err;
-			sprintf(name, "%s Playback Switch", chname[i]);
-			if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, name,
-					       HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+			err = create_controls(spec, chname[i], nid, 3);
+			if (err < 0)
 				return err;
 		}
 	}
@@ -849,39 +1071,85 @@
 	return 0;
 }
 
-/* add playback controls for HP output */
-static int stac92xx_auto_create_hp_ctls(struct hda_codec *codec, struct auto_pin_cfg *cfg)
+/* Return 1 if nid is already used as a multiout DAC or as the HP DAC, else 0 */
+static int check_in_dac_nids(struct sigmatel_spec *spec, hda_nid_t nid)
+{
+	const hda_nid_t *dacs = spec->multiout.dac_nids;
+	int n = spec->multiout.num_dacs;
+
+	while (n-- > 0)
+		if (dacs[n] == nid)
+			return 1;
+	/* the headphone DAC is tracked separately from dac_nids */
+	return spec->multiout.hp_nid == nid;
+}
+
+/* Record nid as the HP DAC if unset, else append to dac_nids (1 if no room) */
+static int add_spec_dacs(struct sigmatel_spec *spec, hda_nid_t nid)
+{
+	if (spec->multiout.hp_nid && spec->multiout.num_dacs > 4) {
+		printk(KERN_WARNING "stac92xx: No space for DAC 0x%x\n", nid);
+		return 1;
+	}
+	if (!spec->multiout.hp_nid)
+		spec->multiout.hp_nid = nid;
+	else
+		spec->multiout.dac_nids[spec->multiout.num_dacs++] = nid;
+	return 0;
+}
+
+/* add playback controls for Speaker and HP outputs: collect the DACs feeding
+ * the HP and speaker pins, then create volume/switch controls for each one */
+static int stac92xx_auto_create_hp_ctls(struct hda_codec *codec,
+					struct auto_pin_cfg *cfg)
 {
 	struct sigmatel_spec *spec = codec->spec;
-	hda_nid_t pin = cfg->hp_pin;
 	hda_nid_t nid;
-	int i, err;
-	unsigned int wid_caps;
+	int i, old_num_dacs, err;
 
-	if (! pin)
-		return 0;
-
-	wid_caps = get_wcaps(codec, pin);
-	if (wid_caps & AC_WCAP_UNSOL_CAP)
-		spec->hp_detect = 1;
-
-	nid = snd_hda_codec_read(codec, pin, 0, AC_VERB_GET_CONNECT_LIST, 0) & 0xff;
-	for (i = 0; i < cfg->line_outs; i++) {
-		if (! spec->multiout.dac_nids[i])
+	old_num_dacs = spec->multiout.num_dacs;
+	for (i = 0; i < cfg->hp_outs; i++) {
+		unsigned int wid_caps = get_wcaps(codec, cfg->hp_pins[i]);
+		if (wid_caps & AC_WCAP_UNSOL_CAP)
+			spec->hp_detect = 1;
+		nid = snd_hda_codec_read(codec, cfg->hp_pins[i], 0,
+					 AC_VERB_GET_CONNECT_LIST, 0) & 0xff;
+		if (check_in_dac_nids(spec, nid))
+			nid = 0;
+		if (! nid)
 			continue;
-		if (spec->multiout.dac_nids[i] == nid)
-			return 0;
+		add_spec_dacs(spec, nid);
+	}
+	for (i = 0; i < cfg->speaker_outs; i++) {
+		/* each speaker pin has its own connection list */
+		nid = snd_hda_codec_read(codec, cfg->speaker_pins[i], 0,
+					 AC_VERB_GET_CONNECT_LIST, 0) & 0xff;
+		if (check_in_dac_nids(spec, nid))
+			nid = 0;
+		if (! nid)
+			continue;
+		add_spec_dacs(spec, nid);
 	}
 
-	spec->multiout.hp_nid = nid;
-
-	/* control HP volume/switch on the output mixer amp */
-	if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_VOL, "Headphone Playback Volume",
-					HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
-		return err;
-	if ((err = stac92xx_add_control(spec, STAC_CTL_WIDGET_MUTE, "Headphone Playback Switch",
-					HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
-		return err;
+	for (i = old_num_dacs; i < spec->multiout.num_dacs; i++) {
+		static const char *pfxs[] = {
+			"Speaker", "External Speaker", "Speaker2",
+		};
+		err = create_controls(spec, pfxs[i - old_num_dacs],
+				      spec->multiout.dac_nids[i], 3);
+		if (err < 0)
+			return err;
+	}
+	if (spec->multiout.hp_nid) {
+		const char *pfx;
+		if (old_num_dacs == spec->multiout.num_dacs)
+			pfx = "Master";
+		else
+			pfx = "Headphone";
+		err = create_controls(spec, pfx, spec->multiout.hp_nid, 3);
+		if (err < 0)
+			return err;
+	}
 
 	return 0;
 }
@@ -895,23 +1163,28 @@
 	int i, j, k;
 
 	for (i = 0; i < AUTO_PIN_LAST; i++) {
-		int index = -1;
-		if (cfg->input_pins[i]) {
-			imux->items[imux->num_items].label = auto_pin_cfg_labels[i];
+		int index;
 
-			for (j=0; j<spec->num_muxes; j++) {
-				int num_cons = snd_hda_get_connections(codec, spec->mux_nids[j], con_lst, HDA_MAX_NUM_INPUTS);
-				for (k=0; k<num_cons; k++)
-					if (con_lst[k] == cfg->input_pins[i]) {
-						index = k;
-					 	break;
-					}
-				if (index >= 0)
-					break;
-			}
-			imux->items[imux->num_items].index = index;
-			imux->num_items++;
+		if (!cfg->input_pins[i])
+			continue;
+		index = -1;
+		for (j = 0; j < spec->num_muxes; j++) {
+			int num_cons;
+			num_cons = snd_hda_get_connections(codec,
+							   spec->mux_nids[j],
+							   con_lst,
+							   HDA_MAX_NUM_INPUTS);
+			for (k = 0; k < num_cons; k++)
+				if (con_lst[k] == cfg->input_pins[i]) {
+					index = k;
+					goto found;
+				}
 		}
+		continue;
+	found:
+		imux->items[imux->num_items].label = auto_pin_cfg_labels[i];
+		imux->items[imux->num_items].index = index;
+		imux->num_items++;
 	}
 
 	if (imux->num_items == 1) {
@@ -944,11 +1217,20 @@
 static void stac92xx_auto_init_hp_out(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
-	hda_nid_t pin;
+	int i;
 
-	pin = spec->autocfg.hp_pin;
-	if (pin) /* connect to front */
-		stac92xx_auto_set_pinctl(codec, pin, AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN);
+	for (i = 0; i < spec->autocfg.hp_outs; i++) {
+		hda_nid_t pin;
+		pin = spec->autocfg.hp_pins[i];
+		if (pin) /* HP pin: request OUT_EN | HP_EN; empty (0) slots are skipped */
+			stac92xx_auto_set_pinctl(codec, pin, AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN);
+	}
+	for (i = 0; i < spec->autocfg.speaker_outs; i++) {
+		hda_nid_t pin;
+		pin = spec->autocfg.speaker_pins[i];
+		if (pin) /* speaker pin: request OUT_EN only; empty (0) slots are skipped */
+			stac92xx_auto_set_pinctl(codec, pin, AC_PINCTL_OUT_EN);
+	}
 }
 
 static int stac92xx_parse_auto_config(struct hda_codec *codec, hda_nid_t dig_out, hda_nid_t dig_in)
@@ -994,7 +1276,7 @@
 					struct auto_pin_cfg *cfg)
 {
 	struct sigmatel_spec *spec = codec->spec;
-	hda_nid_t pin = cfg->hp_pin;
+	hda_nid_t pin = cfg->hp_pins[0];
 	unsigned int wid_caps;
 
 	if (! pin)
@@ -1007,6 +1289,57 @@
 	return 0;
 }
 
+/*
+ * Add LFE playback controls for the first mono speaker pin with an out amp.
+ *
+ * Speaker pins are searched first.  Line-out pins are searched only when the
+ * auto-config lists no speaker pins, and only those whose default config
+ * reports a speaker device qualify.  Finding no such pin is not an error.
+ * The cfg argument is not read; the search uses spec->autocfg.
+ * Returns 0, or the negative error from create_controls().
+ */
+static int stac9200_auto_create_lfe_ctls(struct hda_codec *codec,
+					struct auto_pin_cfg *cfg)
+{
+	/* mono + amp: out-amp capability set, stereo capability clear */
+	const unsigned long mono_amp = AC_WCAP_STEREO | AC_WCAP_OUT_AMP;
+	struct sigmatel_spec *spec = codec->spec;
+	struct auto_pin_cfg *acfg = &spec->autocfg;
+	hda_nid_t lfe_pin = 0x0;
+	int err, n;
+
+	for (n = 0; n < acfg->speaker_outs; n++) {
+		hda_nid_t pin = acfg->speaker_pins[n];
+
+		if ((get_wcaps(codec, pin) & mono_amp) == AC_WCAP_OUT_AMP) {
+			/* found a mono speaker with an amp, must be lfe */
+			lfe_pin = pin;
+			break;
+		}
+	}
+
+	/* if speaker_outs is 0, then speakers may be in line_outs */
+	for (n = 0; !lfe_pin && !acfg->speaker_outs && n < acfg->line_outs; n++) {
+		hda_nid_t pin = acfg->line_out_pins[n];
+		unsigned long def_cfg;
+
+		def_cfg = snd_hda_codec_read(codec, pin, 0,
+					     AC_VERB_GET_CONFIG_DEFAULT, 0x00);
+		if (get_defcfg_device(def_cfg) != AC_JACK_SPEAKER)
+			continue;
+		if ((get_wcaps(codec, pin) & mono_amp) == AC_WCAP_OUT_AMP)
+			lfe_pin = pin;
+	}
+
+	if (!lfe_pin)
+		return 0;
+
+	err = create_controls(spec, "LFE", lfe_pin, 1);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
 static int stac9200_parse_auto_config(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
@@ -1021,6 +1354,9 @@
 	if ((err = stac9200_auto_create_hp_ctls(codec, &spec->autocfg)) < 0)
 		return err;
 
+	if ((err = stac9200_auto_create_lfe_ctls(codec, &spec->autocfg)) < 0)
+		return err;
+
 	if (spec->autocfg.dig_out_pin)
 		spec->multiout.dig_out_nid = 0x05;
 	if (spec->autocfg.dig_in_pin)
@@ -1073,6 +1409,15 @@
 			    AC_VERB_SET_GPIO_DATA, gpiostate);
 }
 
+/* Write AC_USRSP_EN | event as nid's unsolicited-enable value, if nid has UNSOL_CAP */
+static void enable_pin_detect(struct hda_codec *codec, hda_nid_t nid,
+			      unsigned int event)
+{
+	if (!(get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP))
+		return;
+	snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | event);
+}
+
 static int stac92xx_init(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
@@ -1084,9 +1429,10 @@
 	/* set up pins */
 	if (spec->hp_detect) {
 		/* Enable unsolicited responses on the HP widget */
-		snd_hda_codec_write(codec, cfg->hp_pin, 0,
-				AC_VERB_SET_UNSOLICITED_ENABLE,
-				STAC_UNSOL_ENABLE);
+		for (i = 0; i < cfg->hp_outs; i++)
+			enable_pin_detect(codec, cfg->hp_pins[i],
+					  STAC_HP_EVENT);
+		stac92xx_auto_init_hp_out(codec);
 		/* fake event to set up pins */
 		codec->patch_ops.unsol_event(codec, STAC_HP_EVENT << 26);
 	} else {
@@ -1131,6 +1477,9 @@
 		kfree(spec->kctl_alloc);
 	}
 
+	if (spec->bios_pin_configs)
+		kfree(spec->bios_pin_configs);
+
 	kfree(spec);
 }
 
@@ -1139,6 +1488,8 @@
 {
 	unsigned int pin_ctl = snd_hda_codec_read(codec, nid,
 			0, AC_VERB_GET_PIN_WIDGET_CONTROL, 0x00);
+	if (flag == AC_PINCTL_OUT_EN && (pin_ctl & AC_PINCTL_IN_EN))
+		return;
 	snd_hda_codec_write(codec, nid, 0,
 			AC_VERB_SET_PIN_WIDGET_CONTROL,
 			pin_ctl | flag);
@@ -1154,33 +1505,57 @@
 			pin_ctl & ~flag);
 }
 
-static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res)
+/* Return 1 if bit 31 of nid's pin-sense response is set, else 0 (0 for nid 0) */
+static int get_pin_presence(struct hda_codec *codec, hda_nid_t nid)
+{
+	unsigned int sense;
+	if (!nid)
+		return 0;
+	sense = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_PIN_SENSE, 0x00);
+	return (sense & (1U << 31)) != 0;	/* 1U: shifting int 1 by 31 is undefined */
+}
+
+static void stac92xx_hp_detect(struct hda_codec *codec, unsigned int res)
 {
 	struct sigmatel_spec *spec = codec->spec;
 	struct auto_pin_cfg *cfg = &spec->autocfg;
 	int i, presence;
 
-	if ((res >> 26) != STAC_HP_EVENT)
-		return;
-
-	presence = snd_hda_codec_read(codec, cfg->hp_pin, 0,
-			AC_VERB_GET_PIN_SENSE, 0x00) >> 31;
+	presence = 0;	/* stays 0 unless some HP pin reports presence */
+	for (i = 0; i < cfg->hp_outs; i++) {
+		presence = get_pin_presence(codec, cfg->hp_pins[i]);
+		if (presence)
+			break;
+	}
 
 	if (presence) {
 		/* disable lineouts, enable hp */
 		for (i = 0; i < cfg->line_outs; i++)
 			stac92xx_reset_pinctl(codec, cfg->line_out_pins[i],
 						AC_PINCTL_OUT_EN);
-		stac92xx_set_pinctl(codec, cfg->hp_pin, AC_PINCTL_OUT_EN);
+		for (i = 0; i < cfg->speaker_outs; i++)	/* speaker pins lose OUT_EN as well */
+			stac92xx_reset_pinctl(codec, cfg->speaker_pins[i],
+						AC_PINCTL_OUT_EN);
 	} else {
 		/* enable lineouts, disable hp */
 		for (i = 0; i < cfg->line_outs; i++)
 			stac92xx_set_pinctl(codec, cfg->line_out_pins[i],
 						AC_PINCTL_OUT_EN);
-		stac92xx_reset_pinctl(codec, cfg->hp_pin, AC_PINCTL_OUT_EN);
+		for (i = 0; i < cfg->speaker_outs; i++)	/* OUT_EN requested again on speaker pins */
+			stac92xx_set_pinctl(codec, cfg->speaker_pins[i],
+						AC_PINCTL_OUT_EN);
 	}
 } 
 
+/* Dispatch an unsolicited response by res >> 26; only STAC_HP_EVENT is handled */
+static void stac92xx_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+	unsigned int tag = res >> 26;
+
+	if (tag == STAC_HP_EVENT)
+		stac92xx_hp_detect(codec, res);
+}
+
 #ifdef CONFIG_PM
 static int stac92xx_resume(struct hda_codec *codec)
 {
@@ -1188,6 +1563,7 @@
 	int i;
 
 	stac92xx_init(codec);
+	stac92xx_set_config_regs(codec);
 	for (i = 0; i < spec->num_mixers; i++)
 		snd_hda_resume_ctls(codec, spec->mixers[i]);
 	if (spec->multiout.dig_out_nid)
@@ -1220,12 +1596,18 @@
 		return -ENOMEM;
 
 	codec->spec = spec;
+	spec->num_pins = 8;
+	spec->pin_nids = stac9200_pin_nids;
 	spec->board_config = snd_hda_check_board_config(codec, stac9200_cfg_tbl);
-	if (spec->board_config < 0)
-                snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9200, using BIOS defaults\n");
-	else {
-		spec->num_pins = 8;
-		spec->pin_nids = stac9200_pin_nids;
+	if (spec->board_config < 0) {
+		snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9200, using BIOS defaults\n");
+		err = stac92xx_save_bios_config_regs(codec);
+		if (err < 0) {
+			stac92xx_free(codec);
+			return err;
+		}
+		spec->pin_configs = spec->bios_pin_configs;
+	} else {
 		spec->pin_configs = stac9200_brd_tbl[spec->board_config];
 		stac92xx_set_config_regs(codec);
 	}
@@ -1261,13 +1643,19 @@
 		return -ENOMEM;
 
 	codec->spec = spec;
+	spec->num_pins = 10;
+	spec->pin_nids = stac922x_pin_nids;
 	spec->board_config = snd_hda_check_board_config(codec, stac922x_cfg_tbl);
-	if (spec->board_config < 0)
-                snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC922x, "
-			    "using BIOS defaults\n");
-	else if (stac922x_brd_tbl[spec->board_config] != NULL) {
-		spec->num_pins = 10;
-		spec->pin_nids = stac922x_pin_nids;
+	if (spec->board_config < 0) {
+		snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC922x, "
+			"using BIOS defaults\n");
+		err = stac92xx_save_bios_config_regs(codec);
+		if (err < 0) {
+			stac92xx_free(codec);
+			return err;
+		}
+		spec->pin_configs = spec->bios_pin_configs;
+	} else if (stac922x_brd_tbl[spec->board_config] != NULL) {
 		spec->pin_configs = stac922x_brd_tbl[spec->board_config];
 		stac92xx_set_config_regs(codec);
 	}
@@ -1281,25 +1669,6 @@
 
 	spec->multiout.dac_nids = spec->dac_nids;
 	
-	switch (spec->board_config) {
-	case STAC_D965_2112:
-		spec->adc_nids = stac9227_adc_nids;
-		spec->mux_nids = stac9227_mux_nids;
-#if 0
-		spec->multiout.dac_nids = d965_2112_dac_nids;
-		spec->multiout.num_dacs = ARRAY_SIZE(d965_2112_dac_nids);
-#endif
-		spec->init = d965_2112_core_init;
-		spec->mixer = stac9227_mixer;
-		break;
-	case STAC_D965_284B:
-		spec->adc_nids = stac9227_adc_nids;
-		spec->mux_nids = stac9227_mux_nids;
-		spec->init = stac9227_core_init;
-		spec->mixer = stac9227_mixer;
-		break;
-	}
-
 	err = stac92xx_parse_auto_config(codec, 0x08, 0x09);
 	if (err < 0) {
 		stac92xx_free(codec);
@@ -1324,22 +1693,44 @@
 		return -ENOMEM;
 
 	codec->spec = spec;
+	spec->num_pins = 14;
+	spec->pin_nids = stac927x_pin_nids;
 	spec->board_config = snd_hda_check_board_config(codec, stac927x_cfg_tbl);
-	if (spec->board_config < 0)
+	if (spec->board_config < 0) {
                 snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC927x, using BIOS defaults\n");
-	else {
-		spec->num_pins = 14;
-		spec->pin_nids = stac927x_pin_nids;
+		err = stac92xx_save_bios_config_regs(codec);
+		if (err < 0) {
+			stac92xx_free(codec);
+			return err;
+		}
+		spec->pin_configs = spec->bios_pin_configs;
+	} else if (stac927x_brd_tbl[spec->board_config] != NULL) {
 		spec->pin_configs = stac927x_brd_tbl[spec->board_config];
 		stac92xx_set_config_regs(codec);
 	}
 
-	spec->adc_nids = stac927x_adc_nids;
-	spec->mux_nids = stac927x_mux_nids;
-	spec->num_muxes = 3;
-
-	spec->init = stac927x_core_init;
-	spec->mixer = stac927x_mixer;
+	switch (spec->board_config) {
+	case STAC_D965_3ST:
+		spec->adc_nids = stac927x_adc_nids;
+		spec->mux_nids = stac927x_mux_nids;
+		spec->num_muxes = 3;
+		spec->init = d965_core_init;
+		spec->mixer = stac9227_mixer;
+		break;
+	case STAC_D965_5ST:
+		spec->adc_nids = stac927x_adc_nids;
+		spec->mux_nids = stac927x_mux_nids;
+		spec->num_muxes = 3;
+		spec->init = d965_core_init;
+		spec->mixer = stac9227_mixer;
+		break;
+	default:
+		spec->adc_nids = stac927x_adc_nids;
+		spec->mux_nids = stac927x_mux_nids;
+		spec->num_muxes = 3;
+		spec->init = stac927x_core_init;
+		spec->mixer = stac927x_mixer;
+	}
 
 	spec->multiout.dac_nids = spec->dac_nids;
 
@@ -1354,11 +1745,57 @@
 	return 0;
 }
 
+/*
+ * Probe-time setup for STAC9205-class codecs.  Returns 0 on success,
+ * -ENOMEM if the spec cannot be allocated, or the negative error of a
+ * failed helper after calling stac92xx_free() on the codec.
+ */
+static int patch_stac9205(struct hda_codec *codec)
+{
+	struct sigmatel_spec *spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	int err;
+
+	if (!spec)
+		return -ENOMEM;
+	codec->spec = spec;
+	spec->num_pins = 14;
+	spec->pin_nids = stac9205_pin_nids;
+	spec->board_config = snd_hda_check_board_config(codec, stac9205_cfg_tbl);
+	if (spec->board_config >= 0) {
+		/* model matched stac9205_cfg_tbl: use its pin config entry */
+		spec->pin_configs = stac9205_brd_tbl[spec->board_config];
+		stac92xx_set_config_regs(codec);
+	} else {
+		snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC9205, using BIOS defaults\n");
+		err = stac92xx_save_bios_config_regs(codec);
+		if (err < 0)
+			goto fail;
+		spec->pin_configs = spec->bios_pin_configs;
+	}
+
+	spec->adc_nids = stac9205_adc_nids;
+	spec->mux_nids = stac9205_mux_nids;
+	spec->num_muxes = 3;
+	spec->init = stac9205_core_init;
+	spec->mixer = stac9205_mixer;
+	spec->multiout.dac_nids = spec->dac_nids;
+
+	err = stac92xx_parse_auto_config(codec, 0x1f, 0x20);
+	if (err < 0)
+		goto fail;
+	codec->patch_ops = stac92xx_patch_ops;
+	return 0;
+
+fail:
+	stac92xx_free(codec);
+	return err;
+}
+
 /*
- * STAC 7661(?) hack
+ * STAC9872 hack
  */
 
-/* static config for Sony VAIO FE550G */
+/* static config for Sony VAIO FE550G and Sony VAIO AR */
 static hda_nid_t vaio_dacs[] = { 0x2 };
 #define VAIO_HP_DAC	0x5
 static hda_nid_t vaio_adcs[] = { 0x8 /*,0x6*/ };
@@ -1389,6 +1826,23 @@
 	{}
 };
 
+static struct hda_verb vaio_ar_init[] = { /* init verbs for the CXD9872AKD_VAIO ("vaio-ar") config */
+	{0x0a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP }, /* HP <- 0x2 */
+	{0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT }, /* Speaker <- 0x5 */
+	{0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 }, /* Mic? (<- 0x2) */
+	{0x0e, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN }, /* CD */
+/*	{0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },*/ /* Optical Out */
+	{0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 }, /* Mic? */
+	{0x15, AC_VERB_SET_CONNECT_SEL, 0x2}, /* mic-sel: 0a,0d,14,02 */
+	{0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, /* HP */
+	{0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, /* Speaker */
+/*	{0x10, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},*/ /* Optical Out */
+	{0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)}, /* capture sw/vol -> 0x8 */
+	{0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)}, /* CD-in -> 0x6 */
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, /* Mic-in -> 0x9 */
+	{} /* terminator */
+};
+
 /* bind volumes of both NID 0x02 and 0x05 */
 static int vaio_master_vol_put(struct snd_kcontrol *kcontrol,
 			       struct snd_ctl_elem_value *ucontrol)
@@ -1434,6 +1888,7 @@
 		.info = snd_hda_mixer_amp_volume_info,
 		.get = snd_hda_mixer_amp_volume_get,
 		.put = vaio_master_vol_put,
+		.tlv = { .c = snd_hda_mixer_amp_tlv },
 		.private_value = HDA_COMPOSE_AMP_VAL(0x02, 3, 0, HDA_OUTPUT),
 	},
 	{
@@ -1458,7 +1913,40 @@
 	{}
 };
 
-static struct hda_codec_ops stac7661_patch_ops = {
+static struct snd_kcontrol_new vaio_ar_mixer[] = { /* mixer controls for the CXD9872AKD_VAIO ("vaio-ar") config */
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Master Playback Volume",
+		.info = snd_hda_mixer_amp_volume_info,
+		.get = snd_hda_mixer_amp_volume_get,
+		.put = vaio_master_vol_put, /* binds the volumes of NID 0x02 and 0x05 */
+		.private_value = HDA_COMPOSE_AMP_VAL(0x02, 3, 0, HDA_OUTPUT),
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Master Playback Switch",
+		.info = snd_hda_mixer_amp_switch_info,
+		.get = snd_hda_mixer_amp_switch_get,
+		.put = vaio_master_sw_put,
+		.private_value = HDA_COMPOSE_AMP_VAL(0x02, 3, 0, HDA_OUTPUT),
+	},
+	/* HDA_CODEC_VOLUME("CD Capture Volume", 0x07, 0, HDA_INPUT), */
+	HDA_CODEC_VOLUME("Capture Volume", 0x09, 0, HDA_INPUT),
+	HDA_CODEC_MUTE("Capture Switch", 0x09, 0, HDA_INPUT),
+	/*HDA_CODEC_MUTE("Optical Out Switch", 0x10, 0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Optical Out Volume", 0x10, 0, HDA_OUTPUT),*/
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.name = "Capture Source",
+		.count = 1,
+		.info = stac92xx_mux_enum_info,
+		.get = stac92xx_mux_enum_get,
+		.put = stac92xx_mux_enum_put,
+	},
+	{} /* terminator */
+};
+
+static struct hda_codec_ops stac9872_patch_ops = {
 	.build_controls = stac92xx_build_controls,
 	.build_pcms = stac92xx_build_pcms,
 	.init = stac92xx_init,
@@ -1468,23 +1956,34 @@
 #endif
 };
 
-enum { STAC7661_VAIO };
+/*
+ * Sony VAIO board configurations switched on in patch_stac9872().
+ */
+enum {
+	CXD9872RD_VAIO,		/* FE and SZ series. id=0x83847661 and subsys=0x104D0700 or 104D1000. */
+	STAC9872AK_VAIO,	/* Unknown. id=0x83847662 and subsys=0x104D1200 or 104D1000. */
+	STAC9872K_VAIO,		/* Unknown. id=0x83847661 and subsys=0x104D1200. */
+	CXD9872AKD_VAIO		/* AR Series. id=0x83847664 and subsys=104D1300 */
+};
 
-static struct hda_board_config stac7661_cfg_tbl[] = {
-	{ .modelname = "vaio", .config = STAC7661_VAIO },
+static struct hda_board_config stac9872_cfg_tbl[] = {
+	{ .modelname = "vaio", .config = CXD9872RD_VAIO },
+	{ .modelname = "vaio-ar", .config = CXD9872AKD_VAIO },
 	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81e6,
-	  .config = STAC7661_VAIO },
+	  .config = CXD9872RD_VAIO },
 	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81ef,
-	  .config = STAC7661_VAIO },
+	  .config = CXD9872RD_VAIO },
+	{ .pci_subvendor = 0x104d, .pci_subdevice = 0x81fd,
+	  .config = CXD9872AKD_VAIO },
 	{}
 };
 
-static int patch_stac7661(struct hda_codec *codec)
+static int patch_stac9872(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec;
 	int board_config;
 
-	board_config = snd_hda_check_board_config(codec, stac7661_cfg_tbl);
+	board_config = snd_hda_check_board_config(codec, stac9872_cfg_tbl);
 	if (board_config < 0)
 		/* unknown config, let generic-parser do its job... */
 		return snd_hda_parse_generic_codec(codec);
@@ -1495,7 +1994,9 @@
 
 	codec->spec = spec;
 	switch (board_config) {
-	case STAC7661_VAIO:
+	case CXD9872RD_VAIO:
+	case STAC9872AK_VAIO:
+	case STAC9872K_VAIO:
 		spec->mixer = vaio_mixer;
 		spec->init = vaio_init;
 		spec->multiout.max_channels = 2;
@@ -1507,9 +2008,22 @@
 		spec->input_mux = &vaio_mux;
 		spec->mux_nids = vaio_mux_nids;
 		break;
+	
+	case CXD9872AKD_VAIO:
+		spec->mixer = vaio_ar_mixer;
+		spec->init = vaio_ar_init;
+		spec->multiout.max_channels = 2;
+		spec->multiout.num_dacs = ARRAY_SIZE(vaio_dacs);
+		spec->multiout.dac_nids = vaio_dacs;
+		spec->multiout.hp_nid = VAIO_HP_DAC;
+		spec->num_adcs = ARRAY_SIZE(vaio_adcs);
+		spec->adc_nids = vaio_adcs;
+		spec->input_mux = &vaio_mux;
+		spec->mux_nids = vaio_mux_nids;
+		break;
 	}
 
-	codec->patch_ops = stac7661_patch_ops;
+	codec->patch_ops = stac9872_patch_ops;
 	return 0;
 }
 
@@ -1525,12 +2039,12 @@
  	{ .id = 0x83847681, .name = "STAC9220D/9223D A2", .patch = patch_stac922x },
  	{ .id = 0x83847682, .name = "STAC9221 A2", .patch = patch_stac922x },
  	{ .id = 0x83847683, .name = "STAC9221D A2", .patch = patch_stac922x },
- 	{ .id = 0x83847618, .name = "STAC9227", .patch = patch_stac922x },
- 	{ .id = 0x83847619, .name = "STAC9227", .patch = patch_stac922x },
- 	{ .id = 0x83847616, .name = "STAC9228", .patch = patch_stac922x },
- 	{ .id = 0x83847617, .name = "STAC9228", .patch = patch_stac922x },
- 	{ .id = 0x83847614, .name = "STAC9229", .patch = patch_stac922x },
- 	{ .id = 0x83847615, .name = "STAC9229", .patch = patch_stac922x },
+ 	{ .id = 0x83847618, .name = "STAC9227", .patch = patch_stac927x },
+ 	{ .id = 0x83847619, .name = "STAC9227", .patch = patch_stac927x },
+ 	{ .id = 0x83847616, .name = "STAC9228", .patch = patch_stac927x },
+ 	{ .id = 0x83847617, .name = "STAC9228", .patch = patch_stac927x },
+ 	{ .id = 0x83847614, .name = "STAC9229", .patch = patch_stac927x },
+ 	{ .id = 0x83847615, .name = "STAC9229", .patch = patch_stac927x },
  	{ .id = 0x83847620, .name = "STAC9274", .patch = patch_stac927x },
  	{ .id = 0x83847621, .name = "STAC9274D", .patch = patch_stac927x },
  	{ .id = 0x83847622, .name = "STAC9273X", .patch = patch_stac927x },
@@ -1541,6 +2055,20 @@
  	{ .id = 0x83847627, .name = "STAC9271D", .patch = patch_stac927x },
  	{ .id = 0x83847628, .name = "STAC9274X5NH", .patch = patch_stac927x },
  	{ .id = 0x83847629, .name = "STAC9274D5NH", .patch = patch_stac927x },
- 	{ .id = 0x83847661, .name = "STAC7661", .patch = patch_stac7661 },
+ 	/* The following does not take into account .id=0x83847661 when subsys =
+ 	 * 104D0C00 which is STAC9225s. Because of this, some SZ Notebooks are
+ 	 * currently not fully supported.
+ 	 */
+ 	{ .id = 0x83847661, .name = "CXD9872RD/K", .patch = patch_stac9872 },
+ 	{ .id = 0x83847662, .name = "STAC9872AK", .patch = patch_stac9872 },
+ 	{ .id = 0x83847664, .name = "CXD9872AKD", .patch = patch_stac9872 },
+ 	{ .id = 0x838476a0, .name = "STAC9205", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a1, .name = "STAC9205D", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a2, .name = "STAC9204", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a3, .name = "STAC9204D", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a4, .name = "STAC9255", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a5, .name = "STAC9255D", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a6, .name = "STAC9254", .patch = patch_stac9205 },
+ 	{ .id = 0x838476a7, .name = "STAC9254D", .patch = patch_stac9205 },
 	{} /* terminator */
 };
diff --git a/sound/pci/ice1712/aureon.c b/sound/pci/ice1712/aureon.c
index 9492f3d..9e76ceb 100644
--- a/sound/pci/ice1712/aureon.c
+++ b/sound/pci/ice1712/aureon.c
@@ -60,6 +60,7 @@
 #include "ice1712.h"
 #include "envy24ht.h"
 #include "aureon.h"
+#include <sound/tlv.h>
 
 /* WM8770 registers */
 #define WM_DAC_ATTEN		0x00	/* DAC1-8 analog attenuation */
@@ -660,6 +661,12 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_wm_dac, -12700, 100, 1);	/* Master/Front/Rear/Center/LFE/Side Playback Volume */
+static DECLARE_TLV_DB_SCALE(db_scale_wm_pcm, -6400, 50, 1);	/* PCM Playback Volume */
+static DECLARE_TLV_DB_SCALE(db_scale_wm_adc, -1200, 100, 0);	/* Capture Volume */
+static DECLARE_TLV_DB_SCALE(db_scale_ac97_master, -4650, 150, 0);	/* AC97 Playback Volume */
+static DECLARE_TLV_DB_SCALE(db_scale_ac97_gain, -3450, 150, 0);	/* CD/Aux/Line/Mic/Phono Playback Volume */
+
 /*
  * Logarithmic volume values for WM8770
  * Computed as 20 * Log10(255 / x)
@@ -1409,10 +1416,13 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Master Playback Volume",
 		.info = wm_master_vol_info,
 		.get = wm_master_vol_get,
-		.put = wm_master_vol_put
+		.put = wm_master_vol_put,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1424,11 +1434,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Front Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 0
+		.private_value = (2 << 8) | 0,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1440,11 +1453,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Rear Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 2
+		.private_value = (2 << 8) | 2,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1456,11 +1472,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Center Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (1 << 8) | 4
+		.private_value = (1 << 8) | 4,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1472,11 +1491,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "LFE Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (1 << 8) | 5
+		.private_value = (1 << 8) | 5,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1488,11 +1510,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Side Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 6
+		.private_value = (2 << 8) | 6,
+		.tlv = { .p = db_scale_wm_dac }
 	}
 };
 
@@ -1506,10 +1531,13 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "PCM Playback Volume",
 		.info = wm_pcm_vol_info,
 		.get = wm_pcm_vol_get,
-		.put = wm_pcm_vol_put
+		.put = wm_pcm_vol_put,
+		.tlv = { .p = db_scale_wm_pcm }
  	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1520,10 +1548,13 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Capture Volume",
 		.info = wm_adc_vol_info,
 		.get = wm_adc_vol_get,
-		.put = wm_adc_vol_put
+		.put = wm_adc_vol_put,
+		.tlv = { .p = db_scale_wm_adc }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1567,11 +1598,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "AC97 Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_MASTER|AUREON_AC97_STEREO
+ 		.private_value = AC97_MASTER|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_master }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1583,11 +1617,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "CD Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_CD|AUREON_AC97_STEREO
+ 		.private_value = AC97_CD|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1599,11 +1636,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Aux Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_AUX|AUREON_AC97_STEREO
+ 		.private_value = AC97_AUX|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1615,11 +1655,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Line Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_LINE|AUREON_AC97_STEREO
+ 		.private_value = AC97_LINE|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1631,11 +1674,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Mic Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_MIC
+ 		.private_value = AC97_MIC,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1657,11 +1703,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "AC97 Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_MASTER|AUREON_AC97_STEREO
+ 		.private_value = AC97_MASTER|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_master }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1673,11 +1722,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "CD Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_AUX|AUREON_AC97_STEREO
+ 		.private_value = AC97_AUX|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1685,15 +1737,18 @@
  		.info = aureon_ac97_mute_info,
  		.get = aureon_ac97_mute_get,
  		.put = aureon_ac97_mute_put,
- 		.private_value = AC97_CD,
+ 		.private_value = AC97_CD
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Phono Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_CD|AUREON_AC97_STEREO
+ 		.private_value = AC97_CD|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1705,11 +1760,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Line Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_LINE|AUREON_AC97_STEREO
+ 		.private_value = AC97_LINE|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1721,11 +1779,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Mic Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_MIC
+ 		.private_value = AC97_MIC,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -1744,11 +1805,14 @@
  	},
  	{
  		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
  		.name = "Aux Playback Volume",
  		.info = aureon_ac97_vol_info,
  		.get = aureon_ac97_vol_get,
  		.put = aureon_ac97_vol_put,
- 		.private_value = AC97_VIDEO|AUREON_AC97_STEREO
+ 		.private_value = AC97_VIDEO|AUREON_AC97_STEREO,
+		.tlv = { .p = db_scale_ac97_gain }
  	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c
index bf20858..dc69392 100644
--- a/sound/pci/ice1712/ice1712.c
+++ b/sound/pci/ice1712/ice1712.c
@@ -62,6 +62,7 @@
 #include <sound/cs8427.h>
 #include <sound/info.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 
 #include <sound/asoundef.h>
 
@@ -1377,6 +1378,7 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_playback, -14400, 150, 0);	/* Multi Playback Volume, H/W Multi Capture Volume */
 
 static struct snd_kcontrol_new snd_ice1712_multi_playback_ctrls[] __devinitdata = {
 	{
@@ -1390,12 +1392,15 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Multi Playback Volume",
 		.info = snd_ice1712_pro_mixer_volume_info,
 		.get = snd_ice1712_pro_mixer_volume_get,
 		.put = snd_ice1712_pro_mixer_volume_put,
 		.private_value = 0,
 		.count = 10,
+		.tlv = { .p = db_scale_playback }
 	},
 };
 
@@ -1420,11 +1425,14 @@
 
 static struct snd_kcontrol_new snd_ice1712_multi_capture_analog_volume __devinitdata = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name = "H/W Multi Capture Volume",
 	.info = snd_ice1712_pro_mixer_volume_info,
 	.get = snd_ice1712_pro_mixer_volume_get,
 	.put = snd_ice1712_pro_mixer_volume_put,
 	.private_value = 10,
+	.tlv = { .p = db_scale_playback }
 };
 
 static struct snd_kcontrol_new snd_ice1712_multi_capture_spdif_volume __devinitdata = {
@@ -1857,7 +1865,7 @@
 {
 	struct snd_ice1712 *ice = snd_kcontrol_chip(kcontrol);
 	static unsigned int xrate[13] = {
-		8000, 9600, 11025, 12000, 1600, 22050, 24000,
+		8000, 9600, 11025, 12000, 16000, 22050, 24000,
 		32000, 44100, 48000, 64000, 88200, 96000
 	};
 	unsigned char oval;
@@ -1924,7 +1932,7 @@
 {
 	int val;
 	static unsigned int xrate[13] = {
-		8000, 9600, 11025, 12000, 1600, 22050, 24000,
+		8000, 9600, 11025, 12000, 16000, 22050, 24000,
 		32000, 44100, 48000, 64000, 88200, 96000
 	};
 
@@ -1941,7 +1949,7 @@
 						      struct snd_ctl_elem_value *ucontrol)
 {
 	static unsigned int xrate[13] = {
-		8000, 9600, 11025, 12000, 1600, 22050, 24000,
+		8000, 9600, 11025, 12000, 16000, 22050, 24000,
 		32000, 44100, 48000, 64000, 88200, 96000
 	};
 	unsigned char oval;
diff --git a/sound/pci/ice1712/phase.c b/sound/pci/ice1712/phase.c
index 502da1c..e08d73f 100644
--- a/sound/pci/ice1712/phase.c
+++ b/sound/pci/ice1712/phase.c
@@ -46,6 +46,7 @@
 #include "ice1712.h"
 #include "envy24ht.h"
 #include "phase.h"
+#include <sound/tlv.h>
 
 /* WM8770 registers */
 #define WM_DAC_ATTEN		0x00	/* DAC1-8 analog attenuation */
@@ -696,6 +697,9 @@
 	return 0;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_wm_dac, -12700, 100, 1);	/* Master/Front/Rear/Center/LFE/Side Playback Volume */
+static DECLARE_TLV_DB_SCALE(db_scale_wm_pcm, -6400, 50, 1);	/* PCM Playback Volume */
+
 static struct snd_kcontrol_new phase28_dac_controls[] __devinitdata = {
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -706,10 +710,13 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Master Playback Volume",
 		.info = wm_master_vol_info,
 		.get = wm_master_vol_get,
-		.put = wm_master_vol_put
+		.put = wm_master_vol_put,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -721,11 +728,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Front Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 0
+		.private_value = (2 << 8) | 0,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -737,11 +747,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Rear Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 2
+		.private_value = (2 << 8) | 2,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -753,11 +766,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Center Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (1 << 8) | 4
+		.private_value = (1 << 8) | 4,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -769,11 +785,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "LFE Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (1 << 8) | 5
+		.private_value = (1 << 8) | 5,
+		.tlv = { .p = db_scale_wm_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -785,11 +804,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Side Playback Volume",
 		.info = wm_vol_info,
 		.get = wm_vol_get,
 		.put = wm_vol_put,
-		.private_value = (2 << 8) | 6
+		.private_value = (2 << 8) | 6,
+		.tlv = { .p = db_scale_wm_dac }
 	}
 };
 
@@ -803,10 +825,13 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "PCM Playback Volume",
 		.info = wm_pcm_vol_info,
 		.get = wm_pcm_vol_get,
-		.put = wm_pcm_vol_put
+		.put = wm_pcm_vol_put,
+		.tlv = { .p = db_scale_wm_pcm }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
diff --git a/sound/pci/ice1712/pontis.c b/sound/pci/ice1712/pontis.c
index 0efcad9..6c74c2d 100644
--- a/sound/pci/ice1712/pontis.c
+++ b/sound/pci/ice1712/pontis.c
@@ -31,6 +31,7 @@
 
 #include <sound/core.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 
 #include "ice1712.h"
 #include "envy24ht.h"
@@ -564,6 +565,8 @@
 	return changed;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_volume, -6400, 50, 1);	/* shared by PCM Playback Volume and Capture Volume */
+
 /*
  * mixers
  */
@@ -571,17 +574,23 @@
 static struct snd_kcontrol_new pontis_controls[] __devinitdata = {
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "PCM Playback Volume",
 		.info = wm_dac_vol_info,
 		.get = wm_dac_vol_get,
 		.put = wm_dac_vol_put,
+		.tlv = { .p = db_scale_volume },
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Capture Volume",
 		.info = wm_adc_vol_info,
 		.get = wm_adc_vol_get,
 		.put = wm_adc_vol_put,
+		.tlv = { .p = db_scale_volume },
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
diff --git a/sound/pci/ice1712/prodigy192.c b/sound/pci/ice1712/prodigy192.c
index fdb5cb8..41b2605 100644
--- a/sound/pci/ice1712/prodigy192.c
+++ b/sound/pci/ice1712/prodigy192.c
@@ -35,6 +35,7 @@
 #include "envy24ht.h"
 #include "prodigy192.h"
 #include "stac946x.h"
+#include <sound/tlv.h>
 
 static inline void stac9460_put(struct snd_ice1712 *ice, int reg, unsigned char val)
 {
@@ -356,6 +357,9 @@
 }
 #endif
 
+static DECLARE_TLV_DB_SCALE(db_scale_dac, -19125, 75, 0);	/* Master Playback Volume, DAC Volume */
+static DECLARE_TLV_DB_SCALE(db_scale_adc, 0, 150, 0);	/* ADC Volume */
+
 /*
  * mixers
  */
@@ -368,14 +372,18 @@
 		.get = stac9460_dac_mute_get,
 		.put = stac9460_dac_mute_put,
 		.private_value = 1,
+		.tlv = { .p = db_scale_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "Master Playback Volume",
 		.info = stac9460_dac_vol_info,
 		.get = stac9460_dac_vol_get,
 		.put = stac9460_dac_vol_put,
 		.private_value = 1,
+		.tlv = { .p = db_scale_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -387,11 +395,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "DAC Volume",
 		.count = 6,
 		.info = stac9460_dac_vol_info,
 		.get = stac9460_dac_vol_get,
 		.put = stac9460_dac_vol_put,
+		.tlv = { .p = db_scale_dac }
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -404,11 +415,14 @@
 	},
 	{
 		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 		.name = "ADC Volume",
 		.count = 1,
 		.info = stac9460_adc_vol_info,
 		.get = stac9460_adc_vol_get,
 		.put = stac9460_adc_vol_put,
+		.tlv = { .p = db_scale_adc }
 	},
 #if 0
 	{
diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c
index fec9440..bf98ea3 100644
--- a/sound/pci/ice1712/revo.c
+++ b/sound/pci/ice1712/revo.c
@@ -87,16 +87,33 @@
  * initialize the chips on M-Audio Revolution cards
  */
 
-static unsigned int revo71_num_stereo_front[] = {2};
-static char *revo71_channel_names_front[] = {"PCM Playback Volume"};
+#define AK_DAC(xname,xch) { .name = xname, .num_channels = xch }
 
-static unsigned int revo71_num_stereo_surround[] = {1, 1, 2, 2};
-static char *revo71_channel_names_surround[] = {"PCM Center Playback Volume", "PCM LFE Playback Volume",
-						"PCM Side Playback Volume", "PCM Rear Playback Volume"};
+static struct snd_akm4xxx_dac_channel revo71_front[] = {
+	AK_DAC("PCM Playback Volume", 2)
+};
 
-static unsigned int revo51_num_stereo[] = {2, 1, 1, 2};
-static char *revo51_channel_names[] = {"PCM Playback Volume", "PCM Center Playback Volume",
-					"PCM LFE Playback Volume", "PCM Rear Playback Volume"};
+static struct snd_akm4xxx_dac_channel revo71_surround[] = {
+	AK_DAC("PCM Center Playback Volume", 1),
+	AK_DAC("PCM LFE Playback Volume", 1),
+	AK_DAC("PCM Side Playback Volume", 2),
+	AK_DAC("PCM Rear Playback Volume", 2),
+};
+
+static struct snd_akm4xxx_dac_channel revo51_dac[] = {
+	AK_DAC("PCM Playback Volume", 2),
+	AK_DAC("PCM Center Playback Volume", 1),
+	AK_DAC("PCM LFE Playback Volume", 1),
+	AK_DAC("PCM Rear Playback Volume", 2),
+};
+
+static struct snd_akm4xxx_adc_channel revo51_adc[] = {
+	{
+		.name = "PCM Capture Volume",
+		.switch_name = "PCM Capture Switch",
+		.num_channels = 2
+	},
+};
 
 static struct snd_akm4xxx akm_revo_front __devinitdata = {
 	.type = SND_AK4381,
@@ -104,8 +121,7 @@
 	.ops = {
 		.set_rate_val = revo_set_rate_val
 	},
-	.num_stereo = revo71_num_stereo_front,
-	.channel_names = revo71_channel_names_front
+	.dac_info = revo71_front,
 };
 
 static struct snd_ak4xxx_private akm_revo_front_priv __devinitdata = {
@@ -127,8 +143,7 @@
 	.ops = {
 		.set_rate_val = revo_set_rate_val
 	},
-	.num_stereo = revo71_num_stereo_surround,
-	.channel_names = revo71_channel_names_surround
+	.dac_info = revo71_surround,
 };
 
 static struct snd_ak4xxx_private akm_revo_surround_priv __devinitdata = {
@@ -149,8 +164,7 @@
 	.ops = {
 		.set_rate_val = revo_set_rate_val
 	},
-	.num_stereo = revo51_num_stereo,
-	.channel_names = revo51_channel_names
+	.dac_info = revo51_dac,
 };
 
 static struct snd_ak4xxx_private akm_revo51_priv __devinitdata = {
@@ -159,7 +173,25 @@
 	.data_mask = VT1724_REVO_CDOUT,
 	.clk_mask = VT1724_REVO_CCLK,
 	.cs_mask = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2,
-	.cs_addr = 0,
+	.cs_addr = VT1724_REVO_CS1 | VT1724_REVO_CS2,
+	.cs_none = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2,
+	.add_flags = VT1724_REVO_CCLK, /* high at init */
+	.mask_flags = 0,
+};
+
+static struct snd_akm4xxx akm_revo51_adc __devinitdata = {
+	.type = SND_AK5365,
+	.num_adcs = 2,
+	.adc_info = revo51_adc,
+};
+
+static struct snd_ak4xxx_private akm_revo51_adc_priv __devinitdata = {
+	.caddr = 2,
+	.cif = 0,
+	.data_mask = VT1724_REVO_CDOUT,
+	.clk_mask = VT1724_REVO_CCLK,
+	.cs_mask = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2,
+	.cs_addr = VT1724_REVO_CS0 | VT1724_REVO_CS2,
 	.cs_none = VT1724_REVO_CS0 | VT1724_REVO_CS1 | VT1724_REVO_CS2,
 	.add_flags = VT1724_REVO_CCLK, /* high at init */
 	.mask_flags = 0,
@@ -202,9 +234,13 @@
 		snd_ice1712_gpio_write_bits(ice, VT1724_REVO_MUTE, VT1724_REVO_MUTE);
 		break;
 	case VT1724_SUBDEVICE_REVOLUTION51:
-		ice->akm_codecs = 1;
+		ice->akm_codecs = 2;
 		if ((err = snd_ice1712_akm4xxx_init(ak, &akm_revo51, &akm_revo51_priv, ice)) < 0)
 			return err;
+		err = snd_ice1712_akm4xxx_init(ak + 1, &akm_revo51_adc,
+					       &akm_revo51_adc_priv, ice);
+		if (err < 0)
+			return err;
 		/* unmute all codecs - needed! */
 		snd_ice1712_gpio_write_bits(ice, VT1724_REVO_MUTE, VT1724_REVO_MUTE);
 		break;
diff --git a/sound/pci/ice1712/revo.h b/sound/pci/ice1712/revo.h
index dea52ea..efbb86e 100644
--- a/sound/pci/ice1712/revo.h
+++ b/sound/pci/ice1712/revo.h
@@ -42,7 +42,7 @@
 #define VT1724_REVO_CCLK	0x02
 #define VT1724_REVO_CDIN	0x04	/* not used */
 #define VT1724_REVO_CDOUT	0x08
-#define VT1724_REVO_CS0		0x10	/* not used */
+#define VT1724_REVO_CS0		0x10	/* AK5365 chipselect for Rev. 5.1 */
 #define VT1724_REVO_CS1		0x20	/* front AKM4381 chipselect */
 #define VT1724_REVO_CS2		0x40	/* surround AKM4355 chipselect */
 #define VT1724_REVO_MUTE	(1<<22)	/* 0 = all mute, 1 = normal operation */
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 6874263..72dbaed 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -2251,6 +2251,16 @@
 	/* ACLink on, 2 channels */
 	cnt = igetdword(chip, ICHREG(GLOB_CNT));
 	cnt &= ~(ICH_ACLINK | ICH_PCM_246_MASK);
+#ifdef CONFIG_SND_AC97_POWER_SAVE
+	/* do cold reset - a full ac97 powerdown may leave the controller in
+	 * a warm state in which it cannot actually communicate with the codec.
+	 */
+	iputdword(chip, ICHREG(GLOB_CNT), cnt & ~ICH_AC97COLD);
+	cnt = igetdword(chip, ICHREG(GLOB_CNT));
+	udelay(10);
+	iputdword(chip, ICHREG(GLOB_CNT), cnt | ICH_AC97COLD);
+	msleep(1);
+#else
 	/* finish cold or do warm reset */
 	cnt |= (cnt & ICH_AC97COLD) == 0 ? ICH_AC97COLD : ICH_AC97WARM;
 	iputdword(chip, ICHREG(GLOB_CNT), cnt);
@@ -2265,6 +2275,7 @@
 	return -EIO;
 
       __ok:
+#endif
 	if (probing) {
 		/* wait for any codec ready status.
 		 * Once it becomes ready it should remain ready
@@ -2485,7 +2496,7 @@
 		    card->shortname, chip);
 	chip->irq = pci->irq;
 	synchronize_irq(chip->irq);
-	snd_intel8x0_chip_init(chip, 1);
+	snd_intel8x0_chip_init(chip, 0);
 
 	/* re-initialize mixer stuff */
 	if (chip->device_type == DEVICE_INTEL_ICH4) {
@@ -2615,6 +2626,7 @@
 		/* not 48000Hz, tuning the clock.. */
 		chip->ac97_bus->clock = (chip->ac97_bus->clock * 48000) / pos;
 	printk(KERN_INFO "intel8x0: clocking to %d\n", chip->ac97_bus->clock);
+	snd_ac97_update_power(chip->ac97[0], AC97_PCM_FRONT_DAC_RATE, 0);
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c
index 9185028..268e2f7 100644
--- a/sound/pci/intel8x0m.c
+++ b/sound/pci/intel8x0m.c
@@ -1045,6 +1045,8 @@
 	for (i = 0; i < chip->pcm_devs; i++)
 		snd_pcm_suspend_all(chip->pcm[i]);
 	snd_ac97_suspend(chip->ac97);
+	if (chip->irq >= 0)
+		free_irq(chip->irq, chip);
 	pci_disable_device(pci);
 	pci_save_state(pci);
 	return 0;
@@ -1058,6 +1060,9 @@
 	pci_restore_state(pci);
 	pci_enable_device(pci);
 	pci_set_master(pci);
+	request_irq(pci->irq, snd_intel8x0_interrupt, IRQF_DISABLED|IRQF_SHARED,
+		    card->shortname, chip);
+	chip->irq = pci->irq;
 	snd_intel8x0_chip_init(chip, 0);
 	snd_ac97_resume(chip->ac97);
 
diff --git a/sound/pci/mixart/mixart.c b/sound/pci/mixart/mixart.c
index cc43ecd..216aee5 100644
--- a/sound/pci/mixart/mixart.c
+++ b/sound/pci/mixart/mixart.c
@@ -1109,13 +1109,13 @@
 	offset = offset & ~3; /* 4 bytes aligned */
 
 	switch(orig) {
-	case 0:  /* SEEK_SET */
+	case SEEK_SET:
 		file->f_pos = offset;
 		break;
-	case 1:  /* SEEK_CUR */
+	case SEEK_CUR:
 		file->f_pos += offset;
 		break;
-	case 2:  /* SEEK_END, offset is negative */
+	case SEEK_END: /* offset is negative */
 		file->f_pos = MIXART_BA0_SIZE + offset;
 		break;
 	default:
@@ -1135,13 +1135,13 @@
 	offset = offset & ~3; /* 4 bytes aligned */
 
 	switch(orig) {
-	case 0:  /* SEEK_SET */
+	case SEEK_SET:
 		file->f_pos = offset;
 		break;
-	case 1:  /* SEEK_CUR */
+	case SEEK_CUR:
 		file->f_pos += offset;
 		break;
-	case 2: /* SEEK_END, offset is negative */
+	case SEEK_END: /* offset is negative */
 		file->f_pos = MIXART_BA1_SIZE + offset;
 		break;
 	default:
diff --git a/sound/pci/mixart/mixart_mixer.c b/sound/pci/mixart/mixart_mixer.c
index ed47b73..13de0f7 100644
--- a/sound/pci/mixart/mixart_mixer.c
+++ b/sound/pci/mixart/mixart_mixer.c
@@ -31,6 +31,7 @@
 #include "mixart_core.h"
 #include "mixart_hwdep.h"
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include "mixart_mixer.h"
 
 static u32 mixart_analog_level[256] = {
@@ -388,12 +389,17 @@
 	return changed;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_analog, -9600, 50, 0);
+
 static struct snd_kcontrol_new mixart_control_analog_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	.info =		mixart_analog_vol_info,
 	.get =		mixart_analog_vol_get,
 	.put =		mixart_analog_vol_put,
+	.tlv = { .p = db_scale_analog },
 };
 
 /* shared */
@@ -866,14 +872,19 @@
 	return changed;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_digital, -10950, 50, 0);
+
 static struct snd_kcontrol_new snd_mixart_pcm_vol =
 {
 	.iface =        SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	/* count will be filled later */
 	.info =         mixart_digital_vol_info,		/* shared */
 	.get =          mixart_pcm_vol_get,
 	.put =          mixart_pcm_vol_put,
+	.tlv = { .p = db_scale_digital },
 };
 
 
@@ -984,10 +995,13 @@
 
 static struct snd_kcontrol_new mixart_control_monitor_vol = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =         "Monitoring Volume",
 	.info =		mixart_digital_vol_info,		/* shared */
 	.get =		mixart_monitor_vol_get,
 	.put =		mixart_monitor_vol_put,
+	.tlv = { .p = db_scale_digital },
 };
 
 /*
diff --git a/sound/pci/pcxhr/pcxhr_mixer.c b/sound/pci/pcxhr/pcxhr_mixer.c
index 94e63a1..b133ad9 100644
--- a/sound/pci/pcxhr/pcxhr_mixer.c
+++ b/sound/pci/pcxhr/pcxhr_mixer.c
@@ -31,6 +31,7 @@
 #include "pcxhr_hwdep.h"
 #include "pcxhr_core.h"
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/asoundef.h>
 #include "pcxhr_mixer.h"
 
@@ -43,6 +44,9 @@
 #define PCXHR_ANALOG_PLAYBACK_LEVEL_MAX  128	/*    0.0 dB */
 #define PCXHR_ANALOG_PLAYBACK_ZERO_LEVEL 104	/*  -24.0 dB ( 0.0 dB - fix level +24.0 dB ) */
 
+static DECLARE_TLV_DB_SCALE(db_scale_analog_capture, -9600, 50, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_analog_playback, -12800, 100, 0);
+
 static int pcxhr_update_analog_audio_level(struct snd_pcxhr *chip, int is_capture, int channel)
 {
 	int err, vol;
@@ -130,10 +134,13 @@
 
 static struct snd_kcontrol_new pcxhr_control_analog_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	.info =		pcxhr_analog_vol_info,
 	.get =		pcxhr_analog_vol_get,
 	.put =		pcxhr_analog_vol_put,
+	/* tlv will be filled later */
 };
 
 /* shared */
@@ -188,6 +195,7 @@
 #define PCXHR_DIGITAL_LEVEL_MAX		0x1ff	/* +18 dB */
 #define PCXHR_DIGITAL_ZERO_LEVEL	0x1b7	/*  0 dB */
 
+static DECLARE_TLV_DB_SCALE(db_scale_digital, -10950, 50, 0);
 
 #define MORE_THAN_ONE_STREAM_LEVEL	0x000001
 #define VALID_STREAM_PAN_LEVEL_MASK	0x800000
@@ -343,11 +351,14 @@
 static struct snd_kcontrol_new snd_pcxhr_pcm_vol =
 {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	/* name will be filled later */
 	/* count will be filled later */
 	.info =		pcxhr_digital_vol_info,		/* shared */
 	.get =		pcxhr_pcm_vol_get,
 	.put =		pcxhr_pcm_vol_put,
+	.tlv = { .p = db_scale_digital },
 };
 
 
@@ -433,10 +444,13 @@
 
 static struct snd_kcontrol_new pcxhr_control_monitor_vol = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =         "Monitoring Volume",
 	.info =		pcxhr_digital_vol_info,		/* shared */
 	.get =		pcxhr_monitor_vol_get,
 	.put =		pcxhr_monitor_vol_put,
+	.tlv = { .p = db_scale_digital },
 };
 
 /*
@@ -928,6 +942,7 @@
 			temp = pcxhr_control_analog_level;
 			temp.name = "Master Playback Volume";
 			temp.private_value = 0; /* playback */
+			temp.tlv.p = db_scale_analog_playback;
 			if ((err = snd_ctl_add(chip->card, snd_ctl_new1(&temp, chip))) < 0)
 				return err;
 			/* output mute controls */
@@ -963,6 +978,7 @@
 			temp = pcxhr_control_analog_level;
 			temp.name = "Master Capture Volume";
 			temp.private_value = 1; /* capture */
+			temp.tlv.p = db_scale_analog_capture;
 			if ((err = snd_ctl_add(chip->card, snd_ctl_new1(&temp, chip))) < 0)
 				return err;
 
diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
index f435fcd..fe210c8 100644
--- a/sound/pci/riptide/riptide.c
+++ b/sound/pci/riptide/riptide.c
@@ -673,9 +673,13 @@
 #define FIRMWARE_VERSIONS 1
 static union firmware_version firmware_versions[] = {
 	{
-	 .firmware.ASIC = 3,.firmware.CODEC = 2,
-	 .firmware.AUXDSP = 3,.firmware.PROG = 773,
-	 },
+		.firmware = {
+			.ASIC = 3,
+			.CODEC = 2,
+			.AUXDSP = 3,
+			.PROG = 773,
+		},
+	},
 };
 
 static u32 atoh(unsigned char *in, unsigned int len)
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index e5a52da..d3e07de 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -726,22 +726,36 @@
 }
 
 
-static int hdsp_check_for_firmware (struct hdsp *hdsp, int show_err)
+#ifdef HDSP_FW_LOADER
+static int __devinit hdsp_request_fw_loader(struct hdsp *hdsp);
+#endif
+
+static int hdsp_check_for_firmware (struct hdsp *hdsp, int load_on_demand)
 {
-	if (hdsp->io_type == H9652 || hdsp->io_type == H9632) return 0;
+	if (hdsp->io_type == H9652 || hdsp->io_type == H9632)
+		return 0;
 	if ((hdsp_read (hdsp, HDSP_statusRegister) & HDSP_DllError) != 0) {
-		snd_printk(KERN_ERR "Hammerfall-DSP: firmware not present.\n");
 		hdsp->state &= ~HDSP_FirmwareLoaded;
-		if (! show_err)
+		if (! load_on_demand)
 			return -EIO;
+		snd_printk(KERN_ERR "Hammerfall-DSP: firmware not present.\n");
 		/* try to load firmware */
-		if (hdsp->state & HDSP_FirmwareCached) {
-			if (snd_hdsp_load_firmware_from_cache(hdsp) != 0)
-				snd_printk(KERN_ERR "Hammerfall-DSP: Firmware loading from cache failed, please upload manually.\n");
-		} else {
-			snd_printk(KERN_ERR "Hammerfall-DSP: No firmware loaded nor cached, please upload firmware.\n");
+		if (! (hdsp->state & HDSP_FirmwareCached)) {
+#ifdef HDSP_FW_LOADER
+			if (! hdsp_request_fw_loader(hdsp))
+				return 0;
+#endif
+			snd_printk(KERN_ERR
+				   "Hammerfall-DSP: No firmware loaded nor "
+				   "cached, please upload firmware.\n");
+			return -EIO;
 		}
-		return -EIO;
+		if (snd_hdsp_load_firmware_from_cache(hdsp) != 0) {
+			snd_printk(KERN_ERR
+				   "Hammerfall-DSP: Firmware loading from "
+				   "cache failed, please upload manually.\n");
+			return -EIO;
+		}
 	}
 	return 0;
 }
@@ -3181,8 +3195,16 @@
 				return;
 			}
 		} else {
-			snd_iprintf(buffer, "No firmware loaded nor cached, please upload firmware.\n");
-			return;
+			int err = -EINVAL;
+#ifdef HDSP_FW_LOADER
+			err = hdsp_request_fw_loader(hdsp);
+#endif
+			if (err < 0) {
+				snd_iprintf(buffer,
+					    "No firmware loaded nor cached, "
+					    "please upload firmware.\n");
+				return;
+			}
 		}
 	}
 	
@@ -3851,7 +3873,7 @@
 	if (hdsp_check_for_iobox (hdsp))
 		return -EIO;
 
-	if (hdsp_check_for_firmware(hdsp, 1))
+	if (hdsp_check_for_firmware(hdsp, 0)) /* no auto-loading in trigger */
 		return -EIO;
 
 	spin_lock(&hdsp->lock);
diff --git a/sound/pci/trident/trident_main.c b/sound/pci/trident/trident_main.c
index 4930cc6..ebbe12d 100644
--- a/sound/pci/trident/trident_main.c
+++ b/sound/pci/trident/trident_main.c
@@ -40,6 +40,7 @@
 #include <sound/core.h>
 #include <sound/info.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <sound/trident.h>
 #include <sound/asoundef.h>
 
@@ -2627,6 +2628,8 @@
 	return 0;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_gvol, -6375, 25, 0);
+
 static int snd_trident_vol_control_put(struct snd_kcontrol *kcontrol,
 				       struct snd_ctl_elem_value *ucontrol)
 {
@@ -2653,6 +2656,7 @@
 	.get =		snd_trident_vol_control_get,
 	.put =		snd_trident_vol_control_put,
 	.private_value = 16,
+	.tlv = { .p = db_scale_gvol },
 };
 
 static struct snd_kcontrol_new snd_trident_vol_wave_control __devinitdata =
@@ -2663,6 +2667,7 @@
 	.get =		snd_trident_vol_control_get,
 	.put =		snd_trident_vol_control_put,
 	.private_value = 0,
+	.tlv = { .p = db_scale_gvol },
 };
 
 /*---------------------------------------------------------------------------
@@ -2730,6 +2735,7 @@
 	.info =		snd_trident_pcm_vol_control_info,
 	.get =		snd_trident_pcm_vol_control_get,
 	.put =		snd_trident_pcm_vol_control_put,
+	/* FIXME: no tlv yet */
 };
 
 /*---------------------------------------------------------------------------
@@ -2839,6 +2845,8 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_crvol, -3175, 25, 1);
+
 static struct snd_kcontrol_new snd_trident_pcm_rvol_control __devinitdata =
 {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
@@ -2848,6 +2856,7 @@
 	.info =		snd_trident_pcm_rvol_control_info,
 	.get =		snd_trident_pcm_rvol_control_get,
 	.put =		snd_trident_pcm_rvol_control_put,
+	.tlv = { .p = db_scale_crvol },
 };
 
 /*---------------------------------------------------------------------------
@@ -2903,6 +2912,7 @@
 	.info =		snd_trident_pcm_cvol_control_info,
 	.get =		snd_trident_pcm_cvol_control_get,
 	.put =		snd_trident_pcm_cvol_control_put,
+	.tlv = { .p = db_scale_crvol },
 };
 
 static void snd_trident_notify_pcm_change1(struct snd_card *card,
diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c
index 08da923..6db3d4c 100644
--- a/sound/pci/via82xx.c
+++ b/sound/pci/via82xx.c
@@ -59,6 +59,7 @@
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 #include <sound/ac97_codec.h>
 #include <sound/mpu401.h>
 #include <sound/initval.h>
@@ -1277,7 +1278,18 @@
 	if (! ratep->used)
 		ratep->rate = 0;
 	spin_unlock_irq(&ratep->lock);
-
+	if (! ratep->rate) {
+		if (! viadev->direction) {
+			snd_ac97_update_power(chip->ac97,
+					      AC97_PCM_FRONT_DAC_RATE, 0);
+			snd_ac97_update_power(chip->ac97,
+					      AC97_PCM_SURR_DAC_RATE, 0);
+			snd_ac97_update_power(chip->ac97,
+					      AC97_PCM_LFE_DAC_RATE, 0);
+		} else
+			snd_ac97_update_power(chip->ac97,
+					      AC97_PCM_LR_ADC_RATE, 0);
+	}
 	viadev->substream = NULL;
 	return 0;
 }
@@ -1687,21 +1699,29 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_dxs, -9450, 150, 1);
+
 static struct snd_kcontrol_new snd_via8233_pcmdxs_volume_control __devinitdata = {
 	.name = "PCM Playback Volume",
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.info = snd_via8233_dxs_volume_info,
 	.get = snd_via8233_pcmdxs_volume_get,
 	.put = snd_via8233_pcmdxs_volume_put,
+	.tlv = { .p = db_scale_dxs }
 };
 
 static struct snd_kcontrol_new snd_via8233_dxs_volume_control __devinitdata = {
 	.name = "VIA DXS Playback Volume",
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+		   SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.count = 4,
 	.info = snd_via8233_dxs_volume_info,
 	.get = snd_via8233_dxs_volume_get,
 	.put = snd_via8233_dxs_volume_put,
+	.tlv = { .p = db_scale_dxs }
 };
 
 /*
@@ -2393,6 +2413,7 @@
 		{ .subvendor = 0x16f3, .subdevice = 0x6405, .action = VIA_DXS_SRC }, /* Jetway K8M8MS */
 		{ .subvendor = 0x1734, .subdevice = 0x1078, .action = VIA_DXS_SRC }, /* FSC Amilo L7300 */
 		{ .subvendor = 0x1734, .subdevice = 0x1093, .action = VIA_DXS_SRC }, /* FSC */
+		{ .subvendor = 0x1734, .subdevice = 0x10ab, .action = VIA_DXS_SRC }, /* FSC */
 		{ .subvendor = 0x1849, .subdevice = 0x3059, .action = VIA_DXS_NO_VRA }, /* ASRock K7VM2 */
 		{ .subvendor = 0x1849, .subdevice = 0x9739, .action = VIA_DXS_SRC }, /* ASRock mobo(?) */
 		{ .subvendor = 0x1849, .subdevice = 0x9761, .action = VIA_DXS_SRC }, /* ASRock mobo(?) */
diff --git a/sound/pci/vx222/vx222.c b/sound/pci/vx222/vx222.c
index 9c03c6b..e7cd8ac 100644
--- a/sound/pci/vx222/vx222.c
+++ b/sound/pci/vx222/vx222.c
@@ -26,6 +26,7 @@
 #include <linux/moduleparam.h>
 #include <sound/core.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 #include "vx222.h"
 
 #define CARD_NAME "VX222"
@@ -72,6 +73,9 @@
 /*
  */
 
+static DECLARE_TLV_DB_SCALE(db_scale_old_vol, -11350, 50, 0);
+static DECLARE_TLV_DB_SCALE(db_scale_akm, -7350, 50, 0);
+
 static struct snd_vx_hardware vx222_old_hw = {
 
 	.name = "VX222/Old",
@@ -81,6 +85,7 @@
 	.num_ins = 1,
 	.num_outs = 1,
 	.output_level_max = VX_ANALOG_OUT_LEVEL_MAX,
+	.output_level_db_scale = db_scale_old_vol,
 };
 
 static struct snd_vx_hardware vx222_v2_hw = {
@@ -92,6 +97,7 @@
 	.num_ins = 1,
 	.num_outs = 1,
 	.output_level_max = VX2_AKM_LEVEL_MAX,
+	.output_level_db_scale = db_scale_akm,
 };
 
 static struct snd_vx_hardware vx222_mic_hw = {
@@ -103,6 +109,7 @@
 	.num_ins = 1,
 	.num_outs = 1,
 	.output_level_max = VX2_AKM_LEVEL_MAX,
+	.output_level_db_scale = db_scale_akm,
 };
 
 
diff --git a/sound/pci/vx222/vx222_ops.c b/sound/pci/vx222/vx222_ops.c
index 9b6d345..5e51950 100644
--- a/sound/pci/vx222/vx222_ops.c
+++ b/sound/pci/vx222/vx222_ops.c
@@ -28,6 +28,7 @@
 
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include <asm/io.h>
 #include "vx222.h"
 
@@ -845,6 +846,8 @@
 
 #define MIC_LEVEL_MAX	0xff
 
+static DECLARE_TLV_DB_SCALE(db_scale_mic, -6450, 50, 0);
+
 /*
  * controls API for input levels
  */
@@ -922,18 +925,24 @@
 
 static struct snd_kcontrol_new vx_control_input_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =		"Capture Volume",
 	.info =		vx_input_level_info,
 	.get =		vx_input_level_get,
 	.put =		vx_input_level_put,
+	.tlv = { .p = db_scale_mic },
 };
 
 static struct snd_kcontrol_new vx_control_mic_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =		"Mic Capture Volume",
 	.info =		vx_mic_level_info,
 	.get =		vx_mic_level_get,
 	.put =		vx_mic_level_put,
+	.tlv = { .p = db_scale_mic },
 };
 
 /*
diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c
index a55b5fd..24f6fc5 100644
--- a/sound/pci/ymfpci/ymfpci_main.c
+++ b/sound/pci/ymfpci/ymfpci_main.c
@@ -36,6 +36,7 @@
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 #include <sound/ymfpci.h>
 #include <sound/asoundef.h>
 #include <sound/mpu401.h>
@@ -1477,11 +1478,15 @@
 	return change;
 }
 
+static DECLARE_TLV_DB_LINEAR(db_scale_native, TLV_DB_GAIN_MUTE, 0);
+
 #define YMFPCI_DOUBLE(xname, xindex, reg) \
 { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = xindex, \
+  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, \
   .info = snd_ymfpci_info_double, \
   .get = snd_ymfpci_get_double, .put = snd_ymfpci_put_double, \
-  .private_value = reg }
+  .private_value = reg, \
+  .tlv = { .p = db_scale_native } }
 
 static int snd_ymfpci_info_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c
index 1c09e5f..fd3590f 100644
--- a/sound/pcmcia/pdaudiocf/pdaudiocf.c
+++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c
@@ -206,7 +206,7 @@
 		snd_pdacf_powerdown(chip);
 	chip->chip_status |= PDAUDIOCF_STAT_IS_STALE; /* to be sure */
 	snd_card_disconnect(chip->card);
-	snd_card_free_in_thread(chip->card);
+	snd_card_free_when_closed(chip->card);
 }
 
 /*
diff --git a/sound/pcmcia/vx/vxp_mixer.c b/sound/pcmcia/vx/vxp_mixer.c
index e237f6c..bced7b6 100644
--- a/sound/pcmcia/vx/vxp_mixer.c
+++ b/sound/pcmcia/vx/vxp_mixer.c
@@ -23,6 +23,7 @@
 #include <sound/driver.h>
 #include <sound/core.h>
 #include <sound/control.h>
+#include <sound/tlv.h>
 #include "vxpocket.h"
 
 #define MIC_LEVEL_MIN	0
@@ -63,12 +64,17 @@
 	return 0;
 }
 
+static DECLARE_TLV_DB_SCALE(db_scale_mic, -21, 3, 0);
+
 static struct snd_kcontrol_new vx_control_mic_level = {
 	.iface =	SNDRV_CTL_ELEM_IFACE_MIXER,
+	.access =	(SNDRV_CTL_ELEM_ACCESS_READWRITE |
+			 SNDRV_CTL_ELEM_ACCESS_TLV_READ),
 	.name =		"Mic Capture Volume",
 	.info =		vx_mic_level_info,
 	.get =		vx_mic_level_get,
 	.put =		vx_mic_level_put,
+	.tlv = { .p = db_scale_mic },
 };
 
 /*
diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c
index cafe664..3089fcc 100644
--- a/sound/pcmcia/vx/vxpocket.c
+++ b/sound/pcmcia/vx/vxpocket.c
@@ -27,6 +27,7 @@
 #include <pcmcia/ciscode.h>
 #include <pcmcia/cisreg.h>
 #include <sound/initval.h>
+#include <sound/tlv.h>
 
 /*
  */
@@ -65,7 +66,7 @@
 }
 
 /*
- * destructor, called from snd_card_free_in_thread()
+ * destructor, called from snd_card_free_when_closed()
  */
 static int snd_vxpocket_dev_free(struct snd_device *device)
 {
@@ -90,6 +91,8 @@
  * Only output levels can be modified
  */
 
+static DECLARE_TLV_DB_SCALE(db_scale_old_vol, -11350, 50, 0);
+
 static struct snd_vx_hardware vxpocket_hw = {
 	.name = "VXPocket",
 	.type = VX_TYPE_VXPOCKET,
@@ -99,6 +102,7 @@
 	.num_ins = 1,
 	.num_outs = 1,
 	.output_level_max = VX_ANALOG_OUT_LEVEL_MAX,
+	.output_level_db_scale = db_scale_old_vol,
 };	
 
 /* VX-pocket 440
@@ -120,6 +124,7 @@
 	.num_ins = 2,
 	.num_outs = 2,
 	.output_level_max = VX_ANALOG_OUT_LEVEL_MAX,
+	.output_level_db_scale = db_scale_old_vol,
 };	
 
 
@@ -363,7 +368,7 @@
 	chip->chip_status |= VX_STAT_IS_STALE; /* to be sure */
 	snd_card_disconnect(chip->card);
 	vxpocket_release(link);
-	snd_card_free_in_thread(chip->card);
+	snd_card_free_when_closed(chip->card);
 }
 
 /*
diff --git a/sound/ppc/beep.c b/sound/ppc/beep.c
index 5fec1e5..5f38f67 100644
--- a/sound/ppc/beep.c
+++ b/sound/ppc/beep.c
@@ -215,15 +215,18 @@
 {
 	struct pmac_beep *beep;
 	struct input_dev *input_dev;
+	struct snd_kcontrol *beep_ctl;
 	void *dmabuf;
 	int err = -ENOMEM;
 
 	beep = kzalloc(sizeof(*beep), GFP_KERNEL);
+	if (! beep)
+		return -ENOMEM;
 	dmabuf = dma_alloc_coherent(&chip->pdev->dev, BEEP_BUFLEN * 4,
 				    &beep->addr, GFP_KERNEL);
 	input_dev = input_allocate_device();
-	if (!beep || !dmabuf || !input_dev)
-		goto fail;
+	if (! dmabuf || ! input_dev)
+		goto fail1;
 
 	/* FIXME: set more better values */
 	input_dev->name = "PowerMac Beep";
@@ -244,17 +247,24 @@
 	beep->volume = BEEP_VOLUME;
 	beep->running = 0;
 
-	err = snd_ctl_add(chip->card, snd_ctl_new1(&snd_pmac_beep_mixer, chip));
+	beep_ctl = snd_ctl_new1(&snd_pmac_beep_mixer, chip);
+	err = snd_ctl_add(chip->card, beep_ctl);
 	if (err < 0)
-		goto fail;
+		goto fail1;
+ 
+ 	chip->beep = beep;
 
-	chip->beep = beep;
-	input_register_device(beep->dev);
-
-	return 0;
-
- fail:	input_free_device(input_dev);
-	kfree(dmabuf);
+	err = input_register_device(beep->dev);
+	if (err)
+		goto fail2;
+ 
+ 	return 0;
+ 
+ fail2:	snd_ctl_remove(chip->card, beep_ctl);
+ fail1:	input_free_device(input_dev);
+	if (dmabuf)
+		dma_free_coherent(&chip->pdev->dev, BEEP_BUFLEN * 4,
+				  dmabuf, beep->addr);
 	kfree(beep);
 	return err;
 }
diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c
index 59482a4..272ae38 100644
--- a/sound/ppc/keywest.c
+++ b/sound/ppc/keywest.c
@@ -117,6 +117,9 @@
 {
 	int err;
 	
+	if (!keywest_ctx || !keywest_ctx->client)
+		return -ENXIO;
+
 	if ((err = keywest_ctx->init_client(keywest_ctx)) < 0) {
 		snd_printk(KERN_ERR "tumbler: %i :cannot initialize the MCS\n", err);
 		return err;
diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index 84f6b19..cdff53e 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -190,7 +190,7 @@
 
 	ret = do_gpio_read(gp);
 
-	return (ret & 0xd) == (gp->active_val & 0xd);
+	return (ret & 0x1) == (gp->active_val & 0x1);
 }
 
 static int read_audio_gpio(struct pmac_gpio *gp)
@@ -198,7 +198,8 @@
 	int ret;
 	if (! gp->addr)
 		return 0;
-	ret = ((do_gpio_read(gp) & 0x02) !=0);
+	ret = do_gpio_read(gp);
+	ret = (ret & 0x02) !=0;
 	return ret == gp->active_state;
 }
 
@@ -1035,7 +1036,7 @@
 		return NULL;
   
 	for (np = np->child; np; np = np->sibling) {
-		char *property = get_property(np, "audio-gpio", NULL);
+		const char *property = get_property(np, "audio-gpio", NULL);
 		if (property && strcmp(property, name) == 0)
 			return np;
 	}  
@@ -1062,7 +1063,8 @@
 				struct pmac_gpio *gp, int is_compatible)
 {
 	struct device_node *node;
-	u32 *base, addr;
+	const u32 *base;
+	u32 addr;
 
 	if (is_compatible)
 		node = find_compatible_audio_device(device);
@@ -1074,9 +1076,9 @@
 		return -ENODEV;
 	}
 
-	base = (u32 *)get_property(node, "AAPL,address", NULL);
+	base = get_property(node, "AAPL,address", NULL);
 	if (! base) {
-		base = (u32 *)get_property(node, "reg", NULL);
+		base = get_property(node, "reg", NULL);
 		if (!base) {
 			DBG("(E) cannot find address for device %s !\n", device);
 			snd_printd("cannot find address for device %s\n", device);
@@ -1090,13 +1092,13 @@
 
 	gp->addr = addr & 0x0000ffff;
 	/* Try to find the active state, default to 0 ! */
-	base = (u32 *)get_property(node, "audio-gpio-active-state", NULL);
+	base = get_property(node, "audio-gpio-active-state", NULL);
 	if (base) {
 		gp->active_state = *base;
 		gp->active_val = (*base) ? 0x5 : 0x4;
 		gp->inactive_val = (*base) ? 0x4 : 0x5;
 	} else {
-		u32 *prop = NULL;
+		const u32 *prop = NULL;
 		gp->active_state = 0;
 		gp->active_val = 0x4;
 		gp->inactive_val = 0x5;
@@ -1105,7 +1107,7 @@
 		 * as we don't yet have an interpreter for these things
 		 */
 		if (platform)
-			prop = (u32 *)get_property(node, platform, NULL);
+			prop = get_property(node, platform, NULL);
 		if (prop) {
 			if (prop[3] == 0x9 && prop[4] == 0x9) {
 				gp->active_val = 0xd;
diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index f3ae6e2..e4935fc 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -2,6 +2,8 @@
  * Driver for DBRI sound chip found on Sparcs.
  * Copyright (C) 2004, 2005 Martin Habets (mhabets@users.sourceforge.net)
  *
+ * Converted to ring buffered version by Krzysztof Helt (krzysztof.h1@wp.pl)
+ *
  * Based entirely upon drivers/sbus/audio/dbri.c which is:
  * Copyright (C) 1997 Rudolf Koenig (rfkoenig@immd4.informatik.uni-erlangen.de)
  * Copyright (C) 1998, 1999 Brent Baccala (baccala@freesoft.org)
@@ -34,7 +36,7 @@
  * (the second one is a monitor/tee pipe, valid only for serial input).
  *
  * The mmcodec is connected via the CHI bus and needs the data & some
- * parameters (volume, balance, output selection) timemultiplexed in 8 byte
+ * parameters (volume, output selection) time-multiplexed in 8 byte
  * chunks. It also has a control mode, which serves for audio format setting.
  *
  * Looking at the CS4215 data sheet it is easy to set up 2 or 4 codecs on
@@ -83,7 +85,7 @@
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable Sun DBRI soundcard.");
 
-#define DBRI_DEBUG
+#undef DBRI_DEBUG
 
 #define D_INT	(1<<0)
 #define D_GEN	(1<<1)
@@ -104,17 +106,15 @@
 
 #define dprintk(a, x...) if(dbri_debug & a) printk(KERN_DEBUG x)
 
-#define DBRI_CMD(cmd, intr, value) ((cmd << 28) |			\
-				    (1 << 27) | \
-				    value)
 #else
-#define dprintk(a, x...)
+#define dprintk(a, x...) do { } while (0)
 
-#define DBRI_CMD(cmd, intr, value) ((cmd << 28) |			\
-				    (intr << 27) | \
-				    value)
 #endif				/* DBRI_DEBUG */
 
+#define DBRI_CMD(cmd, intr, value) ((cmd << 28) |	\
+				    (intr << 27) |	\
+				    value)
+
 /***************************************************************************
 	CS4215 specific definitions and structures
 ****************************************************************************/
@@ -160,7 +160,7 @@
      /* {    NA, (1 << 4), (5 << 3) }, */
 	{ 48000, (1 << 4), (6 << 3) },
 	{  9600, (1 << 4), (7 << 3) },
-	{  5513, (2 << 4), (0 << 3) },	/* Actually 5512.5 */
+	{  5512, (2 << 4), (0 << 3) },	/* Actually 5512.5 */
 	{ 11025, (2 << 4), (1 << 3) },
 	{ 18900, (2 << 4), (2 << 3) },
 	{ 22050, (2 << 4), (3 << 3) },
@@ -240,28 +240,21 @@
 #define REG9	0x24UL		/* Interrupt Queue Pointer */
 
 #define DBRI_NO_CMDS	64
-#define DBRI_NO_INTS	1	/* Note: the value of this define was
-				 * originally 2.  The ringbuffer to store
-				 * interrupts in dma is currently broken.
-				 * This is a temporary fix until the ringbuffer
-				 * is fixed.
-				 */
 #define DBRI_INT_BLK	64
 #define DBRI_NO_DESCS	64
 #define DBRI_NO_PIPES	32
-
-#define DBRI_MM_ONB	1
-#define DBRI_MM_SB	2
+#define DBRI_MAX_PIPE	(DBRI_NO_PIPES - 1)
 
 #define DBRI_REC	0
 #define DBRI_PLAY	1
 #define DBRI_NO_STREAMS	2
 
 /* One transmit/receive descriptor */
+/* A descriptor is in use when its ba != 0 */
 struct dbri_mem {
 	volatile __u32 word1;
-	volatile __u32 ba;	/* Transmit/Receive Buffer Address */
-	volatile __u32 nda;	/* Next Descriptor Address */
+	__u32 ba;	/* Transmit/Receive Buffer Address */
+	__u32 nda;	/* Next Descriptor Address */
 	volatile __u32 word4;
 };
 
@@ -269,8 +262,8 @@
  * the CPU and the DBRI
  */
 struct dbri_dma {
-	volatile s32 cmd[DBRI_NO_CMDS];	/* Place for commands       */
-	volatile s32 intr[DBRI_NO_INTS * DBRI_INT_BLK];	/* Interrupt field  */
+	s32 cmd[DBRI_NO_CMDS];			/* Place for commands */
+	volatile s32 intr[DBRI_INT_BLK];	/* Interrupt field  */
 	struct dbri_mem desc[DBRI_NO_DESCS];	/* Xmit/receive descriptors */
 };
 
@@ -282,58 +275,43 @@
 
 struct dbri_pipe {
 	u32 sdp;		/* SDP command word */
-	enum in_or_out direction;
 	int nextpipe;		/* Next pipe in linked list */
-	int prevpipe;
-	int cycle;		/* Offset of timeslot (bits) */
 	int length;		/* Length of timeslot (bits) */
 	int first_desc;		/* Index of first descriptor */
 	int desc;		/* Index of active descriptor */
 	volatile __u32 *recv_fixed_ptr;	/* Ptr to receive fixed data */
 };
 
-struct dbri_desc {
-	int inuse;		/* Boolean flag */
-	int next;		/* Index of next desc, or -1 */
-	unsigned int len;
-};
-
 /* Per stream (playback or record) information */
 struct dbri_streaminfo {
 	struct snd_pcm_substream *substream;
 	u32 dvma_buffer;	/* Device view of Alsa DMA buffer */
-	int left;		/* # of bytes left in DMA buffer  */
 	int size;		/* Size of DMA buffer             */
 	size_t offset;		/* offset in user buffer          */
 	int pipe;		/* Data pipe used                 */
 	int left_gain;		/* mixer elements                 */
 	int right_gain;
-	int balance;
 };
 
 /* This structure holds the information for both chips (DBRI & CS4215) */
 struct snd_dbri {
 	struct snd_card *card;	/* ALSA card */
-	struct snd_pcm *pcm;
 
 	int regs_size, irq;	/* Needed for unload */
 	struct sbus_dev *sdev;	/* SBUS device info */
 	spinlock_t lock;
 
-	volatile struct dbri_dma *dma;	/* Pointer to our DMA block */
+	struct dbri_dma *dma;	/* Pointer to our DMA block */
 	u32 dma_dvma;		/* DBRI visible DMA address */
 
 	void __iomem *regs;	/* dbri HW regs */
-	int dbri_version;	/* 'e' and up is OK */
 	int dbri_irqp;		/* intr queue pointer */
-	int wait_send;		/* sequence of command buffers send */
-	int wait_ackd;		/* sequence of command buffers acknowledged */
 
 	struct dbri_pipe pipes[DBRI_NO_PIPES];	/* DBRI's 32 data pipes */
-	struct dbri_desc descs[DBRI_NO_DESCS];
+	int next_desc[DBRI_NO_DESCS];		/* Index of next desc, or -1 */
+	spinlock_t cmdlock;	/* Protects cmd queue accesses */
+	s32 *cmdptr;		/* Pointer to the last queued cmd */
 
-	int chi_in_pipe;
-	int chi_out_pipe;
 	int chi_bpf;
 
 	struct cs4215 mm;	/* mmcodec special info */
@@ -345,8 +323,6 @@
 
 #define DBRI_MAX_VOLUME		63	/* Output volume */
 #define DBRI_MAX_GAIN		15	/* Input gain */
-#define DBRI_RIGHT_BALANCE	255
-#define DBRI_MID_BALANCE	(DBRI_RIGHT_BALANCE >> 1)
 
 /* DBRI Reg0 - Status Control Register - defines. (Page 17) */
 #define D_P		(1<<15)	/* Program command & queue pointer valid */
@@ -569,7 +545,7 @@
 #define DBRI_TD_TBC	(1<<0)	/* Transmit buffer Complete */
 #define DBRI_TD_STATUS(v)       ((v)&0xff)	/* Transmit status */
 			/* Maximum buffer size per TD: almost 8Kb */
-#define DBRI_TD_MAXCNT	((1 << 13) - 1)
+#define DBRI_TD_MAXCNT	((1 << 13) - 4)
 
 /* Receive descriptor defines */
 #define DBRI_RD_F	(1<<31)	/* End of Frame */
@@ -633,93 +609,124 @@
 CPU interrupt to signal completion.
 
 Since the DBRI can run in parallel with the CPU, several means of
-synchronization present themselves.  The method implemented here is close
-to the original scheme (Rudolf's), and uses 2 counters (wait_send and
-wait_ackd) to synchronize the command buffer between the CPU and the DBRI.
+synchronization present themselves. The only method implemented here is
+the use of dbri_cmdwait() to wait for execution of a batch of sent commands.
 
-A more sophisticated scheme might involve a circular command buffer
-or an array of command buffers.  A routine could fill one with
-commands and link it onto a list.  When a interrupt signaled
-completion of the current command buffer, look on the list for
-the next one.
+A circular command buffer is used here. A new command can be added
+while another is being executed. The scheme works by adding two WAIT commands
+after each sent batch of commands. When the next batch is prepared it is
+added after the WAIT commands, then the WAITs are replaced with a single JUMP
+command to the new batch. Then the DBRI is forced to reread the last WAIT
+command (replaced by the JUMP by then). If the DBRI is still executing
+previous commands the request to reread the WAIT command is ignored.
 
 Every time a routine wants to write commands to the DBRI, it must
-first call dbri_cmdlock() and get an initial pointer into dbri->dma->cmd
-in return. dbri_cmdlock() will block if the previous commands have not
-been completed yet. After this the commands can be written to the buffer,
-and dbri_cmdsend() is called with the final pointer value to send them
-to the DBRI.
+first call dbri_cmdlock() and get a pointer to free space in the
+dbri->dma->cmd buffer. After this, the commands can be written to
+the buffer, and dbri_cmdsend() is called with the final pointer value
+to send them to the DBRI.
 
 */
 
-static void dbri_process_interrupt_buffer(struct snd_dbri * dbri);
-
-enum dbri_lock { NoGetLock, GetLock };
-#define MAXLOOPS 10
-
-static volatile s32 *dbri_cmdlock(struct snd_dbri * dbri, enum dbri_lock get)
+#define MAXLOOPS 20
+/*
+ * Wait for the current command string to execute
+ */
+static void dbri_cmdwait(struct snd_dbri *dbri)
 {
 	int maxloops = MAXLOOPS;
-
-#ifndef SMP
-	if ((get == GetLock) && spin_is_locked(&dbri->lock)) {
-		printk(KERN_ERR "DBRI: cmdlock called while in spinlock.");
-	}
-#endif
+	unsigned long flags;
 
 	/* Delay if previous commands are still being processed */
-	while ((--maxloops) > 0 && (dbri->wait_send != dbri->wait_ackd)) {
+	spin_lock_irqsave(&dbri->lock, flags);
+	while ((--maxloops) > 0 && (sbus_readl(dbri->regs + REG0) & D_P)) {
+		spin_unlock_irqrestore(&dbri->lock, flags);
 		msleep_interruptible(1);
-		/* If dbri_cmdlock() got called from inside the
-		 * interrupt handler, this will do the processing.
-		 */
-		dbri_process_interrupt_buffer(dbri);
+		spin_lock_irqsave(&dbri->lock, flags);
 	}
+	spin_unlock_irqrestore(&dbri->lock, flags);
+
 	if (maxloops == 0) {
-		printk(KERN_ERR "DBRI: Chip never completed command buffer %d\n",
-			dbri->wait_send);
+		printk(KERN_ERR "DBRI: Chip never completed command buffer\n");
 	} else {
 		dprintk(D_CMD, "Chip completed command buffer (%d)\n",
 			MAXLOOPS - maxloops - 1);
 	}
+}
+/*
+ * Lock the command queue and return a pointer to space for len cmd words.
+ * It locks the cmdlock spinlock.
+ */
+static s32 *dbri_cmdlock(struct snd_dbri * dbri, int len)
+{
+	/* Space for 2 WAIT cmds (replaced later by 1 JUMP cmd) */
+	len += 2;
+	spin_lock(&dbri->cmdlock);
+	if (dbri->cmdptr - dbri->dma->cmd + len < DBRI_NO_CMDS - 2)
+		return dbri->cmdptr + 2;
+	else if (len < sbus_readl(dbri->regs + REG8) - dbri->dma_dvma)
+		return dbri->dma->cmd;
+	else
+		printk(KERN_ERR "DBRI: no space for commands.");
 
-	/*if (get == GetLock) spin_lock(&dbri->lock); */
-	return &dbri->dma->cmd[0];
+	return 0;
 }
 
-static void dbri_cmdsend(struct snd_dbri * dbri, volatile s32 * cmd)
+/*
+ * Send prepared cmd string. It works by writing a JUMP cmd into
+ * the last WAIT cmd and forcing the DBRI to reread the cmd.
+ * The JUMP cmd points to the new cmd string.
+ * It also releases the cmdlock spinlock.
+ *
+ * Lock must not be held before calling this.
+ */
+static void dbri_cmdsend(struct snd_dbri * dbri, s32 * cmd,int len)
 {
-	volatile s32 *ptr;
-	u32	reg;
+	s32 tmp, addr;
+	static int wait_id = 0;
 
-	for (ptr = &dbri->dma->cmd[0]; ptr < cmd; ptr++) {
+	wait_id++;
+	wait_id &= 0xffff;	/* restrict it to a 16 bit counter. */
+	*(cmd) = DBRI_CMD(D_WAIT, 1, wait_id);
+	*(cmd+1) = DBRI_CMD(D_WAIT, 1, wait_id);
+
+	/* Replace the last command with JUMP */
+	addr = dbri->dma_dvma + (cmd - len - dbri->dma->cmd) * sizeof(s32);
+	*(dbri->cmdptr+1) = addr;
+	*(dbri->cmdptr) = DBRI_CMD(D_JUMP, 0, 0);
+
+#ifdef DBRI_DEBUG
+	if (cmd > dbri->cmdptr) {
+		s32 *ptr;
+
+		for (ptr = dbri->cmdptr; ptr < cmd+2; ptr++)
+			dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
+	} else {
+		s32 *ptr = dbri->cmdptr;
+
 		dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
+		ptr++;
+		dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
+		for (ptr = dbri->dma->cmd; ptr < cmd+2; ptr++) {
+			dprintk(D_CMD, "cmd: %lx:%08x\n", (unsigned long)ptr, *ptr);
+		}
 	}
+#endif
 
-	if ((cmd - &dbri->dma->cmd[0]) >= DBRI_NO_CMDS - 1) {
-		printk(KERN_ERR "DBRI: Command buffer overflow! (bug in driver)\n");
-		/* Ignore the last part. */
-		cmd = &dbri->dma->cmd[DBRI_NO_CMDS - 3];
-	}
+	/* Reread the last command */
+	tmp = sbus_readl(dbri->regs + REG0);
+	tmp |= D_P;
+	sbus_writel(tmp, dbri->regs + REG0);
 
-	dbri->wait_send++;
-	dbri->wait_send &= 0xffff;	/* restrict it to a 16 bit counter. */
-	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
-	*(cmd++) = DBRI_CMD(D_WAIT, 1, dbri->wait_send);
-
-	/* Set command pointer and signal it is valid. */
-	sbus_writel(dbri->dma_dvma, dbri->regs + REG8);
-	reg = sbus_readl(dbri->regs + REG0);
-	reg |= D_P;
-	sbus_writel(reg, dbri->regs + REG0);
-
-	/*spin_unlock(&dbri->lock); */
+	dbri->cmdptr = cmd;
+	spin_unlock(&dbri->cmdlock);
 }
 
 /* Lock must be held when calling this */
 static void dbri_reset(struct snd_dbri * dbri)
 {
 	int i;
+	u32 tmp;
 
 	dprintk(D_GEN, "reset 0:%x 2:%x 8:%x 9:%x\n",
 		sbus_readl(dbri->regs + REG0),
@@ -729,39 +736,6 @@
 	sbus_writel(D_R, dbri->regs + REG0);	/* Soft Reset */
 	for (i = 0; (sbus_readl(dbri->regs + REG0) & D_R) && i < 64; i++)
 		udelay(10);
-}
-
-/* Lock must not be held before calling this */
-static void dbri_initialize(struct snd_dbri * dbri)
-{
-	volatile s32 *cmd;
-	u32 dma_addr, tmp;
-	unsigned long flags;
-	int n;
-
-	spin_lock_irqsave(&dbri->lock, flags);
-
-	dbri_reset(dbri);
-
-	cmd = dbri_cmdlock(dbri, NoGetLock);
-	dprintk(D_GEN, "init: cmd: %p, int: %p\n",
-		&dbri->dma->cmd[0], &dbri->dma->intr[0]);
-
-	/*
-	 * Initialize the interrupt ringbuffer.
-	 */
-	for (n = 0; n < DBRI_NO_INTS - 1; n++) {
-		dma_addr = dbri->dma_dvma;
-		dma_addr += dbri_dma_off(intr, ((n + 1) & DBRI_INT_BLK));
-		dbri->dma->intr[n * DBRI_INT_BLK] = dma_addr;
-	}
-	dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0);
-	dbri->dma->intr[n * DBRI_INT_BLK] = dma_addr;
-	dbri->dbri_irqp = 1;
-
-	/* Initialize pipes */
-	for (n = 0; n < DBRI_NO_PIPES; n++)
-		dbri->pipes[n].desc = dbri->pipes[n].first_desc = -1;
 
 	/* A brute approach - DBRI falls back to working burst size by itself
 	 * On SS20 D_S does not work, so do not try so high. */
@@ -769,16 +743,48 @@
 	tmp |= D_G | D_E;
 	tmp &= ~D_S;
 	sbus_writel(tmp, dbri->regs + REG0);
+}
 
+/* Lock must not be held before calling this */
+static void dbri_initialize(struct snd_dbri * dbri)
+{
+	s32 *cmd;
+	u32 dma_addr;
+	unsigned long flags;
+	int n;
+
+	spin_lock_irqsave(&dbri->lock, flags);
+
+	dbri_reset(dbri);
+
+	/* Initialize pipes */
+	for (n = 0; n < DBRI_NO_PIPES; n++)
+		dbri->pipes[n].desc = dbri->pipes[n].first_desc = -1;
+
+	spin_lock_init(&dbri->cmdlock);
+	/*
+	 * Initialize the interrupt ringbuffer.
+	 */
+	dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0);
+	dbri->dma->intr[0] = dma_addr;
+	dbri->dbri_irqp = 1;
 	/*
 	 * Set up the interrupt queue
 	 */
-	dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0);
+	spin_lock(&dbri->cmdlock);
+	cmd = dbri->cmdptr = dbri->dma->cmd;
 	*(cmd++) = DBRI_CMD(D_IIQ, 0, 0);
 	*(cmd++) = dma_addr;
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
+	dbri->cmdptr = cmd;
+	*(cmd++) = DBRI_CMD(D_WAIT, 1, 0);
+	*(cmd++) = DBRI_CMD(D_WAIT, 1, 0);
+	dma_addr = dbri->dma_dvma + dbri_dma_off(cmd, 0);
+	sbus_writel(dma_addr, dbri->regs + REG8);
+	spin_unlock(&dbri->cmdlock);
 
-	dbri_cmdsend(dbri, cmd);
 	spin_unlock_irqrestore(&dbri->lock, flags);
+	dbri_cmdwait(dbri);
 }
 
 /*
@@ -809,9 +815,9 @@
 {
 	int sdp;
 	int desc;
-	volatile int *cmd;
+	s32 *cmd;
 
-	if (pipe < 0 || pipe > 31) {
+	if (pipe < 0 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: reset_pipe called with illegal pipe number\n");
 		return;
 	}
@@ -822,25 +828,29 @@
 		return;
 	}
 
-	cmd = dbri_cmdlock(dbri, NoGetLock);
+	cmd = dbri_cmdlock(dbri, 3);
 	*(cmd++) = DBRI_CMD(D_SDP, 0, sdp | D_SDP_C | D_SDP_P);
 	*(cmd++) = 0;
-	dbri_cmdsend(dbri, cmd);
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
+	dbri_cmdsend(dbri, cmd, 3);
 
 	desc = dbri->pipes[pipe].first_desc;
-	while (desc != -1) {
-		dbri->descs[desc].inuse = 0;
-		desc = dbri->descs[desc].next;
-	}
+	if ( desc >= 0)
+		do {
+			dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0;
+			desc = dbri->next_desc[desc];
+		} while (desc != -1 && desc != dbri->pipes[pipe].first_desc);
 
 	dbri->pipes[pipe].desc = -1;
 	dbri->pipes[pipe].first_desc = -1;
 }
 
-/* FIXME: direction as an argument? */
+/*
+ * Lock must be held before calling this.
+ */
 static void setup_pipe(struct snd_dbri * dbri, int pipe, int sdp)
 {
-	if (pipe < 0 || pipe > 31) {
+	if (pipe < 0 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: setup_pipe called with illegal pipe number\n");
 		return;
 	}
@@ -860,119 +870,87 @@
 	dbri->pipes[pipe].sdp = sdp;
 	dbri->pipes[pipe].desc = -1;
 	dbri->pipes[pipe].first_desc = -1;
-	if (sdp & D_SDP_TO_SER)
-		dbri->pipes[pipe].direction = PIPEoutput;
-	else
-		dbri->pipes[pipe].direction = PIPEinput;
 
 	reset_pipe(dbri, pipe);
 }
 
-/* FIXME: direction not needed */
+/*
+ * Lock must be held before calling this.
+ */
 static void link_time_slot(struct snd_dbri * dbri, int pipe,
-			   enum in_or_out direction, int basepipe,
+			   int prevpipe, int nextpipe,
 			   int length, int cycle)
 {
-	volatile s32 *cmd;
+	s32 *cmd;
 	int val;
-	int prevpipe;
-	int nextpipe;
 
-	if (pipe < 0 || pipe > 31 || basepipe < 0 || basepipe > 31) {
+	if (pipe < 0 || pipe > DBRI_MAX_PIPE 
+			|| prevpipe < 0 || prevpipe > DBRI_MAX_PIPE
+			|| nextpipe < 0 || nextpipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR 
 		    "DBRI: link_time_slot called with illegal pipe number\n");
 		return;
 	}
 
-	if (dbri->pipes[pipe].sdp == 0 || dbri->pipes[basepipe].sdp == 0) {
+	if (dbri->pipes[pipe].sdp == 0 
+			|| dbri->pipes[prevpipe].sdp == 0
+			|| dbri->pipes[nextpipe].sdp == 0) {
 		printk(KERN_ERR "DBRI: link_time_slot called on uninitialized pipe\n");
 		return;
 	}
 
-	/* Deal with CHI special case:
-	 * "If transmission on edges 0 or 1 is desired, then cycle n
-	 *  (where n = # of bit times per frame...) must be used."
-	 *                  - DBRI data sheet, page 11
-	 */
-	if (basepipe == 16 && direction == PIPEoutput && cycle == 0)
-		cycle = dbri->chi_bpf;
-
-	if (basepipe == pipe) {
-		prevpipe = pipe;
-		nextpipe = pipe;
-	} else {
-		/* We're not initializing a new linked list (basepipe != pipe),
-		 * so run through the linked list and find where this pipe
-		 * should be sloted in, based on its cycle.  CHI confuses
-		 * things a bit, since it has a single anchor for both its
-		 * transmit and receive lists.
-		 */
-		if (basepipe == 16) {
-			if (direction == PIPEinput) {
-				prevpipe = dbri->chi_in_pipe;
-			} else {
-				prevpipe = dbri->chi_out_pipe;
-			}
-		} else {
-			prevpipe = basepipe;
-		}
-
-		nextpipe = dbri->pipes[prevpipe].nextpipe;
-
-		while (dbri->pipes[nextpipe].cycle < cycle
-		       && dbri->pipes[nextpipe].nextpipe != basepipe) {
-			prevpipe = nextpipe;
-			nextpipe = dbri->pipes[nextpipe].nextpipe;
-		}
-	}
-
-	if (prevpipe == 16) {
-		if (direction == PIPEinput) {
-			dbri->chi_in_pipe = pipe;
-		} else {
-			dbri->chi_out_pipe = pipe;
-		}
-	} else {
-		dbri->pipes[prevpipe].nextpipe = pipe;
-	}
-
+	dbri->pipes[prevpipe].nextpipe = pipe;
 	dbri->pipes[pipe].nextpipe = nextpipe;
-	dbri->pipes[pipe].cycle = cycle;
 	dbri->pipes[pipe].length = length;
 
-	cmd = dbri_cmdlock(dbri, NoGetLock);
+	cmd = dbri_cmdlock(dbri, 4);
 
-	if (direction == PIPEinput) {
-		val = D_DTS_VI | D_DTS_INS | D_DTS_PRVIN(prevpipe) | pipe;
-		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
-		*(cmd++) =
-		    D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe);
-		*(cmd++) = 0;
-	} else {
+	if (dbri->pipes[pipe].sdp & D_SDP_TO_SER) {
+		/* Deal with CHI special case:
+		 * "If transmission on edges 0 or 1 is desired, then cycle n
+		 *  (where n = # of bit times per frame...) must be used."
+		 *                  - DBRI data sheet, page 11
+		 */
+		if (prevpipe == 16 && cycle == 0)
+			cycle = dbri->chi_bpf;
+
 		val = D_DTS_VO | D_DTS_INS | D_DTS_PRVOUT(prevpipe) | pipe;
 		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
 		*(cmd++) = 0;
 		*(cmd++) =
 		    D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe);
+	} else {
+		val = D_DTS_VI | D_DTS_INS | D_DTS_PRVIN(prevpipe) | pipe;
+		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
+		*(cmd++) =
+		    D_TS_LEN(length) | D_TS_CYCLE(cycle) | D_TS_NEXT(nextpipe);
+		*(cmd++) = 0;
 	}
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 
-	dbri_cmdsend(dbri, cmd);
+	dbri_cmdsend(dbri, cmd, 4);
 }
 
+#if 0
+/*
+ * Lock must be held before calling this.
+ */
 static void unlink_time_slot(struct snd_dbri * dbri, int pipe,
 			     enum in_or_out direction, int prevpipe,
 			     int nextpipe)
 {
-	volatile s32 *cmd;
+	s32 *cmd;
 	int val;
 
-	if (pipe < 0 || pipe > 31 || prevpipe < 0 || prevpipe > 31) {
+	if (pipe < 0 || pipe > DBRI_MAX_PIPE 
+			|| prevpipe < 0 || prevpipe > DBRI_MAX_PIPE
+			|| nextpipe < 0 || nextpipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR 
 		    "DBRI: unlink_time_slot called with illegal pipe number\n");
 		return;
 	}
 
-	cmd = dbri_cmdlock(dbri, NoGetLock);
+	cmd = dbri_cmdlock(dbri, 4);
 
 	if (direction == PIPEinput) {
 		val = D_DTS_VI | D_DTS_DEL | D_DTS_PRVIN(prevpipe) | pipe;
@@ -985,9 +963,11 @@
 		*(cmd++) = 0;
 		*(cmd++) = D_TS_NEXT(nextpipe);
 	}
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 
-	dbri_cmdsend(dbri, cmd);
+	dbri_cmdsend(dbri, cmd, 4);
 }
+#endif
 
 /* xmit_fixed() / recv_fixed()
  *
@@ -1001,13 +981,16 @@
  * the actual time slot is.  The interrupt handler takes care of bit
  * ordering and alignment.  An 8-bit time slot will always end up
  * in the low-order 8 bits, filled either MSB-first or LSB-first,
- * depending on the settings passed to setup_pipe()
+ * depending on the settings passed to setup_pipe().
+ *
+ * Lock must not be held before calling it.
  */
 static void xmit_fixed(struct snd_dbri * dbri, int pipe, unsigned int data)
 {
-	volatile s32 *cmd;
+	s32 *cmd;
+	unsigned long flags;
 
-	if (pipe < 16 || pipe > 31) {
+	if (pipe < 16 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: xmit_fixed: Illegal pipe number\n");
 		return;
 	}
@@ -1032,17 +1015,22 @@
 	if (dbri->pipes[pipe].sdp & D_SDP_MSB)
 		data = reverse_bytes(data, dbri->pipes[pipe].length);
 
-	cmd = dbri_cmdlock(dbri, GetLock);
+	cmd = dbri_cmdlock(dbri, 3);
 
 	*(cmd++) = DBRI_CMD(D_SSP, 0, pipe);
 	*(cmd++) = data;
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 
-	dbri_cmdsend(dbri, cmd);
+	spin_lock_irqsave(&dbri->lock, flags);
+	dbri_cmdsend(dbri, cmd, 3);
+	spin_unlock_irqrestore(&dbri->lock, flags);
+	dbri_cmdwait(dbri);
+
 }
 
 static void recv_fixed(struct snd_dbri * dbri, int pipe, volatile __u32 * ptr)
 {
-	if (pipe < 16 || pipe > 31) {
+	if (pipe < 16 || pipe > DBRI_MAX_PIPE) {
 		printk(KERN_ERR "DBRI: recv_fixed called with illegal pipe number\n");
 		return;
 	}
@@ -1071,12 +1059,16 @@
  * and work by building chains of descriptors which identify the
  * data buffers.  Buffers too large for a single descriptor will
  * be spread across multiple descriptors.
+ *
+ * The descriptors of a stream are linked into a ring buffer.
+ *
+ * Lock must be held before calling this.
  */
 static int setup_descs(struct snd_dbri * dbri, int streamno, unsigned int period)
 {
 	struct dbri_streaminfo *info = &dbri->stream_info[streamno];
 	__u32 dvma_buffer;
-	int desc = 0;
+	int desc;
 	int len;
 	int first_desc = -1;
 	int last_desc = -1;
@@ -1119,11 +1111,23 @@
 		len &= ~3;
 	}
 
+	/* Free descriptors if pipe has any */
+	desc = dbri->pipes[info->pipe].first_desc;
+	if ( desc >= 0)
+		do {
+			dbri->dma->desc[desc].nda = dbri->dma->desc[desc].ba = 0;
+			desc = dbri->next_desc[desc];
+		} while (desc != -1 && desc != dbri->pipes[info->pipe].first_desc);
+
+	dbri->pipes[info->pipe].desc = -1;
+	dbri->pipes[info->pipe].first_desc = -1;
+
+	desc = 0;
 	while (len > 0) {
 		int mylen;
 
 		for (; desc < DBRI_NO_DESCS; desc++) {
-			if (!dbri->descs[desc].inuse)
+			if (!dbri->dma->desc[desc].ba)
 				break;
 		}
 		if (desc == DBRI_NO_DESCS) {
@@ -1131,37 +1135,33 @@
 			return -1;
 		}
 
-		if (len > DBRI_TD_MAXCNT) {
-			mylen = DBRI_TD_MAXCNT;	/* 8KB - 1 */
-		} else {
+		if (len > DBRI_TD_MAXCNT)
+			mylen = DBRI_TD_MAXCNT;	/* 8KB - 4 */
+		else
 			mylen = len;
-		}
-		if (mylen > period) {
-			mylen = period;
-		}
 
-		dbri->descs[desc].inuse = 1;
-		dbri->descs[desc].next = -1;
+		if (mylen > period)
+			mylen = period;
+
+		dbri->next_desc[desc] = -1;
 		dbri->dma->desc[desc].ba = dvma_buffer;
 		dbri->dma->desc[desc].nda = 0;
 
 		if (streamno == DBRI_PLAY) {
-			dbri->descs[desc].len = mylen;
 			dbri->dma->desc[desc].word1 = DBRI_TD_CNT(mylen);
 			dbri->dma->desc[desc].word4 = 0;
-			if (first_desc != -1)
-				dbri->dma->desc[desc].word1 |= DBRI_TD_M;
+			dbri->dma->desc[desc].word1 |= 
+			    DBRI_TD_F | DBRI_TD_B;
 		} else {
-			dbri->descs[desc].len = 0;
 			dbri->dma->desc[desc].word1 = 0;
 			dbri->dma->desc[desc].word4 =
 			    DBRI_RD_B | DBRI_RD_BCNT(mylen);
 		}
 
-		if (first_desc == -1) {
+		if (first_desc == -1)
 			first_desc = desc;
-		} else {
-			dbri->descs[last_desc].next = desc;
+		else {
+			dbri->next_desc[last_desc] = desc;
 			dbri->dma->desc[last_desc].nda =
 			    dbri->dma_dvma + dbri_dma_off(desc, desc);
 		}
@@ -1176,21 +1176,24 @@
 		return -1;
 	}
 
-	dbri->dma->desc[last_desc].word1 &= ~DBRI_TD_M;
-	if (streamno == DBRI_PLAY) {
-		dbri->dma->desc[last_desc].word1 |=
-		    DBRI_TD_I | DBRI_TD_F | DBRI_TD_B;
-	}
+	dbri->dma->desc[last_desc].nda =
+	    dbri->dma_dvma + dbri_dma_off(desc, first_desc);
+	dbri->next_desc[last_desc] = first_desc;
 	dbri->pipes[info->pipe].first_desc = first_desc;
 	dbri->pipes[info->pipe].desc = first_desc;
 
-	for (desc = first_desc; desc != -1; desc = dbri->descs[desc].next) {
+#ifdef DBRI_DEBUG
+	for (desc = first_desc; desc != -1; ) {
 		dprintk(D_DESC, "DESC %d: %08x %08x %08x %08x\n",
 			desc,
 			dbri->dma->desc[desc].word1,
 			dbri->dma->desc[desc].ba,
 			dbri->dma->desc[desc].nda, dbri->dma->desc[desc].word4);
+			desc = dbri->next_desc[desc];
+			if ( desc == first_desc )
+				break;
 	}
+#endif
 	return 0;
 }
 
@@ -1207,56 +1210,30 @@
 
 enum master_or_slave { CHImaster, CHIslave };
 
+/*
+ * Lock must not be held before calling it.
+ */
 static void reset_chi(struct snd_dbri * dbri, enum master_or_slave master_or_slave,
 		      int bits_per_frame)
 {
-	volatile s32 *cmd;
+	s32 *cmd;
 	int val;
-	static int chi_initialized = 0;	/* FIXME: mutex? */
 
-	if (!chi_initialized) {
+	/* Set CHI Anchor: Pipe 16 */
 
-		cmd = dbri_cmdlock(dbri, GetLock);
+	cmd = dbri_cmdlock(dbri, 4);
+	val = D_DTS_VO | D_DTS_VI | D_DTS_INS 
+		| D_DTS_PRVIN(16) | D_PIPE(16) | D_DTS_PRVOUT(16);
+	*(cmd++) = DBRI_CMD(D_DTS, 0, val);
+	*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
+	*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
+	dbri_cmdsend(dbri, cmd, 4);
 
-		/* Set CHI Anchor: Pipe 16 */
+	dbri->pipes[16].sdp = 1;
+	dbri->pipes[16].nextpipe = 16;
 
-		val = D_DTS_VI | D_DTS_INS | D_DTS_PRVIN(16) | D_PIPE(16);
-		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
-		*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
-		*(cmd++) = 0;
-
-		val = D_DTS_VO | D_DTS_INS | D_DTS_PRVOUT(16) | D_PIPE(16);
-		*(cmd++) = DBRI_CMD(D_DTS, 0, val);
-		*(cmd++) = 0;
-		*(cmd++) = D_TS_ANCHOR | D_TS_NEXT(16);
-
-		dbri->pipes[16].sdp = 1;
-		dbri->pipes[16].nextpipe = 16;
-		dbri->chi_in_pipe = 16;
-		dbri->chi_out_pipe = 16;
-
-#if 0
-		chi_initialized++;
-#endif
-	} else {
-		int pipe;
-
-		for (pipe = dbri->chi_in_pipe;
-		     pipe != 16; pipe = dbri->pipes[pipe].nextpipe) {
-			unlink_time_slot(dbri, pipe, PIPEinput,
-					 16, dbri->pipes[pipe].nextpipe);
-		}
-		for (pipe = dbri->chi_out_pipe;
-		     pipe != 16; pipe = dbri->pipes[pipe].nextpipe) {
-			unlink_time_slot(dbri, pipe, PIPEoutput,
-					 16, dbri->pipes[pipe].nextpipe);
-		}
-
-		dbri->chi_in_pipe = 16;
-		dbri->chi_out_pipe = 16;
-
-		cmd = dbri_cmdlock(dbri, GetLock);
-	}
+	cmd = dbri_cmdlock(dbri, 4);
 
 	if (master_or_slave == CHIslave) {
 		/* Setup DBRI for CHI Slave - receive clock, frame sync (FS)
@@ -1295,8 +1272,9 @@
 
 	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 	*(cmd++) = DBRI_CMD(D_CDM, 0, D_CDM_XCE | D_CDM_XEN | D_CDM_REN);
+	*(cmd++) = DBRI_CMD(D_PAUSE, 0, 0);
 
-	dbri_cmdsend(dbri, cmd);
+	dbri_cmdsend(dbri, cmd, 4);
 }
 
 /*
@@ -1307,9 +1285,14 @@
 In the standard SPARC audio configuration, the CS4215 codec is attached
 to the DBRI via the CHI interface and few of the DBRI's PIO pins.
 
+ * Lock must not be held before calling it.
+
 */
 static void cs4215_setup_pipes(struct snd_dbri * dbri)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&dbri->lock, flags);
 	/*
 	 * Data mode:
 	 * Pipe  4: Send timeslots 1-4 (audio data)
@@ -1333,6 +1316,9 @@
 	setup_pipe(dbri, 17, D_SDP_FIXED | D_SDP_TO_SER | D_SDP_MSB);
 	setup_pipe(dbri, 18, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB);
 	setup_pipe(dbri, 19, D_SDP_FIXED | D_SDP_FROM_SER | D_SDP_MSB);
+	spin_unlock_irqrestore(&dbri->lock, flags);
+
+	dbri_cmdwait(dbri);
 }
 
 static int cs4215_init_data(struct cs4215 *mm)
@@ -1364,7 +1350,7 @@
 	mm->status = 0;
 	mm->version = 0xff;
 	mm->precision = 8;	/* For ULAW */
-	mm->channels = 2;
+	mm->channels = 1;
 
 	return 0;
 }
@@ -1379,16 +1365,8 @@
 	} else {
 		/* Start by setting the playback attenuation. */
 		struct dbri_streaminfo *info = &dbri->stream_info[DBRI_PLAY];
-		int left_gain = info->left_gain % 64;
-		int right_gain = info->right_gain % 64;
-
-		if (info->balance < DBRI_MID_BALANCE) {
-			right_gain *= info->balance;
-			right_gain /= DBRI_MID_BALANCE;
-		} else {
-			left_gain *= DBRI_RIGHT_BALANCE - info->balance;
-			left_gain /= DBRI_MID_BALANCE;
-		}
+		int left_gain = info->left_gain & 0x3f;
+		int right_gain = info->right_gain & 0x3f;
 
 		dbri->mm.data[0] &= ~0x3f;	/* Reset the volume bits */
 		dbri->mm.data[1] &= ~0x3f;
@@ -1397,8 +1375,8 @@
 
 		/* Now set the recording gain. */
 		info = &dbri->stream_info[DBRI_REC];
-		left_gain = info->left_gain % 16;
-		right_gain = info->right_gain % 16;
+		left_gain = info->left_gain & 0xf;
+		right_gain = info->right_gain & 0xf;
 		dbri->mm.data[2] |= CS4215_LG(left_gain);
 		dbri->mm.data[3] |= CS4215_RG(right_gain);
 	}
@@ -1413,6 +1391,7 @@
 {
 	int data_width;
 	u32 tmp;
+	unsigned long flags;
 
 	dprintk(D_MM, "cs4215_open: %d channels, %d bits\n",
 		dbri->mm.channels, dbri->mm.precision);
@@ -1437,6 +1416,7 @@
 	 * bits.  The CS4215, it seems, observes TSIN (the delayed signal)
 	 * even if it's the CHI master.  Don't ask me...
 	 */
+	spin_lock_irqsave(&dbri->lock, flags);
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp &= ~(D_C);		/* Disable CHI */
 	sbus_writel(tmp, dbri->regs + REG0);
@@ -1455,15 +1435,16 @@
 	 */
 	data_width = dbri->mm.channels * dbri->mm.precision;
 
-	link_time_slot(dbri, 20, PIPEoutput, 16, 32, dbri->mm.offset + 32);
-	link_time_slot(dbri, 4, PIPEoutput, 16, data_width, dbri->mm.offset);
-	link_time_slot(dbri, 6, PIPEinput, 16, data_width, dbri->mm.offset);
-	link_time_slot(dbri, 21, PIPEinput, 16, 16, dbri->mm.offset + 40);
+	link_time_slot(dbri, 4, 16, 16, data_width, dbri->mm.offset);
+	link_time_slot(dbri, 20, 4, 16, 32, dbri->mm.offset + 32);
+	link_time_slot(dbri, 6, 16, 16, data_width, dbri->mm.offset);
+	link_time_slot(dbri, 21, 6, 16, 16, dbri->mm.offset + 40);
 
 	/* FIXME: enable CHI after _setdata? */
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp |= D_C;		/* Enable CHI */
 	sbus_writel(tmp, dbri->regs + REG0);
+	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	cs4215_setdata(dbri, 0);
 }
@@ -1475,6 +1456,7 @@
 {
 	int i, val;
 	u32 tmp;
+	unsigned long flags;
 
 	/* FIXME - let the CPU do something useful during these delays */
 
@@ -1511,6 +1493,7 @@
 	 * done in hardware by a TI 248 that delays the DBRI->4215
 	 * frame sync signal by eight clock cycles.  Anybody know why?
 	 */
+	spin_lock_irqsave(&dbri->lock, flags);
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp &= ~D_C;		/* Disable CHI */
 	sbus_writel(tmp, dbri->regs + REG0);
@@ -1524,17 +1507,20 @@
 	 * Pipe 19: Receive timeslot 7 (version). 
 	 */
 
-	link_time_slot(dbri, 17, PIPEoutput, 16, 32, dbri->mm.offset);
-	link_time_slot(dbri, 18, PIPEinput, 16, 8, dbri->mm.offset);
-	link_time_slot(dbri, 19, PIPEinput, 16, 8, dbri->mm.offset + 48);
+	link_time_slot(dbri, 17, 16, 16, 32, dbri->mm.offset);
+	link_time_slot(dbri, 18, 16, 16, 8, dbri->mm.offset);
+	link_time_slot(dbri, 19, 18, 16, 8, dbri->mm.offset + 48);
+	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	/* Wait for the chip to echo back CLB (Control Latch Bit) as zero */
 	dbri->mm.ctrl[0] &= ~CS4215_CLB;
 	xmit_fixed(dbri, 17, *(int *)dbri->mm.ctrl);
 
+	spin_lock_irqsave(&dbri->lock, flags);
 	tmp = sbus_readl(dbri->regs + REG0);
 	tmp |= D_C;		/* Enable CHI */
 	sbus_writel(tmp, dbri->regs + REG0);
+	spin_unlock_irqrestore(&dbri->lock, flags);
 
 	for (i = 10; ((dbri->mm.status & 0xe4) != 0x20); --i) {
 		msleep_interruptible(1);
@@ -1614,8 +1600,7 @@
 	    CS4215_BSEL_128 | CS4215_FREQ[freq_idx].xtal;
 
 	dbri->mm.channels = channels;
-	/* Stereo bit: 8 bit stereo not working yet. */
-	if ((channels > 1) && (dbri->mm.precision == 16))
+	if (channels == 2)
 		dbri->mm.ctrl[1] |= CS4215_DFR_STEREO;
 
 	ret = cs4215_setctrl(dbri);
@@ -1655,7 +1640,6 @@
 	}
 
 	cs4215_setup_pipes(dbri);
-
 	cs4215_init_data(&dbri->mm);
 
 	/* Enable capture of the status & version timeslots. */
@@ -1684,88 +1668,71 @@
 Complicated interrupts are handled by dedicated functions (which
 appear first in this file).  Any pending interrupts can be serviced by
 calling dbri_process_interrupt_buffer(), which works even if the CPU's
-interrupts are disabled.  This function is used by dbri_cmdlock()
-to make sure we're synced up with the chip before each command sequence,
-even if we're running cli'ed.
+interrupts are disabled.
 
 */
 
 /* xmit_descs()
  *
- * Transmit the current TD's for recording/playing, if needed.
+ * Starts transmitting the current TD's for recording/playing.
  * For playback, ALSA has filled the DMA memory with new data (we hope).
  */
-static void xmit_descs(unsigned long data)
+static void xmit_descs(struct snd_dbri *dbri)
 {
-	struct snd_dbri *dbri = (struct snd_dbri *) data;
 	struct dbri_streaminfo *info;
-	volatile s32 *cmd;
+	s32 *cmd;
 	unsigned long flags;
 	int first_td;
 
 	if (dbri == NULL)
 		return;		/* Disabled */
 
-	/* First check the recording stream for buffer overflow */
 	info = &dbri->stream_info[DBRI_REC];
 	spin_lock_irqsave(&dbri->lock, flags);
 
-	if ((info->left >= info->size) && (info->pipe >= 0)) {
+	if (info->pipe >= 0) {
 		first_td = dbri->pipes[info->pipe].first_desc;
 
 		dprintk(D_DESC, "xmit_descs rec @ TD %d\n", first_td);
 
 		/* Stream could be closed by the time we run. */
-		if (first_td < 0) {
-			goto play;
+		if (first_td >= 0) {
+			cmd = dbri_cmdlock(dbri, 2);
+			*(cmd++) = DBRI_CMD(D_SDP, 0,
+					    dbri->pipes[info->pipe].sdp
+					    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
+			*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
+			dbri_cmdsend(dbri, cmd, 2);
+
+			/* Reset our admin of the pipe. */
+			dbri->pipes[info->pipe].desc = first_td;
 		}
-
-		cmd = dbri_cmdlock(dbri, NoGetLock);
-		*(cmd++) = DBRI_CMD(D_SDP, 0,
-				    dbri->pipes[info->pipe].sdp
-				    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
-		*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
-		dbri_cmdsend(dbri, cmd);
-
-		/* Reset our admin of the pipe & bytes read. */
-		dbri->pipes[info->pipe].desc = first_td;
-		info->left = 0;
 	}
 
-play:
-	spin_unlock_irqrestore(&dbri->lock, flags);
-
-	/* Now check the playback stream for buffer underflow */
 	info = &dbri->stream_info[DBRI_PLAY];
-	spin_lock_irqsave(&dbri->lock, flags);
 
-	if ((info->left <= 0) && (info->pipe >= 0)) {
+	if (info->pipe >= 0) {
 		first_td = dbri->pipes[info->pipe].first_desc;
 
 		dprintk(D_DESC, "xmit_descs play @ TD %d\n", first_td);
 
 		/* Stream could be closed by the time we run. */
-		if (first_td < 0) {
-			spin_unlock_irqrestore(&dbri->lock, flags);
-			return;
+		if (first_td >= 0) {
+			cmd = dbri_cmdlock(dbri, 2);
+			*(cmd++) = DBRI_CMD(D_SDP, 0,
+					    dbri->pipes[info->pipe].sdp
+					    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
+			*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
+			dbri_cmdsend(dbri, cmd, 2);
+
+			/* Reset our admin of the pipe. */
+			dbri->pipes[info->pipe].desc = first_td;
 		}
-
-		cmd = dbri_cmdlock(dbri, NoGetLock);
-		*(cmd++) = DBRI_CMD(D_SDP, 0,
-				    dbri->pipes[info->pipe].sdp
-				    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
-		*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, first_td);
-		dbri_cmdsend(dbri, cmd);
-
-		/* Reset our admin of the pipe & bytes written. */
-		dbri->pipes[info->pipe].desc = first_td;
-		info->left = info->size;
 	}
+
 	spin_unlock_irqrestore(&dbri->lock, flags);
 }
 
-static DECLARE_TASKLET(xmit_descs_task, xmit_descs, 0);
-
 /* transmission_complete_intr()
  *
  * Called by main interrupt handler when DBRI signals transmission complete
@@ -1775,9 +1742,9 @@
  * them as available. Stops when the first descriptor is found without
  * TBC (Transmit Buffer Complete) set, or we've run through them all.
  *
- * The DMA buffers are not released, but re-used. Since the transmit buffer
- * descriptors are not clobbered, they can be re-submitted as is. This is
- * done by the xmit_descs() tasklet above since that could take longer.
+ * The DMA buffers are not released. They form a ring buffer and
+ * they are filled by ALSA while others are transmitted by DMA.
+ *
  */
 
 static void transmission_complete_intr(struct snd_dbri * dbri, int pipe)
@@ -1803,21 +1770,9 @@
 		dprintk(D_INT, "TD %d, status 0x%02x\n", td, status);
 
 		dbri->dma->desc[td].word4 = 0;	/* Reset it for next time. */
-		info->offset += dbri->descs[td].len;
-		info->left -= dbri->descs[td].len;
+		info->offset += DBRI_RD_CNT(dbri->dma->desc[td].word1);
 
-		/* On the last TD, transmit them all again. */
-		if (dbri->descs[td].next == -1) {
-			if (info->left > 0) {
-				printk(KERN_WARNING
-				       "%d bytes left after last transfer.\n",
-				       info->left);
-				info->left = 0;
-			}
-			tasklet_schedule(&xmit_descs_task);
-		}
-
-		td = dbri->descs[td].next;
+		td = dbri->next_desc[td];
 		dbri->pipes[pipe].desc = td;
 	}
 
@@ -1841,30 +1796,18 @@
 		return;
 	}
 
-	dbri->descs[rd].inuse = 0;
-	dbri->pipes[pipe].desc = dbri->descs[rd].next;
+	dbri->pipes[pipe].desc = dbri->next_desc[rd];
 	status = dbri->dma->desc[rd].word1;
 	dbri->dma->desc[rd].word1 = 0;	/* Reset it for next time. */
 
 	info = &dbri->stream_info[DBRI_REC];
 	info->offset += DBRI_RD_CNT(status);
-	info->left += DBRI_RD_CNT(status);
 
 	/* FIXME: Check status */
 
 	dprintk(D_INT, "Recv RD %d, status 0x%02x, len %d\n",
 		rd, DBRI_RD_STATUS(status), DBRI_RD_CNT(status));
 
-	/* On the last TD, transmit them all again. */
-	if (dbri->descs[rd].next == -1) {
-		if (info->left > info->size) {
-			printk(KERN_WARNING
-			       "%d bytes recorded in %d size buffer.\n",
-			       info->left, info->size);
-		}
-		tasklet_schedule(&xmit_descs_task);
-	}
-
 	/* Notify ALSA */
 	if (spin_is_locked(&dbri->lock)) {
 		spin_unlock(&dbri->lock);
@@ -1892,16 +1835,11 @@
 			channel, code, rval);
 	}
 
-	if (channel == D_INTR_CMD && command == D_WAIT) {
-		dbri->wait_ackd = val;
-		if (dbri->wait_send != val) {
-			printk(KERN_ERR "Processing wait command %d when %d was send.\n",
-			       val, dbri->wait_send);
-		}
-		return;
-	}
-
 	switch (code) {
+	case D_INTR_CMDI:
+		if (command != D_WAIT)
+			printk(KERN_ERR "DBRI: Command read interrupt\n");
+		break;
 	case D_INTR_BRDY:
 		reception_complete_intr(dbri, channel);
 		break;
@@ -1914,8 +1852,10 @@
 		 * resend SDP command with clear pipe bit (C) set
 		 */
 		{
-			volatile s32 *cmd;
-
+	/* FIXME: do something useful in case of underrun */
+			printk(KERN_ERR "DBRI: Underrun error\n");
+#if 0
+			s32 *cmd;
 			int pipe = channel;
 			int td = dbri->pipes[pipe].desc;
 
@@ -1926,6 +1866,7 @@
 					    | D_SDP_P | D_SDP_C | D_SDP_2SAME);
 			*(cmd++) = dbri->dma_dvma + dbri_dma_off(desc, td);
 			dbri_cmdsend(dbri, cmd);
+#endif
 		}
 		break;
 	case D_INTR_FXDT:
@@ -1946,9 +1887,7 @@
 /* dbri_process_interrupt_buffer advances through the DBRI's interrupt
  * buffer until it finds a zero word (indicating nothing more to do
  * right now).  Non-zero words require processing and are handed off
- * to dbri_process_one_interrupt AFTER advancing the pointer.  This
- * order is important since we might recurse back into this function
- * and need to make sure the pointer has been advanced first.
+ * to dbri_process_one_interrupt AFTER advancing the pointer.
  */
 static void dbri_process_interrupt_buffer(struct snd_dbri * dbri)
 {
@@ -1957,10 +1896,8 @@
 	while ((x = dbri->dma->intr[dbri->dbri_irqp]) != 0) {
 		dbri->dma->intr[dbri->dbri_irqp] = 0;
 		dbri->dbri_irqp++;
-		if (dbri->dbri_irqp == (DBRI_NO_INTS * DBRI_INT_BLK))
+		if (dbri->dbri_irqp == DBRI_INT_BLK)
 			dbri->dbri_irqp = 1;
-		else if ((dbri->dbri_irqp & (DBRI_INT_BLK - 1)) == 0)
-			dbri->dbri_irqp++;
 
 		dbri_process_one_interrupt(dbri, x);
 	}
@@ -2020,8 +1957,6 @@
 
 	dbri_process_interrupt_buffer(dbri);
 
-	/* FIXME: Write 0 into regs to ACK interrupt */
-
 	spin_unlock(&dbri->lock);
 
 	return IRQ_HANDLED;
@@ -2039,8 +1974,8 @@
 				  SNDRV_PCM_FMTBIT_A_LAW |
 				  SNDRV_PCM_FMTBIT_U8 |
 				  SNDRV_PCM_FMTBIT_S16_BE,
-	.rates			= SNDRV_PCM_RATE_8000_48000,
-	.rate_min		= 8000,
+	.rates			= SNDRV_PCM_RATE_8000_48000 | SNDRV_PCM_RATE_5512,
+	.rate_min		= 5512,
 	.rate_max		= 48000,
 	.channels_min		= 1,
 	.channels_max		= 2,
@@ -2051,6 +1986,53 @@
 	.periods_max		= 1024,
 };
 
+/*
+ * hw rule callback: narrow the sample format mask from the channel count.
+ *
+ * If the channels interval has a minimum above 1, the format mask is refined
+ * against a mask whose first word keeps only SNDRV_PCM_FMTBIT_S16_BE and the
+ * result of snd_mask_refine() is returned.  Otherwise 0 is returned.
+ */
+static int snd_hw_rule_format(struct snd_pcm_hw_params *params,
+			      struct snd_pcm_hw_rule *rule)
+{
+	struct snd_interval *c = hw_param_interval(params,
+				SNDRV_PCM_HW_PARAM_CHANNELS);
+	struct snd_mask *f = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+	struct snd_mask fmt;
+
+	snd_mask_any(&fmt);
+	if (c->min > 1) {
+		fmt.bits[0] &= SNDRV_PCM_FMTBIT_S16_BE;
+		return snd_mask_refine(f, &fmt);
+	}
+	return 0;
+}
+
+/*
+ * hw rule callback: narrow the channel count from the sample format mask.
+ *
+ * If the first word of the format mask does not contain
+ * SNDRV_PCM_FMTBIT_S16_BE, the channels interval is refined to exactly 1 and
+ * the result of snd_interval_refine() is returned.  Otherwise 0 is returned.
+ */
+static int snd_hw_rule_channels(struct snd_pcm_hw_params *params,
+				struct snd_pcm_hw_rule *rule)
+{
+	struct snd_interval *c = hw_param_interval(params,
+				SNDRV_PCM_HW_PARAM_CHANNELS);
+	struct snd_mask *f = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+	struct snd_interval ch;
+
+	snd_interval_any(&ch);
+	if (!(f->bits[0] & SNDRV_PCM_FMTBIT_S16_BE)) {
+		ch.min = ch.max = 1;
+		ch.integer = 1;
+		return snd_interval_refine(c, &ch);
+	}
+	return 0;
+}
+
 static int snd_dbri_open(struct snd_pcm_substream *substream)
 {
 	struct snd_dbri *dbri = snd_pcm_substream_chip(substream);
@@ -2063,12 +2031,19 @@
 
 	spin_lock_irqsave(&dbri->lock, flags);
 	info->substream = substream;
-	info->left = 0;
 	info->offset = 0;
 	info->dvma_buffer = 0;
 	info->pipe = -1;
 	spin_unlock_irqrestore(&dbri->lock, flags);
 
+	snd_pcm_hw_rule_add(runtime,0,SNDRV_PCM_HW_PARAM_CHANNELS,
+			    snd_hw_rule_format, 0, SNDRV_PCM_HW_PARAM_FORMAT,
+			    -1);
+	snd_pcm_hw_rule_add(runtime,0,SNDRV_PCM_HW_PARAM_FORMAT,
+			    snd_hw_rule_channels, 0, 
+			    SNDRV_PCM_HW_PARAM_CHANNELS,
+			    -1);
+				
 	cs4215_open(dbri);
 
 	return 0;
@@ -2081,7 +2056,6 @@
 
 	dprintk(D_USR, "close audio output.\n");
 	info->substream = NULL;
-	info->left = 0;
 	info->offset = 0;
 
 	return 0;
@@ -2134,6 +2108,7 @@
 	struct snd_dbri *dbri = snd_pcm_substream_chip(substream);
 	struct dbri_streaminfo *info = DBRI_STREAM(dbri, substream);
 	int direction;
+
 	dprintk(D_USR, "hw_free.\n");
 
 	/* hw_free can get called multiple times. Only unmap the DMA once.
@@ -2148,7 +2123,10 @@
 				  substream->runtime->buffer_size, direction);
 		info->dvma_buffer = 0;
 	}
-	info->pipe = -1;
+	if (info->pipe != -1) {
+		reset_pipe(dbri, info->pipe);
+		info->pipe = -1;
+	}
 
 	return snd_pcm_lib_free_pages(substream);
 }
@@ -2157,18 +2135,16 @@
 {
 	struct snd_dbri *dbri = snd_pcm_substream_chip(substream);
 	struct dbri_streaminfo *info = DBRI_STREAM(dbri, substream);
-	struct snd_pcm_runtime *runtime = substream->runtime;
 	int ret;
 
 	info->size = snd_pcm_lib_buffer_bytes(substream);
 	if (DBRI_STREAMNO(substream) == DBRI_PLAY)
 		info->pipe = 4;	/* Send pipe */
-	else {
+	else
 		info->pipe = 6;	/* Receive pipe */
-		info->left = info->size;	/* To trigger submittal */
-	}
 
 	spin_lock_irq(&dbri->lock);
+	info->offset = 0;
 
 	/* Setup the all the transmit/receive desciptors to cover the
 	 * whole DMA buffer.
@@ -2176,8 +2152,6 @@
 	ret = setup_descs(dbri, DBRI_STREAMNO(substream),
 			  snd_pcm_lib_period_bytes(substream));
 
-	runtime->stop_threshold = DBRI_TD_MAXCNT / runtime->channels;
-
 	spin_unlock_irq(&dbri->lock);
 
 	dprintk(D_USR, "prepare audio output. %d bytes\n", info->size);
@@ -2194,14 +2168,11 @@
 	case SNDRV_PCM_TRIGGER_START:
 		dprintk(D_USR, "start audio, period is %d bytes\n",
 			(int)snd_pcm_lib_period_bytes(substream));
-		/* Enable & schedule the tasklet that re-submits the TDs. */
-		xmit_descs_task.data = (unsigned long)dbri;
-		tasklet_schedule(&xmit_descs_task);
+		/* Re-submit the TDs. */
+		xmit_descs(dbri);
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
 		dprintk(D_USR, "stop audio.\n");
-		/* Make the tasklet bail out immediately. */
-		xmit_descs_task.data = 0;
 		reset_pipe(dbri, info->pipe);
 		break;
 	default:
@@ -2219,8 +2190,8 @@
 
 	ret = bytes_to_frames(substream->runtime, info->offset)
 		% substream->runtime->buffer_size;
-	dprintk(D_USR, "I/O pointer: %ld frames, %d bytes left.\n",
-		ret, info->left);
+	dprintk(D_USR, "I/O pointer: %ld frames of %ld.\n",
+		ret, substream->runtime->buffer_size);
 	return ret;
 }
 
@@ -2254,7 +2225,6 @@
 	pcm->private_data = dbri;
 	pcm->info_flags = 0;
 	strcpy(pcm->name, dbri->card->shortname);
-	dbri->pcm = pcm;
 
 	if ((err = snd_pcm_lib_preallocate_pages_for_all(pcm,
 			SNDRV_DMA_TYPE_CONTINUOUS,
@@ -2303,7 +2273,6 @@
 {
 	struct snd_dbri *dbri = snd_kcontrol_chip(kcontrol);
 	struct dbri_streaminfo *info = &dbri->stream_info[kcontrol->private_value];
-	unsigned long flags;
 	int changed = 0;
 
 	if (info->left_gain != ucontrol->value.integer.value[0]) {
@@ -2318,13 +2287,9 @@
 		/* First mute outputs, and wait 1/8000 sec (125 us)
 		 * to make sure this takes.  This avoids clicking noises.
 		 */
-		spin_lock_irqsave(&dbri->lock, flags);
-
 		cs4215_setdata(dbri, 1);
 		udelay(125);
 		cs4215_setdata(dbri, 0);
-
-		spin_unlock_irqrestore(&dbri->lock, flags);
 	}
 	return changed;
 }
@@ -2371,7 +2336,6 @@
 				 struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_dbri *dbri = snd_kcontrol_chip(kcontrol);
-	unsigned long flags;
 	int elem = kcontrol->private_value & 0xff;
 	int shift = (kcontrol->private_value >> 8) & 0xff;
 	int mask = (kcontrol->private_value >> 16) & 0xff;
@@ -2404,13 +2368,9 @@
 		/* First mute outputs, and wait 1/8000 sec (125 us)
 		 * to make sure this takes.  This avoids clicking noises.
 		 */
-		spin_lock_irqsave(&dbri->lock, flags);
-
 		cs4215_setdata(dbri, 1);
 		udelay(125);
 		cs4215_setdata(dbri, 0);
-
-		spin_unlock_irqrestore(&dbri->lock, flags);
 	}
 	return changed;
 }
@@ -2473,7 +2433,6 @@
 	for (idx = DBRI_REC; idx < DBRI_NO_STREAMS; idx++) {
 		dbri->stream_info[idx].left_gain = 0;
 		dbri->stream_info[idx].right_gain = 0;
-		dbri->stream_info[idx].balance = DBRI_MID_BALANCE;
 	}
 
 	return 0;
@@ -2505,12 +2464,11 @@
 			struct dbri_pipe *pptr = &dbri->pipes[pipe];
 			snd_iprintf(buffer,
 				    "Pipe %d: %s SDP=0x%x desc=%d, "
-				    "len=%d @ %d prev: %d next %d\n",
+				    "len=%d next %d\n",
 				    pipe,
-				    (pptr->direction ==
-				     PIPEinput ? "input" : "output"), pptr->sdp,
-				    pptr->desc, pptr->length, pptr->cycle,
-				    pptr->prevpipe, pptr->nextpipe);
+				   ((pptr->sdp & D_SDP_TO_SER) ? "output" : "input"),
+				    pptr->sdp, pptr->desc,
+				    pptr->length, pptr->nextpipe);
 		}
 	}
 }
@@ -2549,7 +2507,6 @@
 	dbri->card = card;
 	dbri->sdev = sdev;
 	dbri->irq = irq->pri;
-	dbri->dbri_version = sdev->prom_name[9];
 
 	dbri->dma = sbus_alloc_consistent(sdev, sizeof(struct dbri_dma),
 					  &dbri->dma_dvma);
@@ -2669,7 +2626,7 @@
 
 	printk(KERN_INFO "audio%d at %p (irq %d) is DBRI(%c)+CS4215(%d)\n",
 	       dev, dbri->regs,
-	       dbri->irq, dbri->dbri_version, dbri->mm.version);
+	       dbri->irq, sdev->prom_name[9], dbri->mm.version);
 	dev++;
 
 	return 0;
diff --git a/sound/synth/emux/emux_proc.c b/sound/synth/emux/emux_proc.c
index 58b9601..59144ec 100644
--- a/sound/synth/emux/emux_proc.c
+++ b/sound/synth/emux/emux_proc.c
@@ -128,10 +128,13 @@
 
+/*
+ * Free the proc entry of the emux instance and clear emu->proc.
+ * NOTE(review): emu->proc is passed on without a NULL check, so this
+ * presumably relies on snd_info_free_entry() accepting NULL -- confirm.
+ */
 void snd_emux_proc_free(struct snd_emux *emu)
 {
-	if (emu->proc) {
-		snd_info_unregister(emu->proc);
-		emu->proc = NULL;
-	}
+	snd_info_free_entry(emu->proc);
+	emu->proc = NULL;
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 1b7f499..49248fa 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -68,7 +68,7 @@
 static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;	/* Enable this card */
 static int vid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 }; /* Vendor ID for this card */
 static int pid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 }; /* Product ID for this card */
-static int nrpacks = 4;		/* max. number of packets per urb */
+static int nrpacks = 8;		/* max. number of packets per urb */
 static int async_unlink = 1;
 static int device_setup[SNDRV_CARDS]; /* device parameter for this card*/
 
@@ -100,7 +100,7 @@
  *
  */
 
-#define MAX_PACKS	10
+#define MAX_PACKS	20
 #define MAX_PACKS_HS	(MAX_PACKS * 8)	/* in high speed mode */
 #define MAX_URBS	8
 #define SYNC_URBS	4	/* always four urbs for sync */
@@ -123,6 +123,8 @@
 	unsigned int rate_min, rate_max;	/* min/max rates */
 	unsigned int nr_rates;		/* number of rate table entries */
 	unsigned int *rate_table;	/* rate table */
+	unsigned int needs_knot;	/* any unusual rates? */
+	struct snd_pcm_hw_constraint_list rate_constraint; /* KNOT rate list given to the PCM core */
 };
 
 struct snd_usb_substream;
@@ -1759,6 +1761,9 @@
 		}
 		channels[f->format] |= (1 << f->channels);
 		rates[f->format] |= f->rates;
+		/* needs knot? */
+		if (f->needs_knot)
+			goto __out;
 	}
 	/* check whether channels and rates match for all formats */
 	cmaster = rmaster = 0;
@@ -1799,6 +1804,46 @@
 	return err;
 }
 
+/*
+ * If the device supports unusual sample rates, constrain the runtime to them.
+ *
+ * For every format flagged needs_knot, a rate-list constraint built from that
+ * format's rate_table is added to the runtime.  snd_pcm_hw_constraint_list()
+ * keeps the pointer it is handed and dereferences it later, when the hw
+ * parameters are refined, so the descriptor must outlive this function: it
+ * lives in the audioformat itself rather than on the stack.
+ *
+ * Returns 0 on success or the negative error code from
+ * snd_pcm_hw_constraint_list().
+ */
+static int snd_usb_pcm_check_knot(struct snd_pcm_runtime *runtime,
+				  struct snd_usb_substream *subs)
+{
+	struct list_head *p;
+	int err;
+
+	list_for_each(p, &subs->fmt_list) {
+		struct audioformat *fp;
+		fp = list_entry(p, struct audioformat, list);
+
+		if (!fp->needs_knot)
+			continue;
+
+		fp->rate_constraint.count = fp->nr_rates;
+		fp->rate_constraint.list = fp->rate_table;
+		fp->rate_constraint.mask = 0;
+
+		err = snd_pcm_hw_constraint_list(runtime, 0,
+			SNDRV_PCM_HW_PARAM_RATE,
+			&fp->rate_constraint);
+
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
 
 /*
  * set up the runtime hardware information.
@@ -1861,6 +1897,8 @@
 					       SNDRV_PCM_HW_PARAM_CHANNELS,
 					       -1)) < 0)
 			return err;
+		if ((err = snd_usb_pcm_check_knot(runtime, subs)) < 0)
+			return err;
 	}
 	return 0;
 }
@@ -2049,7 +2087,7 @@
 };
 
 
-#if defined(CONFIG_PROCFS) && defined(CONFIG_SND_VERBOSE_PROCFS)
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_SND_VERBOSE_PROCFS)
 
 /*
  * proc interface for list the supported pcm formats
@@ -2406,6 +2444,7 @@
 				    unsigned char *fmt, int offset)
 {
 	int nr_rates = fmt[offset];
+	int found;
 	if (fmt[0] < offset + 1 + 3 * (nr_rates ? nr_rates : 2)) {
 		snd_printk(KERN_ERR "%d:%u:%d : invalid FORMAT_TYPE desc\n",
 				   chip->dev->devnum, fp->iface, fp->altsetting);
@@ -2428,6 +2467,7 @@
 			return -1;
 		}
 
+		fp->needs_knot = 0;
 		fp->nr_rates = nr_rates;
 		fp->rate_min = fp->rate_max = combine_triple(&fmt[8]);
 		for (r = 0, idx = offset + 1; r < nr_rates; r++, idx += 3) {
@@ -2436,13 +2476,19 @@
 				fp->rate_min = rate;
 			else if (rate > fp->rate_max)
 				fp->rate_max = rate;
+			found = 0;
 			for (c = 0; c < (int)ARRAY_SIZE(conv_rates); c++) {
 				if (rate == conv_rates[c]) {
+					found = 1;
 					fp->rates |= (1 << c);
 					break;
 				}
 			}
+			if (!found)
+				fp->needs_knot = 1;
 		}
+		if (fp->needs_knot)
+			fp->rates |= SNDRV_PCM_RATE_KNOT;
 	} else {
 		/* continuous rates */
 		fp->rates = SNDRV_PCM_RATE_CONTINUOUS;
@@ -3499,7 +3545,7 @@
 		}
 		usb_chip[chip->index] = NULL;
 		mutex_unlock(&register_mutex);
-		snd_card_free(card);
+		snd_card_free_when_closed(card);
 	} else {
 		mutex_unlock(&register_mutex);
 	}
diff --git a/sound/usb/usbmixer.c b/sound/usb/usbmixer.c
index 491e975..e516d6a 100644
--- a/sound/usb/usbmixer.c
+++ b/sound/usb/usbmixer.c
@@ -37,6 +37,7 @@
 #include <sound/control.h>
 #include <sound/hwdep.h>
 #include <sound/info.h>
+#include <sound/tlv.h>
 
 #include "usbaudio.h"
 
@@ -416,6 +417,33 @@
 	return set_ctl_value(cval, SET_CUR, (cval->control << 8) | channel, value);
 }
 
+/*
+ * TLV callback for mixer volume controls
+ *
+ * Fills a dB-scale TLV with the control's minimum (cval->min) and resolution
+ * (cval->res), both converted to 1/100 dB, and copies it to the user buffer
+ * _tlv.  op_flag is not used.
+ *
+ * Returns 0 on success, -ENOMEM when size is smaller than the TLV, or -EFAULT
+ * when the copy to user space fails.
+ */
+static int mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag,
+			 unsigned int size, unsigned int __user *_tlv)
+{
+	struct usb_mixer_elem_info *cval = kcontrol->private_data;
+	DECLARE_TLV_DB_SCALE(scale, 0, 0, 0);
+
+	if (size < sizeof(scale))
+		return -ENOMEM;
+	/* USB reports the dB scale in 1/256 dB units
+	 * while the ALSA TLV uses 1/100 dB units
+	 */
+	scale[2] = (convert_signed_value(cval, cval->min) * 100) / 256;
+	scale[3] = (convert_signed_value(cval, cval->res) * 100) / 256;
+	if (copy_to_user(_tlv, scale, sizeof(scale)))
+		return -EFAULT;
+	return 0;
+}
 
 /*
  * parser routines begin here...
@@ -933,6 +954,12 @@
 		}
 		strlcat(kctl->id.name + len, control == USB_FEATURE_MUTE ? " Switch" : " Volume",
 			sizeof(kctl->id.name));
+		if (control == USB_FEATURE_VOLUME) {
+			kctl->tlv.c = mixer_vol_tlv;
+			kctl->vd[0].access |= 
+				SNDRV_CTL_ELEM_ACCESS_TLV_READ |
+				SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
+		}
 		break;
 
 	default:
diff --git a/sound/usb/usbmixer_maps.c b/sound/usb/usbmixer_maps.c
index 37accb6..7c4dcb3 100644
--- a/sound/usb/usbmixer_maps.c
+++ b/sound/usb/usbmixer_maps.c
@@ -234,6 +234,33 @@
 	{ 0 } /* terminator */
 };
 
+/*
+ * TerraTec Aureon 5.1 MkII USB
+ *
+ * The comments enumerate the device's units by ID; only the entries with
+ * a name string are actually part of the map.
+ * NOTE(review): IT/OT/SU/FU/MU presumably stand for input terminal, output
+ * terminal, selector unit, feature unit and mixer unit -- confirm.
+ */
+static struct usbmix_name_map aureon_51_2_map[] = {
+	/* 1: IT USB */
+	/* 2: IT Mic */
+	/* 3: IT Line */
+	/* 4: IT SPDIF */
+	/* 5: OT SPDIF */
+	/* 6: OT Speaker */
+	/* 7: OT USB */
+	{ 8, "Capture Source" }, /* SU */
+	{ 9, "Master Playback" }, /* FU */
+	{ 10, "Mic Capture" }, /* FU */
+	{ 11, "Line Capture" }, /* FU */
+	{ 12, "IEC958 In Capture" }, /* FU */
+	{ 13, "Mic Playback" }, /* FU */
+	{ 14, "Line Playback" }, /* FU */
+	/* 15: MU */
+	{} /* terminator */
+};
+
 /*
  * Control map entries
  */
@@ -276,6 +296,10 @@
 		.id = USB_ID(0x0c45, 0x1158),
 		.map = justlink_map,
 	},
+	{
+		.id = USB_ID(0x0ccd, 0x0028),
+		.map = aureon_51_2_map,
+	},
 	{ 0 } /* terminator */
 };
 
diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h
index 9351846..a7e9563 100644
--- a/sound/usb/usbquirks.h
+++ b/sound/usb/usbquirks.h
@@ -123,6 +123,10 @@
 YAMAHA_DEVICE(0x103f, NULL),
 YAMAHA_DEVICE(0x1040, NULL),
 YAMAHA_DEVICE(0x1041, NULL),
+YAMAHA_DEVICE(0x1042, NULL),
+YAMAHA_DEVICE(0x1043, NULL),
+YAMAHA_DEVICE(0x1044, NULL),
+YAMAHA_DEVICE(0x1045, NULL),
 YAMAHA_DEVICE(0x2000, "DGP-7"),
 YAMAHA_DEVICE(0x2001, "DGP-5"),
 YAMAHA_DEVICE(0x2002, NULL),
@@ -141,6 +145,7 @@
 YAMAHA_DEVICE(0x500c, "DME24N"),
 YAMAHA_DEVICE(0x500d, NULL),
 YAMAHA_DEVICE(0x500e, NULL),
+YAMAHA_DEVICE(0x500f, NULL),
 YAMAHA_DEVICE(0x7000, "DTX"),
 YAMAHA_DEVICE(0x7010, "UB99"),
 #undef YAMAHA_DEVICE